#removes double quotes at beginning and end of string
some_string.gsub!(/\A"(.*)"\z/m, '\1')
#turns ActiveRecord::RecordNotFound into active_record_not_found
Klass.to_s.delete(':').gsub( /([A-Za-z])([A-Z])/, '\1' << '_' << '\2' ).downcaseMoldy. Mossy even. Spores [Now with Labels!] from this blog may have already reached the moon.
Monday, November 12, 2007
Ruby One-Liners
| Reactions: |
Friday, November 09, 2007
Easy Import from CSV or PSV to ActiveRecord
More late night coding has produced...
A mixin to help me import data from a KnowWho data export of legislative data!
The KnowWho flat files are either CSV or PSV. I used rake tasks to do the import. Here's the rake task for the house:
As you can see I am importing a single flat file of data into many different tables.
As an example here is my Position class:
And here's the all important mixin which gives each of my classes the ability to seamlessly import the ActiveRecord objects. I've imported several hundred thousand rows this way and nary a problem! This goes in a file called ImportObject.rb in the lib/ directory of the project, and is extended/included on a per model basis.
Oh and for some Stupid Simple Logging of the import I created a yell_psv method in environment.rb:
How 'bout them apples!
A mixin to help me import data from a KnowWho data export of legislative data!
The KnowWho flat files are either CSV or PSV. I used rake tasks to do the import. Here's the rake task for the house:
#The column order in the KnowWho CSV/PSV file
#"ZipS"|"ZipL"|"LowS"|"LowL"|"HighS"|"HighL"|"District"|"DistrictDesc"|"Member"|"Prefix"|"FirstName"|"MiddleName"|"LastName"|"Suffix"|"Tag"|"PartyDesc"|
# 0 1 2 3 4 5 6 7 8 9 10 12 13 14 16 17
#"Room"|"Building"|"Street"|"City"|"State"|"ZipCode"|"Telephone"|"MemberID"|"PersonID"
# 18 19 20 21 22 23 24 25 26
desc "import KnowWho House PSV"
task :house_psv => :environment do
day = "2007-10-08".to_date
yell_psv("rake import:house BEGINS at " + Time.now.strftime("%m/%d/%Y %I:%M %p"), day, "House")
puts "rake import:house BEGINS at " + Time.now.strftime("%m/%d/%Y %I:%M %p")
require 'fastercsv'
file = "#{RAILS_ROOT}/lib/psv/" + day.strftime("%Y_%m_%d") + "/Zip9Digit_USHouse_PSV_100807.txt"
yell_psv("Importing file: " + file, day, "House")
count = 1
arr = Array.new
#Optimize the party query
parts = ["Democratic Party","Republican Party"]
parties = PoliticalParty.find(:all, :conditions => ["name in (?)", parts])
FasterCSV.foreach(file, :col_sep => "|",
:headers => :first_row,:return_headers => true,
:skip_blanks => true ) do |row|
puts "CSV ROW: #{count.to_s}"
unless row.header_row?
if row['LastName'] == 'Vacant' && row['FirstName'].blank?
yell_psv("Vacant!", day, "House")
puts "Vacant!"
leader_id = nil
else
party = parties.select {|x| x.name == row['PartyDesc']}[0]
political_party = party.blank? ? PoliticalParty.find_or_create_by_row(row, day) : party
leader = Leader.find_or_create_by_row(row, day, political_party.id)
leader_id = leader.id
end
district = HouseDistrict.find_or_create_by_row(row, day)
zip_range = ZipRange.find_or_create_by_row(row, day, district.id)
position = Position.find_or_create_representative(row, day, district.id, leader_id, Department.find_by_name("United States House of Representatives").id)
addr = Address.new(:street1=>row['Building'],:street2=>['Room'],:city=>row['City'],:state=>row['State'],
:postal_code=>row['ZipCode'],:country=>'USA')
phone = row['Telephone'].blank? ? nil : PhoneNumber.new(:number_type=>"default",:number=>row['Telephone'])
position.addresses << addr unless addr.nil?
position.phone_numbers << phone unless phone.nil?
else
puts "This is the Header Row"
end
count+=1
end
yell_psv("rake import:house ENDS at " + Time.now.strftime("%m/%d/%Y %I:%M %p"), day, "House")
puts "rake import:house ENDS at " + Time.now.strftime("%m/%d/%Y %I:%M %p")
endAs you can see I am importing a single flat file of data into many different tables.
As an example here is my Position class:
class Position < ActiveRecord::Base
validates_uniqueness_of :name
validates_uniqueness_of :house_district_id, :allow_nil => true
validates_uniqueness_of :senate_district_id, :allow_nil => true
validates_presence_of :name, :department_id
acts_as_addressable
acts_as_phoneable
belongs_to :department
belongs_to :house_district
belongs_to :senate_district
belongs_to :leader
# Add methods from ImportObject as class level methods (for instance level use include)
extend ImportObject
#"District"|"DistrictDesc"|"Room"|"Building"|
# 4 5 14 15
def self.find_or_create_senator(row, day, district_id, leader_id, department_id)
i = row['District']
obj = self.find_by_name(i)
hash = { "room" => row['Room'], "building" => row['Building'].split('- ')[1], "building_cd" => row['Building'].split('- ')[0],
"name" => row['District'], "department_id" => department_id,
"office_name" => row['DistrictDesc'], "senate_district_id" => district_id, "leader_id" => leader_id, "enabled" => true,
"data_date" => day, "data_source" => "KnowWho"}
del_arr = ["name", "data_source"]
return self.import_object(row, day, obj, hash, i, del_arr)
end
#"District"|"DistrictDesc"|"Room"|"Building"|
# 4 5 14 15
def self.find_or_create_representative(row, day, district_id, leader_id, department_id)
i = row['District']
obj = self.find_by_name(i)
hash = { "room" => row['Room'], "building" => row['Building'].split('- ')[1], "building_cd" => row['Building'].split('- ')[0],
"name" => row['District'], "department_id" => department_id,
"office_name" => row['DistrictDesc'], "house_district_id" => district_id, "leader_id" => leader_id, "enabled" => true,
"data_date" => day, "data_source" => "KnowWho"}
del_arr = ["name", "data_source"]
return self.import_object(row, day, obj, hash, i, del_arr)
end
endAnd here's the all important mixin which gives each of my classes the ability to seamlessly import the ActiveRecord objects. I've imported several hundred thousand rows this way and nary a problem! This goes in a file called ImportObject.rb in the lib/ directory of the project, and is extended/included on a per model basis.
module ImportObject
def self.included(base)
# Initialize module.
end
def import_object(row, day, obj, hash, lookup_key, del_arr, skip = false)
klasss = self.to_s
if obj.blank?
yell_psv(klasss + " #{lookup_key} not found, creating new:", day, klasss)
puts klasss + " #{lookup_key} not found, creating new:"
obj = self.new
obj.attributes = obj.attributes.merge(hash)
if obj.save
puts klasss + " #{lookup_key} created"
yell_psv(klasss + " #{lookup_key} created.", day, klasss)
else
puts klasss + " #{lookup_key} falied to create."
yell_psv(klasss + " #{lookup_key} failed to create.", day, klasss)
end
return obj
elsif obj.data_date.is_a?(Date) && obj.data_date < day && !skip
del_arr.each {|x| hash.delete(x)}
obj.attributes = obj.attributes.merge(hash)
if obj.save
yell_psv(klasss + " #{lookup_key} updated.", day, klasss)
puts klasss + " #{lookup_key} updated."
return obj
else
yell_psv(klasss + " #{lookup_key} failed to update.", day, klasss)
puts klasss + " #{lookup_key} failed to update."
return nil
end
else
yell_psv(klasss + " #{lookup_key} skipped update.", day, klasss)
puts klasss + " #{lookup_key} skipped update."
return obj
end
end
endOh and for some Stupid Simple Logging of the import I created a yell_psv method in environment.rb:
def yell_psv(msg, date = Date.today, filename = "")
# stupid simple logging:
f = File.open(File.expand_path(File.dirname(__FILE__) + "/../log/" + filename + date.strftime("%Y_%m_%d") + ".log"),"a")
f.puts msg
f.close
endHow 'bout them apples!
Labels:
ActiveRecord,
Rails,
Ruby
| Reactions: |
Wednesday, November 07, 2007
Where's Peter been?!?
I have been in front of my computer most of the time. I have been writing web apps almost non-stop for a while now.
The newest I cannot yet reveal, but I hope to be able to soon, when it launches.
I have also been contributing heavily to RVideo, and am working at my day job on some projects that will soon be open sourced which are based on RVideo. We've extracted them from some commercial apps we're doing.
Watch this space!
The newest I cannot yet reveal, but I hope to be able to soon, when it launches.
I have also been contributing heavily to RVideo, and am working at my day job on some projects that will soon be open sourced which are based on RVideo. We've extracted them from some commercial apps we're doing.
Watch this space!
| Reactions: |
Subscribe to:
Posts (Atom)

