Thursday, July 26, 2012
Scraping mobile phone numbers from a web page
# Scan every table cell of the parsed page (`web`) for mobile numbers: one of
# the known 3-digit prefixes followed by 7 more digits (10 digits in total).
# Every match is printed and stored as a Contact in group 1.
mobile_number = /(?:010|012|013|014|016|017|018|019)\d{7}/

web.search("td").each do |cell|
  html = cell.inner_html
  # A number needs at least 10 characters; shorter cells cannot contain one.
  next if html.length < 10

  # Drop everything that is not a digit so separators such as spaces or dashes
  # inside a number do not break the match.
  # NOTE(review): inner_html keeps tag markup, so digits inside attributes also
  # end up in this string - confirm whether the cell's text was intended.
  digits = html.gsub(/\D/, "")

  # scan walks the string left to right and yields each non-overlapping match,
  # so a prefix that occurs more than once produces its own number each time
  # instead of the first occurrence being reported again.
  digits.scan(mobile_number).each do |number|
    puts number
    Contact.create(:group_id => 1, :phone => number)
  end
end
Tuesday, July 17, 2012
scraping pictures at myjodoh.net
# Download the full-size picture behind every ".picborder" thumbnail found on
# result pages 1..466 and save each one in the current directory.
agent = Mechanize.new
# Fetch one page first so the agent has a history entry; its URI is needed
# when the session cookie is added to the cookie jar below.
agent.get("http://www.myjodoh.net/img.php?display=193941.jpg")
# Session id copied from a logged-in web browser.
cookie = Mechanize::Cookie.new("PHPSESSID" , "460a4eac8d62000529e894727c32bd31")
cookie.domain = "myjodoh.net"
cookie.path = "/"
agent.cookie_jar.add(agent.history.last.uri, cookie)

(1..466).each do |page|
  # NOTE(review): Kernel#open only fetches URLs (via open-uri) on Ruby < 3.0;
  # newer Rubies need URI.open here - confirm the target Ruby version.
  web = Nokogiri::HTML(open("http://www.myjodoh.net/index.php?mod=result&page=#{page}&se=a%3A11%3A%7Bs%3A4%3A%22nSex%22%3Bs%3A1%3A%220%22%3Bs%3A5%3A%22nAge1%22%3Bs%3A2%3A%2218%22%3Bs%3A5%3A%22nAge2%22%3Bs%3A2%3A%2260%22%3Bs%3A8%3A%22nHeight1%22%3Bs%3A3%3A%22140%22%3Bs%3A8%3A%22nHeight2%22%3Bs%3A3%3A%22200%22%3Bs%3A8%3A%22nWeight1%22%3Bs%3A2%3A%2240%22%3Bs%3A8%3A%22nWeight2%22%3Bs%3A3%3A%22200%22%3Bs%3A7%3A%22nStatus%22%3Bs%3A1%3A%220%22%3Bs%3A10%3A%22nStateFrom%22%3Bs%3A2%3A%2299%22%3Bs%3A9%3A%22nStateNow%22%3Bs%3A2%3A%2299%22%3Bs%3A5%3A%22order%22%3Bs%3A5%3A%22dLast%22%3B%7D&PHPSESSIONID=460a4eac8d62000529e894727c32bd31"))

  web.search(".picborder").each do |thumb|
    src = thumb['src']
    # Skip elements without a src attribute instead of crashing on nil.
    next unless src

    # Keep only the last path component so the file always lands in the
    # current directory, whatever prefix the thumbnail URL carries.
    filename = File.basename(src)
    url = src.gsub("thumb1/", "img.php?display=")
    puts filename

    # Download first, then write in binary mode: image bytes must not go
    # through text-mode translation, and a failed download leaves no empty file.
    File.binwrite(filename, agent.get_file(url))
  end
end
Scrape email addresses from Yellow Pages and email them, with a 300-second delay between pages
# Walk search result pages 1..63, resolve each listing's email address and
# send the inquiry mail to the addresses found on that page, pausing 300
# seconds between pages. `em` ends up holding every address collected.
em = []
(1..63).each do |page|
  yp = Nokogiri::HTML(open("http://www.yellowpages.com.my/search.jsp?sfor=all&name=logistic&w=&p=#{page}"))

  # Each "a.email" link carries the listing id inside its onclick handler,
  # between "SqueezeBox.open('/plainmail.jsp?id=" and the following "',".
  # to_s guards against links without an onclick attribute.
  ids = yp.search("a.email").map do |link|
    link["onclick"].to_s.gsub("SqueezeBox.open('/plainmail.jsp?id=", "").split("',")[0]
  end
  ids = ids.compact.reject { |id| id.empty? }

  page_emails = []
  ids.each do |eid|
    t = Nokogiri::HTML(open("http://yellowpages.com.my/plainmail.jsp?id=#{eid}"))
    # The address is read from the value of the first <input> on the page;
    # pages without such a field are skipped instead of raising on nil.
    field = t.search("input").first
    address = field && field["value"]
    page_emails.push(address) unless address.nil? || address.empty?
  end
  em.concat(page_emails)

  # Mail only this page's addresses: sending the cumulative list would mail
  # the recipients from earlier pages again on every later page.
  InvoiceMailer.inquiry(page_emails).deliver unless page_emails.empty?
  sleep 300
end
Labels:
nokogiri,
open-uri,
ruby,
scrape,
scrape yellow pages
Friday, July 13, 2012
Example God (Ruby gem) configuration file for running Unicorn with bundle exec
# God watch named 'unicorn': defines how to start, stop and restart the
# process and starts it whenever it is found not running.
God.watch do |watch|
  # Pid file shared by the stop/restart commands and by God itself.
  pid_path = "#{rails_root}/tmp/pids/unicorn.pid"

  watch.name = 'unicorn'
  watch.interval = 30.seconds

  # Start through bundler from the application directory; stop and restart
  # send a signal to the pid recorded in the pid file.
  watch.start = "cd #{rails_root} && bundle exec unicorn -c /home/system/deploy/akeyu/config/unicorn.rb -D"
  watch.stop = "kill -QUIT `cat #{pid_path}`"
  watch.restart = "kill -USR2 `cat #{pid_path}`"

  watch.start_grace = 10.seconds
  watch.restart_grace = 10.seconds
  watch.pid_file = pid_path
  watch.behavior(:clean_pid_file)

  # Check every 5 seconds; start when :process_running reports false.
  watch.start_if do |trigger|
    trigger.condition(:process_running) do |check|
      check.interval = 5.seconds
      check.running = false
    end
  end
end
Monday, July 9, 2012
nginx log file location when nginx is installed with Homebrew
tail -f /usr/local/Cellar/nginx/1.0.10/logs/error.log
and of course access.log
Saturday, July 7, 2012
ruby on rails daemon
akob:ceramahonline akob$ rails plugin install git://github.com/dougal/daemon_generator.git
Initialized empty Git repository in /Users/akob/hak/ceramahonline/vendor/plugins/daemon_generator/.git/
remote: Counting objects: 25, done.
remote: Compressing objects: 100% (21/21), done.
remote: Total 25 (delta 2), reused 20 (delta 1)
Unpacking objects: 100% (25/25), done.
From git://github.com/dougal/daemon_generator
* branch HEAD -> FETCH_HEAD
Daemon Generator
================
To get yourself rolling:
> sudo gem install daemons
> ./script/generate daemon
Then insert your code in the lib/daemons/
Individual control script:
> ./lib/daemons/
App-wide control script (I add this to my capistrano recipe's after_restart task):
> ./script/daemons [start|stop|restart]
akob:ceramahonline akob$
Key to SEO success
1. Title best describing content
2. meta description 2-3 sentence
3. Comma-separated meta keywords that anticipate the search terms users will use to find our content. Use only one H1, and use the other Hn headings as well
4. social networking, share, like, comment, +
5. short & readable URL
2. meta description 2-3 sentence
3. Comma-separated meta keywords that anticipate the search terms users will use to find our content. Use only one H1, and use the other Hn headings as well
4. social networking, share, like, comment, +
5. short & readable URL
Subscribe to:
Posts (Atom)