Thursday, July 26, 2012

Scraping mobile phone numbers from the web


# Scan every <td> of the already-parsed page `web` for mobile numbers and
# store each one as a Contact in group 1.
#
# A number is one of the listed 3-digit prefixes followed by exactly 7 digits
# (10 digits in total), looked up in the cell's HTML after every non-digit
# character has been removed.
#
# Fixes over the earlier version:
# * it split the digit string on the prefixes and then located each prefix
#   again with `tmp =~ /prefix/`, which always returns the FIRST occurrence,
#   so a cell holding two numbers with the same prefix produced the first
#   number twice and never the second;
# * a prefix appearing inside a number's last 7 digits was treated as the
#   start of another number.
# A single left-to-right `scan` consumes each 10-digit number whole and avoids
# both problems. Only complete numbers are printed and saved now.
mobile_number = /(?:010|012|013|014|016|017|018|019)\d{7}/

web.search("td").each do |cell|
  html = cell.inner_html
  # Cells with 10 characters or fewer cannot hold a full number.
  next unless html.length > 10

  # Keep digits only; this also drops markup, whitespace and separators.
  digits = html.gsub(/\D/, "")

  digits.scan(mobile_number).each do |number|
    puts number
    Contact.create(:group_id => 1, :phone => number)
  end
end

Tuesday, July 17, 2012

Scraping pictures from myjodoh.net



# Download the full-size picture behind every ".picborder" thumbnail found on
# result pages 1..466 of myjodoh.net, saving each one in the current directory
# under the thumbnail's file name.
agent = Mechanize.new
# Make one request first so agent.history.last.uri exists for cookie_jar.add.
agent.get("http://www.myjodoh.net/img.php?display=193941.jpg")
# Session id copied from a logged-in web browser session.
cookie = Mechanize::Cookie.new("PHPSESSID", "460a4eac8d62000529e894727c32bd31")
cookie.domain = "myjodoh.net"
cookie.path = "/"
agent.cookie_jar.add(agent.history.last.uri, cookie)

(1..466).each do |i|
  listing_url = "http://www.myjodoh.net/index.php?mod=result&page=#{i}&se=a%3A11%3A%7Bs%3A4%3A%22nSex%22%3Bs%3A1%3A%220%22%3Bs%3A5%3A%22nAge1%22%3Bs%3A2%3A%2218%22%3Bs%3A5%3A%22nAge2%22%3Bs%3A2%3A%2260%22%3Bs%3A8%3A%22nHeight1%22%3Bs%3A3%3A%22140%22%3Bs%3A8%3A%22nHeight2%22%3Bs%3A3%3A%22200%22%3Bs%3A8%3A%22nWeight1%22%3Bs%3A2%3A%2240%22%3Bs%3A8%3A%22nWeight2%22%3Bs%3A3%3A%22200%22%3Bs%3A7%3A%22nStatus%22%3Bs%3A1%3A%220%22%3Bs%3A10%3A%22nStateFrom%22%3Bs%3A2%3A%2299%22%3Bs%3A9%3A%22nStateNow%22%3Bs%3A2%3A%2299%22%3Bs%3A5%3A%22order%22%3Bs%3A5%3A%22dLast%22%3B%7D&PHPSESSIONID=460a4eac8d62000529e894727c32bd31"

  # URI#open (from open-uri) with a block closes the response handle once the
  # page is parsed; a bare open(url) left it open and no longer accepts URLs
  # on Ruby 3.x.
  web = URI.parse(listing_url).open { |io| Nokogiri::HTML(io) }

  web.search(".picborder").each do |x|
    filename = x['src'].gsub("http://www.myjodoh.net/thumb1/", "")
    # The full-size image is served by img.php under the thumbnail's name.
    url = x['src'].gsub("thumb1/", "img.php?display=")
    puts filename
    # 'wb': image data is binary and must not go through text-mode newline
    # or encoding conversion.
    File.open(filename, 'wb') do |file|
      file << agent.get_file(url)
    end
  end
end

Scrape email addresses from Yellow Pages and send them an email, with a 300-second delay between pages


# Walk result pages 1..63 of the yellowpages.com.my "logistic" search, resolve
# the email address behind each "a.email" link, and send the inquiry mail to
# the addresses found on that page, pausing 300 seconds between pages.
#
# `em` ends up holding every distinct address collected so far.
em = []
(1..63).each do |i|
  search_url = "http://www.yellowpages.com.my/search.jsp?sfor=all&name=logistic&w=&p=#{i}"
  # Block form closes the open-uri handle once the page has been parsed.
  yp = URI.parse(search_url).open { |io| Nokogiri::HTML(io) }

  # The listing id is embedded in the link's onclick handler; links without
  # a usable handler yield nil and are dropped.
  ids = yp.search("a.email").map { |x|
    x["onclick"].to_s.gsub("SqueezeBox.open('/plainmail.jsp?id=", "").split("',")[0]
  }.compact

  page_emails = []
  ids.each do |eid|
    t = URI.parse("http://yellowpages.com.my/plainmail.jsp?id=#{eid}").open { |io| Nokogiri::HTML(io) }
    field = t.search("input").first
    # Skip pages without an <input value="..."> instead of raising on nil.
    page_emails.push(field["value"]) if field && field["value"]
  end

  # Mail only addresses not mailed on an earlier page. The earlier version
  # passed the whole accumulated list on every page, so addresses from page 1
  # were mailed again on each of the following pages.
  fresh = page_emails.uniq - em
  em.concat(fresh)
  InvoiceMailer.inquiry(fresh).deliver unless fresh.empty?
  sleep 300
end

Friday, July 13, 2012

Example God (Ruby gem) configuration file for running Unicorn with bundle exec.


# God watch named 'unicorn': starts Unicorn through `bundle exec` from
# rails_root and starts it again whenever the process is found not running.
God.watch do |watch|
  # Pid file path shared by the stop/restart commands and by God itself.
  unicorn_pid = "#{rails_root}/tmp/pids/unicorn.pid"

  watch.name = 'unicorn'
  watch.interval = 30.seconds
  watch.start = "cd #{rails_root} && bundle exec unicorn -c /home/system/deploy/akeyu/config/unicorn.rb -D"
  # Stop and restart signal the pid recorded in the pid file (QUIT / USR2).
  watch.stop = "kill -QUIT `cat #{unicorn_pid}`"
  watch.restart = "kill -USR2 `cat #{unicorn_pid}`"
  watch.start_grace = 10.seconds
  watch.restart_grace = 10.seconds
  watch.pid_file = unicorn_pid
  watch.behavior(:clean_pid_file)

  # Start condition: polled every 5 seconds, met when the process is not running.
  watch.start_if do |on|
    on.condition(:process_running) do |cond|
      cond.interval = 5.seconds
      cond.running = false
    end
  end
end

Monday, July 9, 2012

nginx log file location when installed with Homebrew

tail -f /usr/local/Cellar/nginx/1.0.10/logs/error.log (and, of course, access.log in the same directory)

Saturday, July 7, 2012

Ruby on Rails daemon


akob:ceramahonline akob$ rails plugin install git://github.com/dougal/daemon_generator.git
Initialized empty Git repository in /Users/akob/hak/ceramahonline/vendor/plugins/daemon_generator/.git/
remote: Counting objects: 25, done.
remote: Compressing objects: 100% (21/21), done.
remote: Total 25 (delta 2), reused 20 (delta 1)
Unpacking objects: 100% (25/25), done.
From git://github.com/dougal/daemon_generator
 * branch            HEAD       -> FETCH_HEAD
Daemon Generator
================

To get yourself rolling:
> sudo gem install daemons
> ./script/generate daemon <name>

Then insert your code in the lib/daemons/<name>.rb stub. All pid's and logs will live in the normal log/ folder.  This helps to make things Capistrano friendly.

Individual control script:
> ./lib/daemons/<name>_ctl [start|stop|restart]

App-wide control script (I add this to my capistrano recipe's after_restart task):
> ./script/daemons [start|stop|restart]
akob:ceramahonline akob$

Key to SEO success

1. A title that best describes the content
2. A meta description of 2-3 sentences
3. Meta keywords, comma-separated, anticipating the user search terms that relate to our content
4. Use only one H1, and use the other Hn headings as well
5. Social networking: share, like, comment, +
6. Short and readable URLs