Tuesday, July 17, 2012

Scraping pictures from myjodoh.net



# Build the Mechanize agent that the download loop below reuses.
agent = Mechanize.new

# Request one image up front; the URI of that response is what the session
# cookie gets registered against in the agent's cookie jar.
first_response = agent.get("http://www.myjodoh.net/img.php?display=193941.jpg")

# The PHPSESSID value is hard-coded: copy the session id from your own web
# browser and paste it here before running the script.
session_cookie = Mechanize::Cookie.new("PHPSESSID", "460a4eac8d62000529e894727c32bd31").tap do |c|
  c.domain = "myjodoh.net"
  c.path = "/"
end

agent.cookie_jar.add(first_response.uri, session_cookie)


# Walk search-result pages 1..466 and, for every element matching
# ".picborder", download the corresponding full-size image into the current
# working directory, printing each file name as it goes.
(1..466).each do |page|
  listing_url = "http://www.myjodoh.net/index.php?mod=result&page=#{page}&se=a%3A11%3A%7Bs%3A4%3A%22nSex%22%3Bs%3A1%3A%220%22%3Bs%3A5%3A%22nAge1%22%3Bs%3A2%3A%2218%22%3Bs%3A5%3A%22nAge2%22%3Bs%3A2%3A%2260%22%3Bs%3A8%3A%22nHeight1%22%3Bs%3A3%3A%22140%22%3Bs%3A8%3A%22nHeight2%22%3Bs%3A3%3A%22200%22%3Bs%3A8%3A%22nWeight1%22%3Bs%3A2%3A%2240%22%3Bs%3A8%3A%22nWeight2%22%3Bs%3A3%3A%22200%22%3Bs%3A7%3A%22nStatus%22%3Bs%3A1%3A%220%22%3Bs%3A10%3A%22nStateFrom%22%3Bs%3A2%3A%2299%22%3Bs%3A9%3A%22nStateNow%22%3Bs%3A2%3A%2299%22%3Bs%3A5%3A%22order%22%3Bs%3A5%3A%22dLast%22%3B%7D&PHPSESSIONID=460a4eac8d62000529e894727c32bd31"

  # Fetch the listing through the Mechanize agent rather than Kernel#open:
  # opening URLs with Kernel#open depended on open-uri and no longer works on
  # Ruby 3.0+, and going through the agent also sends its session cookie.
  web = Nokogiri::HTML(agent.get(listing_url).body)

  web.search(".picborder").each do |thumb|
    src = thumb['src']
    next unless src # matched element carries no image reference

    # Keep only the final path component so a scraped value can never make
    # the script write outside the current directory.
    filename = File.basename(src.gsub("http://www.myjodoh.net/thumb1/", ""))
    next if filename.empty?

    # The thumbnail URL maps onto the full-size image endpoint.
    url = src.gsub("thumb1/", "img.php?display=")
    puts filename

    # Images are binary data: write in binary mode so no newline or encoding
    # translation can corrupt the bytes.
    File.binwrite(filename, agent.get_file(url))
  end
end

1 comment:

  1. the 7-inch pill market office 2010 activation key has exploded. The actual fact that more compact tablets tend to cost about fifty percent as much as their more office mac 2011 product key substantial counterparts provides a great deal to complete with that.

    ReplyDelete