百度图片都是从哪抓取的呢?
答:Ruby代码
require 'rubygems'
require 'hpricot'
require 'open-uri'
require 'net/http'
class BaiduPicture
def initialize(url,size)
@urls = []
@size = size
@urls e
p "exception in get_picture_elements"
p e
[]
end
end
# 获取图片
def get_picture(url)
get_picture_elements(url)。 each do |link|
begin
href = ...全部
答:Ruby代码
require 'rubygems'
require 'hpricot'
require 'open-uri'
require 'net/http'
class BaiduPicture
# Sets up the page list (@urls) and the requested page count (@size).
# NOTE(review): this span looks truncated. Everything between `@urls` and the
# rescue handler below is missing, presumably including the rest of
# initialize and the `def get_picture_elements(url)` line with its `begin`.
# `url` is not referenced in the lines that survive. Restore from the
# original source before relying on this code.
def initialize(url,size)
@urls = []
@size = size
@urls e
# Failure path: print a marker and the exception, then evaluate to an empty
# list.
p "exception in get_picture_elements"
p e
[]
end
end
# Downloads every picture linked from the result page at +url+.
#
# Each element returned by get_picture_elements(url) is asked for its "href".
# When the second "&"-separated field of the href starts with "u", the text
# after that field's first "=" is used as the picture address. Otherwise the
# href is opened as a detail page and the address is read from the "src" of
# the first <img> under the element with id "main_pic".
#
# A StandardError raised while handling one link is printed and that link is
# skipped. Returns whatever get_picture_elements(url).each returns.
#
# NOTE(review): the receivers link., tmp_doc., pic_tag., Net::HTTP, URI.parse
# and File were garbled in the recovered text and have been restored from how
# the values are used. Confirm against the original source.
def get_picture(url)
  get_picture_elements(url).each do |link|
    begin
      href = link.get_attribute("href")
      field = href.split("&")[1]
      if /^u.*/ =~ field
        # Limit of 2 keeps any further "=" inside the address intact.
        pic_addr = field.split("=", 2)[1]
        p pic_addr
      else
        # NOTE(review): the prefix is a single space; the site base URL was
        # presumably lost from this literal. Confirm the intended value.
        hole_link = " " + href
        p hole_link
        # Kernel#open only fetches URLs through open-uri on Ruby < 3.0;
        # newer Rubies need URI.open here.
        tmp_doc = Hpricot(open(hole_link))
        pic_tag = tmp_doc.get_element_by_id("main_pic").get_elements_by_tag_name("img").first
        pic_addr = pic_tag.get_attribute("src")
      end
      save_picture(pic_addr)
    rescue StandardError => e
      # StandardError rather than Exception, so Ctrl-C and exit still stop
      # the run.
      p "exception in get_picture"
      p e
      next
    end
  end
end

# Fetches +pic_addr+ over HTTP and writes the response body, in binary mode,
# to a file in the current directory named after the last "/"-separated
# segment of the address.
def save_picture(pic_addr)
  # URI.encode was removed in Ruby 3.0; it delegated to the default parser's
  # escape, which is called directly here.
  body = Net::HTTP.get(URI.parse(URI::DEFAULT_PARSER.escape(pic_addr)))
  # Block form closes the file even if the write raises.
  File.open(pic_addr.split("/").last, "wb") { |file| file.write(body) }
end
private :save_picture
# Walks every page URL stored in @urls: downloads that page's pictures with
# get_picture, then pauses 10 seconds before the next page. Prints and
# returns "pictures over" once all pages have been visited.
def loop_pages
  # The recovered text had a full-width period after @urls, which detached
  # the each call from its receiver.
  @urls.each do |url|
    get_picture(url)
    sleep(10)
  end
  p "pictures over"
end
end
require 'rubygems'
require 'hpricot'
require 'open-uri'
require 'net/http'
class BaiduPicture
# Sets up the page list (@urls) and the requested page count (@size).
# NOTE(review): this span looks truncated. Everything between `@urls` and the
# rescue handler below is missing, presumably including the rest of
# initialize and the `def get_picture_elements(url)` line with its `begin`.
# `url` is not referenced in the lines that survive. Restore from the
# original source before relying on this code.
def initialize(url,size)
@urls = []
@size = size
@urls e
# Failure path: print a marker and the exception, then evaluate to an empty
# list.
p "exception in get_picture_elements"
p e
[]
end
end
# Downloads every picture linked from the result page at +url+.
#
# Each element returned by get_picture_elements(url) is asked for its "href".
# When the second "&"-separated field of the href starts with "u", the text
# after that field's first "=" is used as the picture address. Otherwise the
# href is opened as a detail page and the address is read from the "src" of
# the first <img> under the element with id "main_pic".
#
# A StandardError raised while handling one link is printed and that link is
# skipped. Returns whatever get_picture_elements(url).each returns.
#
# NOTE(review): the receivers link., tmp_doc., pic_tag., Net::HTTP, URI.parse
# and File were garbled in the recovered text and have been restored from how
# the values are used. Confirm against the original source.
def get_picture(url)
  get_picture_elements(url).each do |link|
    begin
      href = link.get_attribute("href")
      field = href.split("&")[1]
      if /^u.*/ =~ field
        # Limit of 2 keeps any further "=" inside the address intact.
        pic_addr = field.split("=", 2)[1]
        p pic_addr
      else
        # NOTE(review): the prefix is a single space; the site base URL was
        # presumably lost from this literal. Confirm the intended value.
        hole_link = " " + href
        p hole_link
        # Kernel#open only fetches URLs through open-uri on Ruby < 3.0;
        # newer Rubies need URI.open here.
        tmp_doc = Hpricot(open(hole_link))
        pic_tag = tmp_doc.get_element_by_id("main_pic").get_elements_by_tag_name("img").first
        pic_addr = pic_tag.get_attribute("src")
      end
      save_picture(pic_addr)
    rescue StandardError => e
      # StandardError rather than Exception, so Ctrl-C and exit still stop
      # the run.
      p "exception in get_picture"
      p e
      next
    end
  end
end

# Fetches +pic_addr+ over HTTP and writes the response body, in binary mode,
# to a file in the current directory named after the last "/"-separated
# segment of the address.
def save_picture(pic_addr)
  # URI.encode was removed in Ruby 3.0; it delegated to the default parser's
  # escape, which is called directly here.
  body = Net::HTTP.get(URI.parse(URI::DEFAULT_PARSER.escape(pic_addr)))
  # Block form closes the file even if the write raises.
  File.open(pic_addr.split("/").last, "wb") { |file| file.write(body) }
end
private :save_picture
# Walks every page URL stored in @urls: downloads that page's pictures with
# get_picture, then pauses 10 seconds before the next page. Prints and
# returns "pictures over" once all pages have been visited.
def loop_pages
  # The recovered text had a full-width period after @urls, which detached
  # the each call from its receiver.
  @urls.each do |url|
    get_picture(url)
    sleep(10)
  end
  p "pictures over"
end
end
Ruby代码
# Build a scraper with a start URL and a size of 10, then walk its pages.
# NOTE(review): the first argument is a one-space string; the search-result
# URL was presumably lost from this literal. Supply the real URL before
# running.
BaiduPicture.new(" ",10).loop_pages
BaiduPicture.new(" ",10).loop_pages 抓取10页
这个url使用需要
这样的格式,点击搜索结果的第二页连接能够得到这个连接
。收起