# Scrapes manga pages and records the host names that serve their images.
#
# NOTE(review): this file appears to have been collapsed onto one physical line,
# and the text between `SQL =<` and `limit_num break end` appears to be missing
# (heredoc body, the db1/db2/new_db handles, the initial values of count_up,
# counter, num and limit_num, and the opening of the loop and of the `{ ... }`
# block are not visible here). Restore the original line breaks and the missing
# section before running; as written, everything after the first `#` on the
# line is parsed as a comment.
#
# What the visible part does, per iteration of the (not visible) loop:
#   * `num` is incremented whenever count_up is a multiple of 100; it is used as
#     the numeric suffix of the CSV file names below.
#   * Reads `url` and `title` from tbl_manga (db1) and `isbn` and `author` from
#     tbl_bookdata (db2) for the row keyed by count_up; isbn/author fall back to
#     '' when no row is returned.
#   * Fetches the page at URL, parses it with Nokogiri, and walks every <img>
#     under <figure> that carries a `data-src` attribute.
#   * For each such image the host of the data-src URI is compared with the
#     previously seen host (temp_hostname). When it differs, tbl_hostdomain
#     (new_db) is searched for that host: a hit prints counter and the host and
#     moves on; a miss increments counter, inserts the host, and appends it to
#     imagefile-hostdomain<num>.csv.
#   * The unconditional `next` that follows means the image-list_<num>.csv write
#     at the end of the inner block is never reached.
#   * Sleeps 0.02 s between iterations, then closes new_db, db1 and db2 after the
#     block ends.
#
# NOTE(review): "a++" is not a valid file mode, so CSV.open on
#   imagefile-hostdomain<num>.csv is expected to raise ArgumentError; the sibling
#   call uses "a+" -- confirm and align.
# NOTE(review): `open(URL)` relies on open-uri patching Kernel#open; on Ruby 3.0+
#   that no longer fetches URLs and URI.open(URL) is needed -- confirm the target
#   Ruby version.
# NOTE(review): the SQL statements are built by string interpolation, including
#   uri.host, which comes from the fetched page; bind parameters would be safer.
# NOTE(review): URL is a constant that is reassigned on every iteration, which
#   triggers "already initialized constant" warnings; a local variable looks
#   intended.
# NOTE(review): when counter == 0 the first host is inserted and then immediately
#   found by the following select, so it is never written to the host-domain CSV
#   -- verify whether that is intended.
require 'nokogiri' require 'open-uri' #require 'httparty' require 'sqlite3' require 'csv' require 'time' SQL =< limit_num break end if (count_up % 100) == 0 num += 1 end address = db1.execute("select url from tbl_manga where id = '#{count_up}'") ex_title = db1.execute("select title from tbl_manga where id = '#{count_up}'") isbn = db2.execute("select isbn from tbl_bookdata where ex_id = '#{count_up}'") author = db2.execute("select author from tbl_bookdata where ex_id = '#{count_up}'") if isbn[0] isbn = isbn[0].pop else isbn = '' end if author[0] author = author[0].pop else author = '' end book = ex_title[0].pop URL = address[0].pop ex_id = count_up puts puts ex_id #puts author #puts book #puts #response = HTTParty.get(URL) #doc = Nokogiri::HTML(response.body) doc = Nokogiri::HTML(open(URL)) doc.remove_namespaces! title = doc.css('title').inner_text figure = doc.css('figure') #puts figure #puts figure.inner_html temp_hostname = '' figure.css('img').each do |tag| if tag.attribute('data-src') #p tag.attribute('data-src').inner_html image = tag.attribute('data-src').value ###### ###### uri = URI.parse(image) if temp_hostname != uri.host temp_hostname = uri.host if counter == 0 then counter = 1 new_db.execute("insert into tbl_hostdomain (id, domain_name) values('#{counter}','#{uri.host}');") end search_flag = new_db.execute("select id from tbl_hostdomain where domain_name ='#{uri.host}' ;") if search_flag.any? then p counter,uri.host next else counter += 1 new_db.execute("insert into tbl_hostdomain (id, domain_name) values('#{counter}','#{uri.host}');") CSV.open("imagefile-hostdomain"+"#{num}"+".csv","a++") do |f| f << [uri.host.to_s] end end end next #everything after this point is skipped ###### ###### CSV.open("image-list_"+"#{num}"+".csv","a+") do |f| f << [author,book,ex_id,URL,title,isbn,image] end end end sleep 0.02 end } new_db.close db1.close db2.close;nil