Hinweis: Aus irgendeinem Grund konnte ich nur bis zu 20 Bilder herunterladen.
Wie in den Kommentaren vorgeschlagen, habe ich das Limit auf 400 Downloads erhöht, indem ich JavaScript über Selenium aktiviert habe. Dafür wird ein Chrome-Treiber benötigt: https://sites.google.com/a/chromium.org/chromedriver/downloads
get_img.py
#-*- coding:utf-8 -*-
import os
import urllib2
import re
from bs4 import BeautifulSoup
def get_ulist_o(search_word):
#Vorherige Version
#http://stackoverflow.com/questions/20716842/python-download-images-from-google-image-search
url="https://www.google.co.in/search?q="+search_word+"&source=lnms&tbm=isch"
header = {'User-Agent': 'Mozilla/5.0'}
soup=BeautifulSoup(urllib2.urlopen(urllib2.Request(url,headers=header)),"lxml")
ulist = [a['src'] for a in soup.find_all("img", {"src": re.compile("gstatic.com")})]
return ulist
def get_ulist(search_word,n):
#Neue Version
from selenium import webdriver
from ast import literal_eval
if n>400:print("n should be less than 400");exit()
url="https://www.google.co.in/search?q="+search_word+"&source=lnms&tbm=isch"
chromedriver = "./chromedriver"
driver = webdriver.Chrome(chromedriver)
driver.get(url)
cnt=0
while (cnt<n):
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
page_source= driver.page_source
soup=BeautifulSoup(page_source,"lxml")
soup= soup.find_all('div', class_="rg_meta")
cnt=len(soup)
else:
driver.quit()
ulist=[]
for i in soup:
dic=i.text.replace("false","False").replace("true","True")
ulist.append(literal_eval(dic)["ou"])
return ulist[:n]
def get_img(search_word,n):
FOLDERNAME=str(search_word)
if os.path.exists(FOLDERNAME)==False:
os.mkdir(FOLDERNAME)
urls=get_ulist(search_word,n)
for cntr,img in enumerate(urls):
print "[%03d]Donloading.. %s"%(cntr,img)
try:
raw_img = urllib2.urlopen(img).read()
f = open('%s/%s_%03d.jpg' % (FOLDERNAME, search_word, cntr), 'wb')
f.write(raw_img)
f.close()
except:
pass
get_img("Süße Katze",10)
Bitte verwenden Sie dieses Skript nicht für missbräuchliche Zwecke.
Recommended Posts