[PYTHON] Download the top n Google image searches

Note: for some reason, I can only download up to 20 images.

7/13 postscript

As suggested in the comments, I raised the limit to 400 downloads by using Selenium so that JavaScript is enabled. This requires ChromeDriver: https://sites.google.com/a/chromium.org/chromedriver/downloads

get_img.py


#-*- coding:utf-8 -*-
import os
import urllib2
import re
from bs4 import BeautifulSoup

def get_ulist_o(search_word):
    """Return image ``src`` URLs scraped from a static Google image search.

    Previous version, kept for reference. Based on
    http://stackoverflow.com/questions/20716842/python-download-images-from-google-image-search

    The result page is fetched with urllib2 (no JavaScript) and parsed with
    BeautifulSoup; only <img> tags whose ``src`` matches "gstatic.com" are
    collected.

    search_word: raw query text. It is URL-encoded here, so callers should
        not pre-encode it.

    Returns a list with the ``src`` attribute of every matching <img> tag.
    """
    from contextlib import closing
    from urllib import quote_plus

    # Python 2's quote_plus() needs a byte string for non-ASCII text.
    if isinstance(search_word, unicode):
        search_word = search_word.encode("utf-8")
    # Encode the query so spaces, '&', '#' etc. cannot corrupt the URL.
    url = ("https://www.google.co.in/search?q=" + quote_plus(search_word)
           + "&source=lnms&tbm=isch")
    # NOTE(review): presumably sent because the default urllib2 User-Agent
    # is rejected by the server -- confirm.
    header = {'User-Agent': 'Mozilla/5.0'}
    request = urllib2.Request(url, headers=header)
    # closing() releases the connection even if parsing raises.
    with closing(urllib2.urlopen(request)) as response:
        soup = BeautifulSoup(response, "lxml")
    return [a['src'] for a in soup.find_all("img", {"src": re.compile("gstatic.com")})]

def get_ulist(search_word,n):
    """Return up to ``n`` image URLs from a Google image search.

    New version: drives Chrome through selenium (using the "./chromedriver"
    executable) and keeps scrolling to the bottom of the result page until
    at least ``n`` ``<div class="rg_meta">`` elements are present. Each of
    those elements carries a JSON object; its "ou" entry is the URL that is
    returned.

    search_word: query text, inserted into the search URL as given.
    n: number of URLs wanted. Values above 400 print a message and exit the
        interpreter; values <= 0 return an empty list without starting a
        browser.

    Returns a list of at most ``n`` URLs. The list is shorter than ``n``
    when scrolling stops producing new results.

    Raises ValueError if an element's text is not valid JSON and KeyError
    if it has no "ou" entry.
    """
    #New version
    import json
    import time
    from selenium import webdriver
    if n>400:print("n should be less than 400");exit()
    if n <= 0:
        return []

    max_stalls = 10     # scrolls without new results before giving up
    scroll_pause = 0.5  # seconds to wait for lazily loaded results

    url="https://www.google.co.in/search?q="+search_word+"&source=lnms&tbm=isch"
    chromedriver = "./chromedriver"
    driver = webdriver.Chrome(chromedriver)
    metas = []
    try:
        driver.get(url)
        stalls = 0
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            found = BeautifulSoup(driver.page_source, "lxml").find_all('div', class_="rg_meta")
            # Count consecutive scrolls that produced nothing new, so a page
            # with fewer than n results cannot keep the loop alive forever.
            if len(found) > len(metas):
                stalls = 0
            else:
                stalls += 1
            metas = found
            if len(metas) >= n or stalls >= max_stalls:
                break
            time.sleep(scroll_pause)
    finally:
        # Always shut the browser down, even when navigation or parsing fails.
        driver.quit()

    # Parse the metadata as JSON instead of rewriting it into a Python
    # literal: substituting true/false also altered those words inside URLs,
    # and JSON-only tokens such as null are not valid Python literals.
    ulist = [json.loads(meta.text)["ou"] for meta in metas]
    return ulist[:n]

def get_img(search_word,n):
    """Download the first ``n`` image search results for ``search_word``.

    A folder named after ``search_word`` is created in the current working
    directory if it does not exist yet. Every URL returned by
    ``get_ulist(search_word, n)`` is fetched and written to
    "<search_word>/<search_word>_<index>.jpg"; the ".jpg" suffix is used
    regardless of the actual image format.

    Downloading is best-effort: a URL that cannot be fetched or saved is
    reported on stdout and skipped, and the remaining URLs are still tried.

    search_word: query text, also used as folder name and file-name prefix.
    n: number of images to request from get_ulist.
    """
    from contextlib import closing

    FOLDERNAME=str(search_word)
    if not os.path.exists(FOLDERNAME):
        os.mkdir(FOLDERNAME)

    urls=get_ulist(search_word,n)

    for cntr,img in enumerate(urls):
        # Single-argument print(...) produces the same output on Python 2.
        print("[%03d]Donloading.. %s"%(cntr,img))
        try:
            # closing()/with release the connection and the file on errors.
            with closing(urllib2.urlopen(img)) as response:
                raw_img = response.read()
            with open('%s/%s_%03d.jpg' % (FOLDERNAME, search_word, cntr), 'wb') as f:
                f.write(raw_img)
        except Exception as err:
            # Skip this image but say why. Unlike a bare "except:", this
            # lets KeyboardInterrupt/SystemExit abort the run.
            print("[%03d]Skipped: %s"%(cntr,err))

if __name__ == "__main__":
    # Example run: fetch the first 10 results for "Cute cat". Guarded so
    # that importing this module does not start a download.
    get_img("Cute cat",10)


In closing

Please do not use it for any purpose.

Recommended Posts

Download the top n Google image searches
Judge the extension and download the image
[Python] Download original images from Google Image Search
Download the image from the text file containing the URL
A program that searches for the same image
Download the csv file created by Google Colaboratory
[Small story] Download the image of Ghibli immediately
Get the image of "Suzu Hirose" by Google image search.
Since google image download did not work, it corresponds
Scraping google search (image)
Download the top 10 views from one Youtube channel at once
How to deal with UnicodeDecodeError when executing google image download