[PYTHON] Get Twitter userData

Because my Twitter follower list contains only each follower's screen_name, I decided to add each user's numeric ID to the list.

Twitter API GET users/show https://dev.twitter.com/rest/reference/get/users/show

Twitter follower list data (csv)

name account followdate
Kuwa kazhu 2015/12/26
Giri noko123 2015/12/26
Taro taro123 2017/09/21

API limit: users/show allows up to 900 requests per 15 minutes.

Twitter's API rate limits are troublesome. To work around them I use the method described by the administrator of Code 7 Ward (I am always grateful for the help) in the article "Get a lot of tweets with TwitterAPI. Consider server-side errors (in python)".

twitter_user_get.py



# coding=utf-8

import sys
import tweetkey
import os
from requests_oauthlib import OAuth1Session
import csv
import time
import json
import datetime


def init():
    """Create the authenticated Twitter session.

    The four OAuth1 credentials are read from the ``twkey`` mapping of the
    external ``tweetkey`` module.

    Returns:
        The ``OAuth1Session`` built from those credentials.
    """
    credentials = tweetkey.twkey
    return OAuth1Session(
        credentials['CONSUMER_KEY'],
        credentials['CONSUMER_SECRET'],
        credentials['ACCESS_TOKEN'],
        credentials['ACCESS_SECRET']
    )

def twittergetter(input_name, output_name, api, screen_name_column=2):
    """Copy a follower CSV, appending each account's numeric Twitter user id.

    The first row of ``input_name`` is treated as a header and gets an extra
    ``id`` column. Every following row is looked up through the
    ``users/show.json`` endpoint and written to ``output_name``.

    Args:
        input_name: Path of the follower CSV to read.
        output_name: Path of the CSV to write (overwritten if it exists).
        api: Authenticated session exposing ``get(url, params=...)`` whose
            responses provide ``status_code``, ``text`` and ``headers``.
        screen_name_column: Zero-based index of the column holding the
            screen_name. Defaults to 2, the index this function has always
            used.
            NOTE(review): a three-column "name, account, followdate" layout
            would put the account at index 1 - confirm against the real file.

    Returns:
        An empty tuple (kept for backward compatibility).
    """
    url = "https://api.twitter.com/1.1/users/show.json"

    # Both files are managed by `with` so the output is flushed and closed
    # even when a lookup raises; newline='' leaves line-ending handling to
    # the csv module.
    with open(input_name, 'r', newline='') as fin, \
            open(output_name, 'wt', newline='') as fout:
        reader = csv.reader(fin)
        writer = csv.writer(fout)

        header = next(reader, None)
        if header is None:
            # Empty input: nothing to convert.
            return ()

        # Add the id column to the header for output
        header.append("id")
        writer.writerow(header)

        for row in reader:
            try:
                screen_name = row[screen_name_column]
            except IndexError:
                # Blank or short line: there is no account to look up, so
                # pass the row through untouched.
                writer.writerow(row)
                continue
            print(screen_name)

            # Passing the name through `params` lets the session URL-encode it.
            req = api.get(url, params={'screen_name': screen_name})

            if req.status_code == 200:
                users = json.loads(req.text)
                print(users)
                # The numeric user id is in the "id" field
                user_id = users["id"]
                print(user_id)
                row.append(user_id)
                writer.writerow(row)
            elif req.status_code == 404:
                # 404: the screen_name has changed or the account is gone,
                # so write the row as it is.
                writer.writerow(row)
            else:
                print("Error: %d" % req.status_code)
                # Keep the row (without an id) so no follower is silently
                # dropped from the output, then back off before continuing.
                writer.writerow(row)
                time.sleep(240)

            # Rate-limit check. The headers are occasionally missing, in
            # which case the limit is queried explicitly.
            # checkLimit() takes no argument here; it uses the module-level
            # `api` object.
            if ('X-Rate-Limit-Remaining' in req.headers
                    and 'X-Rate-Limit-Reset' in req.headers):
                if int(req.headers['X-Rate-Limit-Remaining']) == 0:
                    waitUntilReset(int(req.headers['X-Rate-Limit-Reset']))
                    checkLimit()
            else:
                print('not found  -  X-Rate-Limit-Remaining or X-Rate-Limit-Reset')
                checkLimit()

    return ()


def checkLimit(session=None):
    '''
    Query the rate-limit status and wait until users/show is accessible.

    Polls ``application/rate_limit_status.json``; while the remaining count
    is 0 it sleeps until the reported reset time and asks again.

    Args:
        session: Optional authenticated session exposing ``get(url)``. When
            omitted, the module-level ``api`` object is used, so existing
            ``checkLimit()`` calls keep working.

    Raises:
        Exception: on any status other than 200 or 503, or on the 12th
            consecutive 503 response.
    '''
    # Fall back to the global session created in the __main__ block.
    client = api if session is None else session
    url = "https://api.twitter.com/1.1/application/rate_limit_status.json"

    unavailableCnt = 0
    while True:
        res = client.get(url)
        print(res.text)

        if res.status_code == 503:
            # 503 : Service Unavailable - retry after a 30 second pause
            if unavailableCnt > 10:
                raise Exception('Twitter API error %d' % res.status_code)

            unavailableCnt += 1
            print('Service Unavailable 503')
            waitUntilReset(time.mktime(datetime.datetime.now().timetuple()) + 30)
            continue

        # Any non-503 answer ends the run of consecutive failures.
        unavailableCnt = 0

        if res.status_code != 200:
            raise Exception('Twitter API error %d' % res.status_code)

        remaining, reset = getLimitContext(json.loads(res.text))
        if remaining != 0:
            break
        waitUntilReset(reset)

def getLimitContext(res_text):
    '''
    Extract the users/show rate-limit figures from a decoded status reply.

    Args:
        res_text: Decoded JSON mapping; the values are read from
            ``['resources']['users']['/users/show/:id']``.

    Returns:
        A ``(remaining, reset)`` tuple, both converted to ``int``.
    '''
    show_limit = res_text['resources']['users']['/users/show/:id']
    remaining_calls = show_limit['remaining']
    reset_at = show_limit['reset']
    print(reset_at)
    return int(remaining_calls), int(reset_at)

def waitUntilReset(reset, margin=10):
    '''
    Sleep until the reset time, plus a safety margin.

    Args:
        reset: Target time in seconds since the epoch.
        margin: Extra seconds to sleep past the reset time. Defaults to 10,
            the value that used to be hard-coded.
    '''
    # time.time() is already in epoch seconds. Converting a naive local
    # datetime through mktime() forces it to guess the DST flag, which can
    # be wrong around a clock change.
    seconds = max(reset - time.time(), 0)
    print('\n     =====================')
    print('     == waiting %d sec ==' % seconds)
    print('     =====================')
    sys.stdout.flush()
    # Clamp so a negative margin can never produce a negative sleep.
    time.sleep(max(seconds + margin, 0))

if __name__ == '__main__':
    # `api` has to remain a module-level name: checkLimit() uses it as a
    # global when called without arguments.
    api = init()
    # Input and output both live in the current working directory.
    input_name = "/".join((os.getcwd(), "twitterList.csv"))
    output_name = "/".join((os.getcwd(), "twitterList_add_id.csv"))
    twittergetter(input_name, output_name, api)

However, when I tried to convert the screen_name of 120,000 users, it had not finished even after two days... (At 900 requests per 15 minutes, 120,000 lookups need at least about 33 hours.)

Recommended Posts

Get Twitter userData
Get Twitter Trends
Get Twitter timeline with python
Get celebrity tweet history from twitter
Get data from Twitter using Tweepy
Get delay information on Twitter and tweet
Get images by keyword search from Twitter
Get Twitter bookmarks on CentOS using Selenium
[python] Get Twitter timeline for multiple users
Program to get favorite images on Twitter
Get a lot of Twitter tweets at once