In this article, I describe code that uses tweepy to collect a set of specific tweets together with the replies to those tweets. In the example code below, tweets that have received over 100 replies are collected along with their reply information.
Each collected tweet and reply can be linked by matching the "id" contained in the tweet's status against the "in_reply_to_status_id" contained in the reply's status.
Please note that the free version of the Twitter API cannot collect tweets older than 7 days.
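As a minimal sketch of that matching step, the snippet below reads the two csv files that the gather.py script further down writes out and pairs each reply with its parent tweet by id. The file names are placeholders for the timestamped names the script actually produces, and the column names follow the headers written by the script.

link_example.py
# coding:utf-8
import csv

#Load the collected tweets keyed by their id (placeholder file name)
with open('/hoge/tweet_20190801_120000.csv', encoding='utf-8') as f:
        tweets = {row['id']: row for row in csv.DictReader(f)}

#For each reply, look up its parent tweet via the "to_id" column (= in_reply_to_status_id)
with open('/hoge/reply_20190801_120000.csv', encoding='utf-8') as f:
        for reply in csv.DictReader(f):
                parent = tweets.get(reply['to_id'])
                if parent is not None:
                        print(parent['text'], '->', reply['text'])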
gather.py
# coding:utf-8
import tweepy
import csv
import time
from datetime import datetime, date, timedelta
import re
#Get current date information
today = datetime.today()
#Specify the posting-time range of the tweets to collect (example: from 2 days ago up to today)
tweet_begin_date = datetime.strftime(today - timedelta(days=2), '%Y-%m-%d_00:00:00_JST')
tweet_end_date = datetime.strftime(today, '%Y-%m-%d_23:59:00_JST')
#Specify the posting-time range of the replies to collect (example: from 2 days ago up to today)
reply_begin_date = datetime.strftime(today - timedelta(days=2), '%Y-%m-%d_00:00:00_JST')
reply_end_date = datetime.strftime(today, '%Y-%m-%d_23:59:00_JST')
#Output directory for the result csv files
csv_dir = '/hoge/'
# Twitter API KEY
Consumer_key = 'xxxx'
Consumer_secret = 'xxxx'
Access_token = 'xxxx'
Access_secret = 'xxxx'
#Authenticate with the Twitter API
def authTwitter():
        auth = tweepy.OAuthHandler(Consumer_key, Consumer_secret)
        auth.set_access_token(Access_token, Access_secret)
        api = tweepy.API(auth, retry_count=3,retry_delay=40,retry_errors=set([401, 404, 500, 502, 503, 504]), wait_on_rate_limit = True, wait_on_rate_limit_notify=True)
        return(api)
#Collect tweets and the replies to them
#  s: search query for tweets, t: search query for replies
def gather_tweet_and_reply(s,t):
        api = authTwitter() #Authentication
        tweet_list = []
        reply_list = []
        tweet_id_list = []
        user_id_list = []
        tweets = tweepy.Cursor(api.search, q = s,     #Search string
                 include_entities = True,   #Include entity metadata such as expanded URLs
                 tweet_mode = 'extended',   #Get the full, untruncated tweet text
                 since = tweet_begin_date,    #Collection start date and time
                 until = tweet_end_date,      #Collection end date and time
                 lang = 'ja').items()       #Get only Japanese tweets
        #Store searched tweets in a list
        for tweet in tweets:
                tweet_list.append([tweet.id, tweet.user.screen_name, tweet.created_at, tweet.full_text.replace('\n',''), tweet.favorite_count, tweet.retweet_count])
                tweet_id_list.append(tweet.id)
                user_id_list.append(tweet.user.screen_name)
        #Search for replies addressed to each user name stored in user_id_list
        for user_id in user_id_list:
                replies = tweepy.Cursor(api.search, q = t + " to:" + str(user_id),   #Search string: replies addressed to the user
                          include_entities = True,   #Include entity metadata such as expanded URLs
                          tweet_mode = 'extended',   #Get the full, untruncated tweet text
                          since = reply_begin_date,    #Reply collection start date and time
                          until = reply_end_date,      #Reply collection end date and time
                          lang = 'ja').items()       #Get only Japanese tweets
                #Wait between requests so the session is not disconnected by sending too many at once
                time.sleep(5)
                #If the reply's in_reply_to_status_id is in tweet_id_list, store the reply in the list
                for reply in replies:
                        if reply.in_reply_to_status_id in tweet_id_list:
                                reply_list.append([reply.id, reply.in_reply_to_status_id, reply.user.screen_name, reply.created_at, reply.full_text.replace('\n',''), reply.favorite_count, reply.retweet_count])
        #Output result as csv
        with open(csv_dir+'tweet_'+ today.strftime('%Y%m%d_%H%M%S') + '.csv', 'w',newline='',encoding='utf-8') as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerow(["id","user","created_at","text","fav","RT"])
                writer.writerows(tweet_list)
        with open(csv_dir+'reply_'+ today.strftime('%Y%m%d_%H%M%S') + '.csv', 'w',newline='',encoding='utf-8') as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerow(["id","to_id","user","created_at","text","fav","RT"])
                writer.writerows(reply_list)
def main():
        #First argument: query for tweets (Japanese, no retweets, at least 100 replies)
        #Second argument: query for replies (Japanese, replies only, no retweets)
        gather_tweet_and_reply("lang:ja exclude:retweets min_replies:100","lang:ja filter:replies exclude:retweets")
if __name__ == "__main__":
        main()