Dies ist der Code, der mit BeautifulSoup die Twitter-Handles aus dem Business-Insider-Artikel „The 100 Most Influential Tech People On Twitter“ extrahiert und anschließend die zugehörigen Kontoinformationen über die Twitter-REST-API abruft.
The 100 Most Influential Tech People On Twitter
Eine solche Seite
# coding: utf-8
from bs4 import BeautifulSoup
import requests,json
from requests_oauthlib import OAuth1Session
from requests.exceptions import ConnectionError, ReadTimeout, SSLError
# Scrape the Business Insider ranking article and build one dict per ranked
# person in `user_list` (keys: 'rank', 'occupation', 'handle', 'Why:',
# 'tech_pi', 'pi'), then collect the Twitter screen names in `handle_list`.
#
# The page text is processed line by line. Each profile starts with an
# "Occupation:" line and is followed by "Handle:", "Why:", "Tech PI:" and
# "PI:" lines. Profiles are listed from rank 100 down to rank 1.
res = requests.get(
    "http://www.businessinsider.com/100-influential-tech-people-on-twitter-2014-4?op=1",
    timeout=30,  # never hang forever on an unresponsive server
)
res.raise_for_status()  # fail loudly instead of parsing an error page
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser" warning).
soup = BeautifulSoup(res.text, "html.parser")

count = 100
user_list = []
user = {}
for line in soup.body.get_text().split('\n'):
    if 'Occupation:' in line:
        # A new profile begins: store the previous one, if any.
        if user:
            user_list.append(user)
        user = {}
        # Single-argument print(...) works on both Python 2 and Python 3.
        print('%s %s' % (count, line))
        user['rank'] = count
        user['occupation'] = line.replace('Occupation:', '').replace(u'\xa0', '')
        count -= 1
        # An occupation line is never also a handle line, even if it
        # happens to contain '@'.
        continue

    if 'rank' not in user:
        # Text before the first profile must not create a bogus entry.
        continue

    # Labelled fields are checked before the bare '@' fallback so that a
    # "Why:" text mentioning another account is not mistaken for the handle.
    # 'Tech PI:' must be tested before 'PI:' because it contains it.
    if 'Tech PI:' in line:
        user['tech_pi'] = line.replace('Tech PI:', '').replace(u'\xa0', '')
    elif 'PI:' in line:
        user['pi'] = line.replace('PI:', '').replace(u'\xa0', '')
    elif 'Why:' in line:
        # NOTE: the key keeps its historical spelling 'Why:' (with colon).
        user['Why:'] = line.replace('Why:', '').replace(u'\xa0', '')
    elif 'Handle:' in line or ('@' in line and 'handle' not in user):
        # Unlabelled '@' lines are accepted only while no handle is known,
        # so later unrelated text cannot overwrite a good value.
        handle = line.replace('Handle:', '').replace(u'\xa0', '').replace(u'@', '')
        user['handle'] = handle.strip()  # the API expects a bare screen name
    else:
        continue
    print('  %s' % line)

# The final profile (rank 1) has no following "Occupation:" line to trigger
# the append above, so it must be stored explicitly.
if user:
    user_list.append(user)

# Skip profiles for which no handle could be extracted.
handle_list = [d['handle'] for d in user_list if d.get('handle')]
# Fetch the Twitter account details for every scraped screen name in
# `handle_list` via the REST API (GET users/lookup) and pretty-print each
# returned user object as JSON. The results replace `user_list`.
KEYS = {  # Set the credentials that were issued for your own Twitter account
    'consumer_key': '**********',
    'consumer_secret': '**********',
    'access_token': '**********',
    'access_secret': '**********',
}
twitter = OAuth1Session(KEYS['consumer_key'], KEYS['consumer_secret'],
                        KEYS['access_token'], KEYS['access_secret'])
url = 'https://api.twitter.com/1.1/users/lookup.json?'

# users/lookup is documented to accept up to 100 screen names per request,
# so longer lists are sent in batches.
LOOKUP_BATCH_SIZE = 100

user_list = []
for start in range(0, len(handle_list), LOOKUP_BATCH_SIZE):
    batch = handle_list[start:start + LOOKUP_BATCH_SIZE]
    params = {'screen_name': ','.join(batch)}
    req = twitter.get(url, params=params, timeout=30)
    # An error response is a JSON object, not a list of users; raise here
    # rather than silently iterating over its keys below.
    req.raise_for_status()
    user_list.extend(json.loads(req.text))

for u in user_list:
    d_data = json.dumps(u, sort_keys=True, indent=4)
    # Single-argument print(...) works on both Python 2 and Python 3.
    print(d_data)
Die Ausgabe sieht so aus.
100 Occupation: CEO/founder of News Corporation; Creator of FOX Broadcasting
Handle: @rupertmurdoch
Why: See how tech fits into the greater news cycle from Rupert himself. Yeah, he writes his own tweets.
Tech PI: 83
PI: 86
99 Occupation: Assistant professor at the University of North Carolina, Chapel Hill with her own tech site at www.technosociology.org
Handle: @zeynep
Why: Catch Zeynep's musings on everything ranging from international Web policies to social justice.
Tech PI: 84
PI: 77
98 Occupation: Data Scientist in Residence at Accel, Scientist Emeritus at bitly, co-founder of HackNY, co-host of DataGotham, and member of NYCResistor
Handle: @hmason
Why: Hilary is on top of the chatter when it comes to today's tech news.
Tech PI: 84
PI: 77
・
・
・
Die von der REST-API abgerufenen Informationen werden als JSON ausgegeben.
"contributors_enabled": false,
"created_at": "Sat Dec 31 18:29:24 +0000 2011",
"default_profile": true,
"default_profile_image": false,
"description": "",
"entities": {
"description": {
"urls": []
}
},
"favourites_count": 13,
"follow_request_sent": false,
"followers_count": 570445,
"following": false,
"friends_count": 96,
"geo_enabled": false,
"id": 451586190,
"id_str": "451586190",
"is_translation_enabled": false,
"is_translator": false,
"lang": "en",
"listed_count": 7145,
"location": "",
"name": "Rupert Murdoch ",
"notifications": false,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png ",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png ",
"profile_background_tile": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/1732184156/Twitter_normal.jpg ",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/1732184156/Twitter_normal.jpg ",
"profile_link_color": "0084B4",
"profile_location": null,
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"protected": false,
"screen_name": "rupertmurdoch",
"status": {
"contributors": null,
"coordinates": null,
"created_at": "Fri Apr 10 12:33:22 +0000 2015",
"entities": {
"hashtags": [],
"symbols": [],
"urls": [],
"user_mentions": []
},
"favorite_count": 63,
"favorited": false,
"geo": null,
"id": 586507259578032128,
"id_str": "586507259578032128",
"in_reply_to_screen_name": null,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"lang": "en",
"place": null,
"retweet_count": 89,
"retweeted": false,
"source": "<a href=\"http://twitter.com/#!/download/ipad\" rel=\"nofollow\">Twitter for iPad</a>",
"text": "Guardian today suggests my dad's expose of Gallipoli fiasco led to my anti-establishment views. Maybe, but confirmed by many later \nevents.",
"truncated": false
},
"statuses_count": 1423,
"time_zone": null,
"url": null,
"utc_offset": null,
"verified": true
・
・
・
Recommended Posts