Please acquire (scrape) the content at your own responsibility.
from tqdm import tqdm
import urllib.parse
import time
# Search settings -- fill these in before running.
jp_keyword = ""  # Search keyword (may be Japanese)
page_num = 0  # Total number of result pages

# Percent-encode the keyword so it can be embedded in a URL query string.
keyword = urllib.parse.quote(jp_keyword)

# Accumulators for the scraped article titles and post dates.
title_list, date_list = [], []
# Fetch each search-result page and collect article titles and post dates.
#
# The original loop was range(1, page_num - 1) with offset str(i) + "1",
# i.e. offsets 11, 21, ... for only page_num - 2 iterations, so the first
# page and the last page were never requested. This version requests
# exactly page_num pages with offsets 1, 11, 21, ...
# NOTE(review): assumes the first result page is b=1 with 10 results per
# page (implied by the original "<i>1" offsets) -- confirm against the site.
# NOTE(review): requests and BeautifulSoup are not imported in the visible
# part of this file -- confirm they are imported elsewhere.
for page_index in tqdm(range(page_num)):
    url = (
        "https://news.yahoo.co.jp/search/?p=" + keyword
        + "&st=n&ei=UTF-8&b=" + str(page_index * 10 + 1)
    )
    print(url)
    res = requests.get(url)
    # Leave a gap between requests so as not to overload the server.
    time.sleep(2)
    # Parse the response HTML.
    soup = BeautifulSoup(res.content, 'html.parser')
    # Every <h2> element's text is recorded as a title.
    title_list.extend(tag.text for tag in soup.find_all('h2'))
    # Every <span class="d"> element's text is recorded as a post date.
    date_list.extend(tag.text for tag in soup.find_all('span', class_="d"))
See: Quick Start: Natural Language API Settings (https://cloud.google.com/natural-language/docs/setup?hl=ja); Use API Keys (https://cloud.google.com/docs/authentication/api-keys?hl=ja)
# API key appended to the endpoint below -- enter your own key here.
key = ""
# analyzeSentiment endpoint, with the key passed as the "key" query parameter.
url = f"https://language.googleapis.com/v1/documents:analyzeSentiment?key={key}"
def sentimental(text):
    """Return the sentiment score reported by the API for *text*.

    Posts *text* as a plain-text document with language "JA" to the
    module-level ``url`` and reads the score out of the JSON reply.

    Args:
        text: The string to analyze.

    Returns:
        The value found at ``["documentSentiment"]["score"]`` in the reply.

    Raises:
        KeyError: If the reply JSON has no ``documentSentiment`` entry
            (presumably an API error reply -- confirm).
    """
    # NOTE(review): requests is not imported in the visible part of this
    # file -- confirm it is imported elsewhere.
    document = {
        "type": "PLAIN_TEXT",
        "language": "JA",  # Language of the submitted text
        "content": text,
    }
    payload = {"document": document, "encodingType": "UTF8"}
    reply = requests.post(
        url,
        headers={'Content-Type': 'application/json'},
        json=payload,
    )
    # The result arrives as JSON; only the score is returned.
    return reply.json()["documentSentiment"]["score"]
Put each score in the score list.
# Score every scraped title. The original iterated over `wordlist`, a name
# that is never defined in the visible source; the scores are later stored
# next to title_list in the DataFrame, so title_list is what must be scored.
# A plain append loop is kept (rather than a comprehension) so that scores
# already collected remain in score_list if a later request raises.
score_list = []
for word in tqdm(title_list):
    score_list.append(sentimental(word))
import pandas as pd
# Assemble one table with a column per collected list:
# "word" (titles), "date" (post dates) and "score" (sentiment scores).
df = pd.DataFrame(
    {
        "word": title_list,
        "date": date_list,
        "score": score_list,
    }
)
import pickle
# Serialize the DataFrame to sentimental_df.pickle (binary write mode).
with open('sentimental_df.pickle', 'wb') as pickle_file:
    pickle.dump(df, pickle_file)
import pickle
# Read the DataFrame back from sentimental_df.pickle and display it.
# NOTE: unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
with open('sentimental_df.pickle', 'rb') as pickle_file:
    df = pickle.load(pickle_file)
print(df)
References: Use API Keys (https://cloud.google.com/docs/authentication/api-keys?hl=ja); Sentiment analysis by hitting Google Natural Language API with Python; Natural Language; Improve development efficiency! How to use pickle in Python [for beginners]; URL encoding / decoding in Python (urllib.parse.quote, unquote); Quick Start: Natural Language API Settings (https://cloud.google.com/natural-language/docs/setup?hl=ja)
Recommended Posts