Obtenez le fichier Excel sur la page officielle du Tokyo Stock Exchange et stockez-le dans [pandas](http://pandas.pydata.org/ "pandas"). Au passage, sauvegardons-le également en tant que fichier CSV.
getStockNameYJ.py
# -*- coding: utf-8 -*-
from urllib2 import *
from lxml import html
import numpy as np
import pandas as pd
# Scheme and host of the exchange's site; used as the prefix for both the
# index page path and the href scraped from that page.
base_url = 'http://www.tse.or.jp'
def getTosyo1DataURI():
    """Return the URL of the first-section listing file linked from the index page.

    Fetches the listed-companies index page under ``base_url``, locates the
    ``<td>`` whose text contains the first-section label, and returns
    ``base_url`` concatenated with the ``href`` of the first ``<a>`` found
    under that cell's parent element.

    Raises:
        IndexError: if no matching cell exists, or its parent holds no link.
        KeyError: if the link element has no ``href`` attribute.
    """
    # Local import keeps this block self-contained; closing() is needed
    # because urllib2 response objects are not context managers.
    from contextlib import closing

    index_url = base_url + '/market/data/listed_companies/index.html'
    # Release the HTTP connection even if read() raises.
    with closing(urlopen(index_url)) as response:
        contents = response.read()

    dom = html.fromstring(contents)
    cells = dom.xpath(u'.//td[contains(text(), "Marché, partie 1 (stock national)")]')
    if not cells:
        # Same exception type the bare [0] used to raise, but with context.
        raise IndexError('first-section label cell not found in ' + index_url)

    # The download link lives in the same parent element as the label cell.
    links = cells[0].getparent().xpath('.//a')
    if not links:
        raise IndexError('no link found next to the first-section label in ' + index_url)

    return base_url + links[0].attrib['href']
def getStockNameDF():
    """Download the first-section listing and return it as a two-column frame.

    The workbook URL comes from ``getTosyo1DataURI()``; the file is opened
    through ``numpy.DataSource`` and sheet ``'Sheet1'`` is parsed with pandas.

    Returns:
        pandas.DataFrame: column ``code`` holds the sheet's ``"code"`` column
        cast to ``int64``; column ``name`` holds the sheet's
        ``"nom commercial"`` column.
    """
    ds = np.DataSource(None)
    # An Excel workbook is binary data, so open it in binary mode rather
    # than DataSource's default text mode.
    f = ds.open(getTosyo1DataURI(), 'rb')
    try:
        df = pd.ExcelFile(f).parse('Sheet1')
    finally:
        # Close the handle even when parsing fails.
        f.close()
    return pd.DataFrame({'code': df[u"code"].astype('int64'), 'name': df[u"nom commercial"]})
def saveCSV(df):
    """Write the ``code`` and ``name`` columns of *df* to ``tosyo1.csv``.

    The file is created relative to the current working directory, encoded
    as UTF-8, and written without the index column.
    """
    wanted_columns = ['code', 'name']
    selection = df[wanted_columns]
    selection.to_csv('tosyo1.csv', index=False, encoding='utf-8')
if __name__ == '__main__':
    # Script entry point: fetch the listing and dump it straight to CSV.
    # NOTE: the original author left a disabled alternative here that
    # persisted the frame with df.save('stock_name.pdpy') and restored it
    # with pd.DataFrame.load('stock_name.pdpy').
    saveCSV(getStockNameDF())
Recommended Posts