I'm 9uant, and I work on quantitative analysis and data science in the financial field. We are also on Twitter, so if you are interested, please follow us!
Since it was troublesome to obtain the table that maps securities codes to EDINET codes, I would like to share how to get it.
The securities code is a 4-digit number used to obtain stock price information for listed companies. In practice, a 0 is appended to the end to make it 5 digits. The EDINET code is a combination of letters and digits used when acquiring company information on EDINET, which is a database of financial results information.
We will introduce how to convert a 4-digit securities code to an EDINET code.
[EDINET Taxonomy and Code List](https://disclosure.edinet-fsa.go.jp/E01EW/BLMainController.jsp?uji.bean=ee.bean.W1E62071.EEW1E62071Bean&uji.verb=W1E62071InitDisplay&TID=W1E62071&PID=W0EZ0001&SESSIONKEY=&lgKbn=2&dflg=0&iflg=0) There is an "EDINET code list" at the bottom of this page. This time, we will use Python to get this csv data.
You need to download ChromeDriver in advance. Please download it from the ChromeDriver download site, choosing the release that matches your Chrome version.
get_edinet_code_csv.py
import glob
import os
import shutil
import time
from selenium import webdriver
import zipfile
def get_edinet_code_csv(edinetcode_dir, download_timeout=60.0):
    '''
    Download the EDINET code list csv file to the specified directory

    The list is served as a zip archive. The archive is downloaded with
    headless Chrome, extracted into ``edinetcode_dir`` and then deleted.

    Parameter:
        edinetcode_dir: str
            Directory for downloading EDINET code list csv files
        download_timeout: float
            Maximum number of seconds to wait for the zip download to finish
    Return:
        edinet_code_list_path: str
            The path where the EDINET code list csv file resides
    Raise:
        TimeoutError
            No completed zip file appeared in ``edinetcode_dir`` in time
    '''
    # NOTE: an existing directory is left in place (it is not wiped first),
    # so only files that appear after the download starts are treated as the
    # freshly downloaded archive.
    os.makedirs(edinetcode_dir, exist_ok=True)
    pattern = os.path.join(edinetcode_dir, '*')
    already_present = set(glob.glob(pattern))

    #Download zip file from chrome with selenium
    chromeOptions = webdriver.ChromeOptions()
    # Pass an absolute path so the save destination does not depend on the
    # working directory of the browser process.
    prefs = {"download.default_directory": os.path.abspath(edinetcode_dir)}
    chromeOptions.add_experimental_option("prefs", prefs)
    chromeOptions.add_argument('--headless')  #Hide browser
    # NOTE(review): `chrome_options=` and the positional driver path are kept
    # as in the original call; newer Selenium releases may expect `options=`
    # and a Service object instead -- confirm against the installed version.
    driver = webdriver.Chrome('chromedriver path', chrome_options=chromeOptions)
    try:
        #Access the EDINET code list on EDINET
        driver.get('https://disclosure.edinet-fsa.go.jp/E01EW/BLMainController.jsp?uji.bean=ee.bean.W1E62071.EEW1E62071Bean&uji.verb=W1E62071InitDisplay&TID=W1E62071&PID=W0EZ0001&SESSIONKEY=&lgKbn=2&dflg=0&iflg=0')
        driver.execute_script("EEW1E62071EdinetCodeListDownloadAction('lgKbn=2&dflg=0&iflg=0&dispKbn=1');")
        # The browser must stay alive until the download has finished, so
        # wait for the archive before quitting instead of sleeping blindly.
        zip_path = _wait_for_new_zip(pattern, already_present, download_timeout)
    finally:
        # Always release the browser, even if navigation or the wait failed.
        driver.quit()

    #Extract the zip file to the same directory
    with zipfile.ZipFile(zip_path) as archive:
        member_names = archive.namelist()
        archive.extractall(edinetcode_dir)

    #Delete zip file
    os.remove(zip_path)

    # Prefer the csv named inside the archive over "newest file" guessing.
    csv_members = [name for name in member_names if name.lower().endswith('.csv')]
    if csv_members:
        return os.path.join(edinetcode_dir, csv_members[0])
    # No csv member found: fall back to the most recently created file.
    return max(glob.glob(pattern), key=os.path.getctime)


def _wait_for_new_zip(pattern, already_present, timeout, poll_interval=0.5):
    '''Return the path of a newly downloaded, complete zip matching pattern.'''
    deadline = time.monotonic() + timeout
    while True:
        # A file counts as finished once it is a new path, is not one of
        # Chrome's in-progress ".crdownload" files, and parses as a zip
        # (a partially written archive lacks the trailing directory record).
        finished = [
            path for path in glob.glob(pattern)
            if path not in already_present
            and not path.endswith('.crdownload')
            and zipfile.is_zipfile(path)
        ]
        if finished:
            return max(finished, key=os.path.getctime)
        if time.monotonic() >= deadline:
            raise TimeoutError(
                'EDINET code list zip was not downloaded within '
                '{} seconds'.format(timeout)
            )
        time.sleep(poll_interval)
Converts the securities code array to the EDINET code array.
stockcode_to_edinetcode.py
import numpy as np
import pandas as pd
# Runs at import time: fetch the code list csv into the given directory and
# remember where it was written.
edinet_code_path = get_edinet_code_csv(
    r"EDINET code list csv download destination directory path"
)

# header=1 takes the column names from the second line of the file; the line
# above it is skipped. Only the three columns needed for the lookup are read.
# NOTE(review): the column labels must match the csv header row exactly --
# confirm against the downloaded file.
edinet_code_df = pd.read_csv(
    edinet_code_path,
    encoding="cp932",
    header=1,
    usecols=['EDINET code', 'Submitter name', 'Securities code'],
)
def stockcode_to_edinetcode(codes):
    '''
    Get the EDINET code list corresponding to the securities code(s)

    Parameter:
        codes: int or float or str or list
            Securities code or an iterable of them. NumPy integer and
            floating scalars are accepted as single codes as well.
    Return:
        edinet_codes: list
            EDINET codes in the same order as the arguments; np.nan is
            stored for a securities code that has no matching EDINET code
    '''
    #Wrap a single code in a list (isinstance also covers NumPy scalars,
    #which are not iterable and would otherwise fail in the loop below)
    if isinstance(codes, (str, int, float, np.integer, np.floating)):
        codes = [codes]

    # Column lookups do not depend on the code, so do them once.
    securities_column = edinet_code_df['Securities code']
    edinet_column = edinet_code_df['EDINET code']

    edinet_codes = []
    for code in codes:
        number = int(code)
        #Convert 4-digit securities code to 5-digit by appending a 0
        if len(str(number)) == 4:
            number = int(str(number) + '0')
        matches = edinet_column[securities_column == number]
        if matches.empty:
            #No corresponding EDINET code exists: store np.nan
            edinet_codes.append(np.nan)
        else:
            #Use the first matching row
            edinet_codes.append(matches.to_list()[0])
    return edinet_codes
Recommended Posts