Get weather information with Python & scraping

This article is a memo to myself summarizing a program that scrapes the meteorological data published by the Japan Meteorological Agency and outputs it as a CSV file.


References: "How to get weather information using the weather API in Python"; "List of precautions for web scraping"; "Python web scraping technique collection: 'There is no value that cannot be obtained' (JavaScript support added 6/12)".


Environment: OS: Windows 10 (64-bit); Python: 3.7.4

Main packages and their use:
BeautifulSoup: parsing web pages, searching tags, and shaping data
logging: log output and configuration
namedtuple: tuples with named fields
pandas: DataFrame operations
requests: fetching web pages


The procedure is simple. ① Use the requests package to fetch the web page that contains the data you want.


r = requests.get('URL')

② Parse the contents with the Beautiful Soup package.


soup = BeautifulSoup(r.text,'lxml')
rows = soup.findAll("tr",class_="mtx") #Get tags by narrowing down the conditions

③ Collect the data into a pandas DataFrame and output it as a CSV file.


weatherData = pd.DataFrame(dataList[1:])
#Column name settings
weatherData.columns  = nameList
#Output as csv file

Example implementation

# coding: UTF-8
#Weather information acquisition process
#Target data:Weather information in Fukuoka prefecture
#Processing content: Acquire weather information from the Japan Meteorological Agency and output a csv file
#* Confirmed that it can be used for commercial purposes according to the Japan Meteorological Agency Terms of Use.
# python ver = 3.7.4
import os
import sys
import requests 
import logging.config
from time import time
import datetime as dt
import sqlite3
import pprint
import pandas as pd
from bs4 import BeautifulSoup 
from collections import namedtuple
import csv
#Variable list
# Run date formatted as YYYYMMDD (e.g. "20180131"); main() embeds it in the log file name.
now = dt.datetime.now().strftime("%Y%m%d")

# Working-directory setup.
# The process changes directory to two levels above the folder containing this
# script, and that location is remembered in exePath.
# Adjust the relative part ("../../") if the work folder should be somewhere else.
_script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(os.path.join(_script_dir, "../../"))
exePath = os.getcwd()

# Per-place settings. The three lists are index-aligned: entry i of each list
# describes the same observation point.
# NOTE(review): place_codeA / place_codeB are presumably the JMA "prec_no" and
# "block_no" identifiers for the station -- confirm against the JMA site.
place_codeA = [82]
place_codeB = [47807]
place_name = ["Fukuoka"]
# Column headings of the output table (also used as DataFrame column names).
nameList = ["point","date", "Average air pressure on land(hPa)", "Average atmospheric pressure in the sea(hPa)","'Precipitation(mm)", "Average temperature(℃)", "Average humidity(%)", "Average wind speed(m/s)", "Daylight hours(h)","Noon(06:00-18:00)","Night(18:00-Next day 06:00)"]
# Accumulated table: row 0 is the header, data rows follow.
dataList = [nameList]
collist = ["Point"]
# %-format template filled with (place_codeA, place_codeB, year, month).
# NOTE(review): the URL was empty in this file, which makes the "%" formatting
# in main() raise TypeError. It is reconstructed here from the JMA daily
# observation page pattern -- confirm it is the intended endpoint.
base_url = "https://www.data.jma.go.jp/obd/stats/etrn/view/daily_s1.php?prec_no=%s&block_no=%s&year=%s&month=%s&day=1&view=p1"

# Named-tuple definitions.
# ONL ("OutputNameList"): gives each output column heading a short field key
# so the headings can be referenced by name during output/aggregation.
partsO = [
    "PT",
    "TIME",
    "EHPA",
    "SHPA",
    "RAIN",
    "TEMPER",
    "HUMID",
    "WIND",
    "SUM",
    "AM",
    "PM",
]

OLNM = namedtuple("ONL", partsO)

# One instance mapping every short key to its full column heading.
OCNL = OLNM(
    PT="point",
    TIME="date",
    EHPA="Average air pressure on land(hPa)",
    SHPA="Average atmospheric pressure in the sea(hPa)",
    RAIN="'Precipitation(mm)",
    TEMPER="Average temperature(℃)",
    HUMID="Average humidity(%)",
    WIND="Average wind speed(m/s)",
    SUM="Daylight hours(h)",
    AM="Noon(06:00-18:00)",
    PM="Night(18:00-Next day 06:00)",
)

#Function list
# ParseFloat:Convert a string to a float type
def ParseFloat(str):
    """Convert a scraped cell value to ``float``, falling back to ``0.0``.

    Args:
        str: The value to convert, typically the text of a table cell. The
            parameter name shadows the builtin and is kept only so existing
            callers keep working.

    Returns:
        ``float(str)`` when the conversion succeeds; ``0.0`` when the value is
        ``None`` or is not a valid number (for example an empty cell or a
        placeholder such as ``"--"``).
    """
    try:
        return float(str)
    except (TypeError, ValueError):
        # Non-numeric or missing cell: use the fallback instead of aborting the run.
        return 0.0
#Main processing
def main():
    """Scrape daily weather observations for each configured place and export a CSV.

    For every entry of ``place_name`` and every month from ``startYear`` to
    ``endYear`` the page built from ``base_url`` is downloaded, its
    ``<tr class="mtx">`` rows are parsed, and one record per row is appended to
    the module-level ``dataList``. The collected records are then written out
    as a CSV file through a pandas DataFrame.

    Any exception raised while scraping or exporting is logged with its
    traceback and not re-raised. The return code is printed to stdout without
    a trailing newline: ``0`` on success, ``1`` on failure.

    NOTE(review): this file was missing the ``try:`` line, the ``logger.info``
    calls, the append to ``dataList``, the CSV write and the failure return
    code. They are reconstructed here; the CSV file name in particular is a
    placeholder to be confirmed.
    """
    returnValue = 0
    #Log settings
    logging.config.fileConfig(f"{exePath}/python/logging.conf", defaults={'logfilename': f"log/UI_{now}.log" })
    logger = logging.getLogger()
    logger.info("Meteorological information acquisition processing Start recording")
    startYear = 2018
    endYear = 2018
    try:
        #Loop over places; enumerate keeps the index aligned with the code lists
        #even if the same place name appears twice.
        for index, place in enumerate(place_name):
            for year in range(startYear, endYear + 1):
                #January through December of the year.
                for month in range(1, 13):
                    #Apply the two place codes, the year and the month.
                    r = requests.get(
                        base_url % (place_codeA[index], place_codeB[index], year, month),
                        timeout=30,  # do not hang forever on a stalled connection
                    )
                    r.raise_for_status()  # treat HTTP error pages as failures, not as data
                    r.encoding = r.apparent_encoding
                    #Scrape the target table.
                    soup = BeautifulSoup(r.text, 'lxml')
                    rows = soup.findAll("tr", class_="mtx")
                    #The first four matching rows are skipped (presumably table headers).
                    rows = rows[4:]
                    #One table row per day of the month.
                    for row in rows:
                        data = row.findAll("td")
                        #Extract only the wanted cells, in nameList order.
                        rowData = [
                            place,  #point
                            str(year) + "/" + str(month) + "/" + str(data[0].string),  #date
                            ParseFloat(data[1].string),   #Average air pressure on land(hPa)
                            ParseFloat(data[2].string),   #Average atmospheric pressure in the sea(hPa)
                            ParseFloat(data[3].string),   #Precipitation(mm)
                            ParseFloat(data[6].string),   #Average temperature(℃)
                            ParseFloat(data[9].string),   #Average humidity(%)
                            ParseFloat(data[11].string),  #Average wind speed(m/s)
                            ParseFloat(data[16].string),  #Daylight hours(h)
                            data[19].string,  #Noon(06:00-18:00)
                            data[20].string,  #Night(18:00-Next day 06:00)
                        ]
                        #Add the record as the next line of the table.
                        dataList.append(rowData)
        #Convert to DataFrame; dataList[0] is the header row, so skip it.
        #Passing columns= here also works when no rows were collected.
        weatherData = pd.DataFrame(dataList[1:], columns=nameList)
        #Output as csv file
        weatherData.to_csv(os.path.join(exePath, f"weather_{now}.csv"), index=False)
    #When an exception occurs
    except Exception:
        returnValue = 1
        logger.info("Weather information acquisition process Abnormal end")
        logger.exception("[Ended abnormally]")
    #When it ends normally
    else:
        logger.info("Weather information acquisition process completed normally")
    finally:
        print("{0}".format(returnValue), end="")
#If you did not import it as a package, do the following
if __name__ == "__main__":
    main()

