"""
30. Reading morphological analysis results
Implement a program that reads the morphological analysis result (neko.txt.mecab).
Each morpheme is stored in a mapping type with the keys surface form (surface), base form (base), part of speech (pos), and part-of-speech subclassification 1 (pos1).
Express one sentence as a list of morphemes (mapping type). For the rest of the problems in Chapter 4, use the program created here.
- `neko.txt.mecab` is created by `ans30.sh`.
- Later questions use `30_neko_mecab.json`.
Feature fields: part of speech, part-of-speech subclassification 1, part-of-speech subclassification 2, part-of-speech subclassification 3, inflected form, inflection type, base form, reading, pronunciation
['noun', 'pronoun', 'General', '*', '*', '*', 'I', 'Wagamama', 'Wagamama']
"""
from typing import List
import MeCab
import utils
def read_file(path: str) -> List[str]:
    """Read a text file and return its non-blank lines.

    Every line is stripped of leading and trailing whitespace; lines that
    are empty after stripping are dropped.

    Args:
        path (str): Path of the text file to read.

    Returns:
        List[str]: The stripped, non-empty lines in file order.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, so non-ASCII text is read the same way on every machine.
    with open(path, encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [line for line in stripped_lines if line]
def parse(sent: str) -> List[dict]:
    """Analyse one sentence with MeCab and return its nodes as dictionaries.

    Uses the module-level ``tagger``, which must already exist when this
    function is called.

    Args:
        sent (str): Sentence to analyse.

    Returns:
        List[dict]: One dict per node in the order MeCab yields them, each
            with the keys ``surface``, ``base``, ``pos`` and ``pos1``.
    """
    # Highest feature index read below is 6 (the base form), so at least
    # seven comma-separated fields are needed.
    min_fields = 7

    # NOTE(review): every node in the chain is kept, presumably including
    # MeCab's BOS/EOS sentinel nodes -- confirm whether consumers want them.
    node = tagger.parseToNode(sent)
    result = []
    while node:
        features = node.feature.split(",")
        # Pad short feature lists with MeCab's "*" placeholder so a node with
        # fewer fields does not raise IndexError.
        features += ["*"] * (min_fields - len(features))
        result.append(
            {
                "surface": node.surface,  # Surface form
                "base": features[6],  # Base (uninflected) form
                "pos": features[0],  # Part of speech
                "pos1": features[1],  # Part-of-speech subclassification 1
            }
        )
        node = node.next
    return result
# Script body (ans30): analyse every line of neko.txt and cache it as JSON.

# parse() looks up ``tagger`` as a module-level name, so create it before
# any sentence is parsed.
tagger = MeCab.Tagger("-r /usr/local/etc/mecabrc")

file_path = "neko.txt"
data = read_file(file_path)
# Example of the loaded lines (translated):
# ['one', 'I am a cat.', 'There is no name yet.', 'I have no idea where I was born.']

# One list of morpheme dicts per input line.
result = list(map(parse, data))

# ans30: write the analysis out, then load it back from the saved file.
json_path = "30_neko_mecab.json"
utils.save_json(result, json_path)
data = utils.read_json(json_path)
# utils.py:
import itertools
import json
from typing import Any, List
def save_json(data: Any, save_path: str) -> None:
    """Write ``data`` to ``save_path`` as JSON.

    The file is written as UTF-8 with a two-space indent, and non-ASCII
    characters are stored as-is instead of being escaped.

    Args:
        data (Any): The data to store.
        save_path (str): Path of the file to write.
    """
    dump_options = {"indent": 2, "ensure_ascii": False}
    with open(save_path, mode="w", encoding="utf-8") as out_file:
        json.dump(data, out_file, **dump_options)
def read_json(path: str) -> List[Any]:
    """Load and return the JSON content of a UTF-8 encoded file.

    Args:
        path (str): Path of the file to read.

    Returns:
        List[Any]: The decoded JSON data.
    """
    with open(path, mode="r", encoding="utf-8") as in_file:
        return json.load(in_file)
def flat(sequence: List[List[Any]]) -> List[Any]:
    """Flatten one level of nesting.

    Args:
        sequence (List[List[Any]]): A list of lists.

    Returns:
        List[Any]: The items of every inner list, concatenated in order.
    """
    return [item for inner in sequence for item in inner]
Recommended Posts