"""
32. Base form of verbs
Extract the base form of every verb.
Example of the parsed input (one list of morpheme dicts per sentence):
[[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
{'surface': 'einer', 'base': 'einer', 'pos': 'Substantiv', 'pos1': 'Nummer'},
{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
{'surface': 'ich', 'base': 'ich', 'pos': 'Substantiv', 'pos1': '代Substantiv'},
{'surface': 'Ist', 'base': 'Ist', 'pos': 'Partikel', 'pos1': '係Partikel'},
{'surface': 'Katze', 'base': 'Katze', 'pos': 'Substantiv', 'pos1': 'Allgemeines'},
{'surface': 'damit', 'base': 'Ist', 'pos': 'Hilfsverb', 'pos1': '*'},
{'surface': 'Gibt es', 'base': 'Gibt es', 'pos': 'Hilfsverb', 'pos1': '*'},
{'surface': '。', 'base': '。', 'pos': 'Symbol', 'pos1': 'Phrase'},
{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
"""
import itertools
from typing import List
import utils
def get_verb(sentence: List[dict]) -> List[str]:
    """Collect the base form of every verb in one parsed sentence.

    Args:
        sentence: Morpheme dicts for a single sentence. Each dict is read
            through its ``"pos"`` and ``"base"`` keys.

    Returns:
        The ``"base"`` value of each morpheme whose ``"pos"`` equals
        ``"Verb"``, in sentence order. Empty when the sentence has no verb.

    Raises:
        KeyError: If a morpheme dict lacks ``"pos"`` (or ``"base"`` for a verb).
    """
    # The return type is List[str]: typing.List accepts exactly one type
    # parameter, so a two-parameter form fails when the def is evaluated.
    return [word["base"] for word in sentence if word["pos"] == "Verb"]
# Load the morphological-analysis result; presumably a list of sentences,
# each a list of morpheme dicts (see the module docstring) -- confirm
# against utils.read_json.
data = utils.read_json("30_neko_mecab.json")

# Run get_verb exactly once per sentence and keep only the non-empty results
# (sentences that contain at least one verb).
verbs = [found for found in map(get_verb, data) if found]
# e.g. [['Geboren', 'Tsukuri'], ['Machen', 'Schrei', 'Machen', 'Ist'], ['Start', 'sehen'], ['Hör mal zu'], ['Erfassung', 'Kochen', 'Essen']]

# Flatten the per-sentence lists into one list of base forms.
# from_iterable avoids unpacking the whole outer list into call arguments.
flat = list(itertools.chain.from_iterable(verbs))
# e.g. ['Geboren', 'Tsukuri', 'Machen', 'Schrei', 'Machen']
# Recommended Posts