"""
33. "A von B"
Extrahieren Sie die Nominalphrasen, in denen zwei Substantive durch "von" verbunden sind.
[[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
{'surface': 'einer', 'base': 'einer', 'pos': 'Substantiv', 'pos1': 'Nummer'},
{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
{'surface': 'ich', 'base': 'ich', 'pos': 'Substantiv', 'pos1': '代Substantiv'},
{'surface': 'Ist', 'base': 'Ist', 'pos': 'Partikel', 'pos1': '係Partikel'},
{'surface': 'Katze', 'base': 'Katze', 'pos': 'Substantiv', 'pos1': 'Allgemeines'},
{'surface': 'damit', 'base': 'Ist', 'pos': 'Hilfsverb', 'pos1': '*'},
{'surface': 'Gibt es', 'base': 'Gibt es', 'pos': 'Hilfsverb', 'pos1': '*'},
{'surface': '。', 'base': '。', 'pos': 'Symbol', 'pos1': 'Phrase'},
{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
"""
import itertools
from typing import List
import utils
def get_none_phase(
    sentence: List[dict],
    *,
    particle: str = "von",
    noun_pos: str = "Substantiv",
) -> List[str]:
    """Collect "A<particle>B" phrases in which A and B are both nouns.

    Args:
        sentence: Tokens of one sentence, in order. Each token is a dict
            that provides at least the "surface" and "pos" keys.
        particle: Surface form of the connecting token to look for.
            Defaults to "von", the literal this function has always matched.
        noun_pos: Value of "pos" that identifies a noun. Defaults to
            "Substantiv".

    Returns:
        One string per match, in sentence order, built by concatenating the
        preceding token's surface, the particle, and the following token's
        surface (no separators).
    """
    # Walk over (previous, current, next) triples. Zipping the shifted
    # slices means the first and last tokens are never treated as the
    # connecting particle, so a leading particle no longer wraps around to
    # sentence[-1] and a trailing one no longer raises IndexError.
    return [
        prev["surface"] + particle + nxt["surface"]
        for prev, word, nxt in zip(sentence, sentence[1:], sentence[2:])
        if word["surface"] == particle
        and prev["pos"] == noun_pos
        and nxt["pos"] == noun_pos
    ]
# Load the pre-parsed corpus; each element is handed to get_none_phase as one
# sentence (presumably a list of token dicts -- confirm against utils.read_json).
data = utils.read_json("30_neko_mecab.json")

# Per-sentence results: one (possibly empty) list of phrases for each sentence.
none_phases = list(map(get_none_phase, data))
# Recorded interactive session:
#   In [75]: none_phases[:10]
#   Out[75]: [[], [], [], [], [], [], [], [], [], ['Seine Handfläche']]

# Flatten the per-sentence lists into a single list of phrases.
flat = list(itertools.chain.from_iterable(none_phases))
# Recorded sample of the flattened result:
#   ['Seine Handfläche', 'Auf der Handfläche', 'Studentengesicht', 'Sollte Gesicht', 'Mitten im Gesicht']
# Recommended Posts  (leftover heading from the web page this script was copied from; not part of the program)