"""
32. Base forms of verbs
Extract the base form of every verb.
Sample of the parsed data (one list of morpheme dicts per sentence):
[[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
  {'surface': 'one', 'base': 'one', 'pos': 'noun', 'pos1': 'number'},
  {'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
 [{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
  {'surface': 'I', 'base': 'I', 'pos': 'noun', 'pos1': '代noun'},
  {'surface': 'Is', 'base': 'Is', 'pos': 'Particle', 'pos1': '係Particle'},
  {'surface': 'Cat', 'base': 'Cat', 'pos': 'noun', 'pos1': 'General'},
  {'surface': 'so', 'base': 'Is', 'pos': 'Auxiliary verb', 'pos1': '*'},
  {'surface': 'is there', 'base': 'is there', 'pos': 'Auxiliary verb', 'pos1': '*'},
  {'surface': '。', 'base': '。', 'pos': 'symbol', 'pos1': 'Kuten'},
  {'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
"""
import itertools
from typing import List
import utils
def get_verb(sentence: List[dict]) -> List[str]:
    """Return the base forms of all verbs in one sentence.

    Args:
        sentence: Morpheme dicts for a single sentence. Every dict must
            have a ``"pos"`` key; dicts whose ``"pos"`` is ``"verb"``
            must also have a ``"base"`` key.

    Returns:
        The ``"base"`` value of each morpheme whose ``"pos"`` equals
        ``"verb"``, in sentence order. Empty when the sentence contains
        no verbs.

    Raises:
        KeyError: If a morpheme lacks ``"pos"``, or a verb lacks ``"base"``.
    """
    return [word["base"] for word in sentence if word["pos"] == "verb"]
# Load the parsed text; the code below iterates it as a sequence of
# sentences, each of which is passed to get_verb().
data = utils.read_json("30_neko_mecab.json")

# Run get_verb() once per sentence and keep only the non-empty results.
verbs = [found for found in map(get_verb, data) if found]
# e.g. [['Born', 'Tsukuri'], ['To do', 'cry', 'To do', 'Is'], ['start', 'to see'], ['listen'], ['capture', 'Boil', 'Eat']]

# Flatten the per-sentence lists into a single list of base forms.
flat = list(itertools.chain.from_iterable(verbs))
# e.g. ['Born', 'Tsukuri', 'To do', 'cry', 'To do', ...]
# Recommended Posts  (page-footer text left over from the source web page)