[PYTHON] 100 language processing knocks (2020): 31

"""
31.verb
Extract all the surface forms of the verb.

[[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
  {'surface': 'one', 'base': 'one', 'pos': 'noun', 'pos1': 'number'},
  {'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
 [{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
  {'surface': 'I', 'base': 'I', 'pos': 'noun', 'pos1': '代noun'},
  {'surface': 'Is', 'base': 'Is', 'pos': 'Particle', 'pos1': '係Particle'},
  {'surface': 'Cat', 'base': 'Cat', 'pos': 'noun', 'pos1': 'General'},
  {'surface': 'so', 'base': 'Is', 'pos': 'Auxiliary verb', 'pos1': '*'},
  {'surface': 'is there', 'base': 'is there', 'pos': 'Auxiliary verb', 'pos1': '*'},
  {'surface': '。', 'base': '。', 'pos': 'symbol', 'pos1': 'Kuten'},
  {'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
"""

import itertools
from typing import List

import utils


def get_verb(sentence: List[dict]) -> List:
    result = []
    for word in sentence:
        if word["pos"] == "verb":
            result.append(word["surface"])
    return result


data = utils.read_json("30_neko_mecab.json")
verbs = [get_verb(sentence) for sentence in data if get_verb(sentence)]
# [['Born', 'Tsuka'], ['Shi', 'Crying', 'Shi', 'Is'], ['start', 'You see'], ['listen'], ['Catch', 'Boiled', 'Eat']]
flat = list(itertools.chain(*verbs))
#  ['Born', 'Tsuka', 'Shi', 'Crying', 'Shi']

Recommended Posts

100 language processing knocks 03 ~ 05
100 language processing knocks (2020): 40
100 language processing knocks (2020): 32
100 language processing knocks (2020): 35
100 language processing knocks (2020): 47
100 language processing knocks (2020): 39
100 language processing knocks (2020): 22
100 language processing knocks (2020): 42
100 language processing knocks (2020): 29
100 language processing knocks (2020): 49
100 language processing knocks (2020): 45
100 language processing knocks (2020): 10-19
100 language processing knocks (2020): 30
100 language processing knocks (2020): 00-09
100 language processing knocks (2020): 31
100 language processing knocks (2020): 48
100 language processing knocks (2020): 44
100 language processing knocks (2020): 41
100 language processing knocks (2020): 37
100 language processing knocks (2020): 25
100 language processing knocks (2020): 23
100 language processing knocks (2020): 33
100 language processing knocks (2020): 20
100 language processing knocks (2020): 27
100 language processing knocks (2020): 46
100 language processing knocks (2020): 21
100 language processing knocks (2020): 36
100 amateur language processing knocks: 41
100 amateur language processing knocks: 71
100 amateur language processing knocks: 56
100 amateur language processing knocks: 24
100 amateur language processing knocks: 59
100 amateur language processing knocks: 70
100 amateur language processing knocks: 60
100 amateur language processing knocks: 92
100 amateur language processing knocks: 30
100 amateur language processing knocks: 06
100 amateur language processing knocks: 84
100 amateur language processing knocks: 81
100 amateur language processing knocks: 33
100 amateur language processing knocks: 40
100 amateur language processing knocks: 45
100 amateur language processing knocks: 43
100 amateur language processing knocks: 55
100 amateur language processing knocks: 22
100 amateur language processing knocks: 61
100 amateur language processing knocks: 94
100 amateur language processing knocks: 54
100 amateur language processing knocks: 04
100 amateur language processing knocks: 63
100 amateur language processing knocks: 12
100 amateur language processing knocks: 14
100 amateur language processing knocks: 08
100 amateur language processing knocks: 42
100 language processing knocks ~ Chapter 1
100 amateur language processing knocks: 19
100 amateur language processing knocks: 73
100 amateur language processing knocks: 75
100 amateur language processing knocks: 98
100 amateur language processing knocks: 83
100 amateur language processing knocks: 95