"""
36. Top 10 der häufigsten Wörter
Stellen Sie die 10 am häufigsten vorkommenden Wörter und ihre Häufigkeiten in einem Diagramm dar (z. B. als Balkendiagramm).
data:
[[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
{'surface': 'einer', 'base': 'einer', 'pos': 'Substantiv', 'pos1': 'Nummer'},
{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
{'surface': 'ich', 'base': 'ich', 'pos': 'Substantiv', 'pos1': '代Substantiv'},
{'surface': 'Ist', 'base': 'Ist', 'pos': 'Partikel', 'pos1': '係Partikel'},
{'surface': 'Katze', 'base': 'Katze', 'pos': 'Substantiv', 'pos1': 'Allgemeines'},
{'surface': 'damit', 'base': 'Ist', 'pos': 'Hilfsverb', 'pos1': '*'},
{'surface': 'Gibt es', 'base': 'Gibt es', 'pos': 'Hilfsverb', 'pos1': '*'},
{'surface': '。', 'base': '。', 'pos': 'Symbol', 'pos1': 'Phrase'},
{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
Memo:
- Japanische Schriftzeichen mit matplotlib anzeigen
"""
from collections import Counter
from typing import List, Optional, Tuple

import matplotlib.pyplot as plt

import utils
plt.style.use("ggplot")
# Select a font that contains Japanese glyphs so Japanese text in the chart
# is rendered.
# NOTE(review): this font is presumably only installed on macOS -- confirm
# before running on another platform.
plt.rcParams["font.family"] = "Hiragino Mincho ProN"
def get_tf(
    sentence_list: List[List[dict]],
    top_n: Optional[int] = None,
) -> List[Tuple[str, int]]:
    """Count how often each surface form occurs across all sentences.

    The first and last token of every sentence are skipped (``sent[1:-1]``);
    in the sample data shown in the module docstring these are the BOS/EOS
    markers.

    Args:
        sentence_list: Sentences, each a list of token dicts that carry a
            ``"surface"`` key.
        top_n: If given, return only the ``top_n`` most frequent entries.
            The default ``None`` returns every distinct surface form.

    Returns:
        ``(surface, count)`` pairs ordered from most to least frequent.

    Raises:
        KeyError: If a counted token has no ``"surface"`` key.
    """
    # Feed the Counter from a generator so no intermediate list of every
    # token in the corpus has to be materialised first.
    surfaces = (
        token["surface"]
        for sent in sentence_list
        for token in sent[1:-1]
    )
    return Counter(surfaces).most_common(top_n)
def plot_tf(x: list, y: list) -> None:
    """Show a bar chart of term frequencies.

    Args:
        x: Term labels, one per bar, in display order.
        y: Frequencies; ``y[i]`` is the bar height for ``x[i]``.
    """
    # Place the bars at integer positions and attach the labels via xticks.
    # Plotting against the label strings directly would put the bars on a
    # categorical axis, where repeated labels share one position, and the
    # previously computed positions were only used for the ticks.
    x_pos = list(range(len(x)))
    plt.bar(x_pos, y)
    plt.xlabel("Term")
    plt.ylabel("Frequency")
    plt.title("TF Graph")
    plt.xticks(x_pos, x)
    plt.show()
# Load the parsed sentences and rank every surface form by frequency.
data = utils.read_json("30_neko_mecab.json")
counter = get_tf(data)
# Example of the ranked pairs: [('von', 9194), ('。', 7486)]
# Keep the ten most frequent entries and split them into labels and counts.
top_terms = counter[:10]
x = [term for term, _ in top_terms]
y = [freq for _, freq in top_terms]
plot_tf(x, y)
# ![image-20200527192929567](https://raw.githubusercontent.com/LearnXu/images/master/imgs/image-20200527192929567.png)
# Recommended Posts