Analyse des gleichzeitigen Auftretens

Im vorherigen Artikel [https://qiita.com/osakasho/items/0a0b50fc17c38d96c45e] habe ich nur eine morphologische Analyse durchgeführt. Diesmal führe ich zusätzlich eine Analyse des gemeinsamen Auftretens (Kookkurrenzanalyse) durch und stelle das Ergebnis grafisch dar.

Installieren Sie, was Sie brauchen

pip install pyvis

Code

import spacy
nlp = spacy.load('ja_ginza_nopn')
import re
import itertools
import collections
from pyvis.network import Network
import pandas as pd
import time


"""---------Demontagemodul--------"""
def sentence_separator(path, colname):
    """Tokenise every text cell of one CSV column.

    Reads the CSV at ``path`` (decoded as ``utf_8_sig``), runs ``ginza`` on
    each value of column ``colname`` and collects the combined noun-chunk /
    verb list returned for that value.

    Args:
        path: Path of the CSV file to read.
        colname: Name of the column that holds the text.

    Returns:
        A list with one token list per successfully analysed cell. Cells
        that are not strings, or whose analysis raises, are skipped.
    """
    import warnings

    df = pd.read_csv(path, encoding="utf_8_sig")
    sentence = []
    for d in df[colname]:
        # Empty cells are read by pandas as NaN (a float), not as text.
        if not isinstance(d, str):
            continue
        try:
            total_ls, _, _ = ginza(d)
        except Exception as exc:
            # Stay best-effort (one bad row must not abort the run), but do
            # not hide the failure and do not trap KeyboardInterrupt.
            warnings.warn("skipping row that could not be analysed: %r" % (exc,))
            continue
        sentence.append(total_ls)
    return sentence

def ginza(word):
    """Extract noun chunks and verb lemmas from a text.

    Args:
        word: Text handed to the module-level ``nlp`` pipeline.

    Returns:
        A tuple ``(total, nouns, verbs)``: ``nouns`` holds the text of each
        noun chunk, ``verbs`` the lemma of each token tagged ``VERB``, and
        ``total`` is ``nouns`` followed by ``verbs``.
    """
    parsed = nlp(word)

    noun_texts = []
    for chunk in parsed.noun_chunks:
        noun_texts.append(chunk.text)

    verb_lemmas = []
    for token in parsed:
        if token.pos_ == "VERB":
            verb_lemmas.append(token.lemma_)

    # Nouns first, then verbs, as a list object separate from both inputs.
    combined = noun_texts + verb_lemmas
    return combined, noun_texts, verb_lemmas
"""-------------------------------------"""

# Input CSV and the column that holds the text to analyse.
filename = "list.csv"
file_path = filename
colname = "Text"

# Tokenise every row, then build all unordered word pairs within each row.
# Sorting each pair makes (a, b) and (b, a) count as the same pair.
sentences = sentence_separator(file_path, colname)
sentence_combinations = [
    [tuple(sorted(pair)) for pair in itertools.combinations(words, 2)]
    for words in sentences
]

# Flatten the per-row pair lists into one list for counting.
target_combinations = list(itertools.chain.from_iterable(sentence_combinations))

# Main routine that draws the co-occurrence network
def kyoki_word_network(csv_path="kyoki.csv", max_edges=150):
    """Build a pyvis network from a CSV of co-occurring word pairs.

    The CSV must provide the columns ``first``, ``second`` and ``count``.
    Each row becomes an edge between the two words, with ``count`` as the
    edge value. Each node's hover title lists its neighbours, and its value
    is the number of neighbours.

    Args:
        csv_path: Path of the pair CSV to read.
        max_edges: Only the first ``max_edges`` rows of the CSV are used.

    Returns:
        The populated ``Network``; the caller renders it (e.g. ``show``).
    """
    got_net = Network(height="1000px", width="95%", bgcolor="#FFFFFF", font_color="black", notebook=True)

    # Physics layout of the network; barnes_hut() is the other layout the
    # original left commented out.
    got_net.force_atlas_2based()

    # Read both word columns strictly as text: without this, pandas turns
    # number-like words into ints and words such as "NA" or "null" into NaN,
    # which breaks the string concatenation/join on node titles below.
    got_data = pd.read_csv(
        csv_path,
        encoding="utf_8_sig",
        dtype={"first": str, "second": str},
        keep_default_na=False,
    )[:max_edges]

    sources = got_data['first']
    targets = got_data['second']
    weights = got_data['count']

    for src, dst, w in zip(sources, targets, weights):
        got_net.add_node(src, src, title=src)
        got_net.add_node(dst, dst, title=dst)
        got_net.add_edge(src, dst, value=w)

    neighbor_map = got_net.get_adj_list()

    # Add neighbour data to each node's hover text and use the neighbour
    # count as the node value.
    for node in got_net.nodes:
        neighbors = neighbor_map[node["id"]]
        node["title"] += " Neighbors:<br>" + "<br>".join(neighbors)
        node["value"] = len(neighbors)

    got_net.show_buttons(filter_=['physics'])
    return got_net

# Count how often each word pair occurs
ct = collections.Counter(target_combinations)

# Sort once and reuse the ranking for printing and for the CSV.
ranked_pairs = ct.most_common()
print(ranked_pairs)
print(ranked_pairs[:10])

# Save the data temporarily. The explicit column list keeps the header row
# even when no pair was found, so the file can still be read back.
pd.DataFrame(
    [(pair[0], pair[1], count) for pair, count in ranked_pairs],
    columns=['first', 'second', 'count'],
).to_csv('kyoki.csv', index=False, encoding="utf_8_sig")

time.sleep(1)

# Build the network and write it out as HTML
got_net = kyoki_word_network()
got_net.show("kyoki.html")

Ergebnis

Als Ergebnis sollte die Datei kyoki.html ausgegeben werden; öffnen Sie diese bitte in einem Browser.

Das war's.

[Analyse des gemeinsamen Auftretens] Einfache Analyse des gemeinsamen Auftretens mit Python! [Python]

Analyse des gleichzeitigen Auftretens

Installieren Sie, was Sie brauchen

Code

Ergebnis