[PYTHON] Detect English grammar errors with link-grammar

How to find English grammar mistakes with link-grammar

Basic usage of link-grammar

link-grammar-test.py


from linkgrammar import Sentence, ParseOptions, Dictionary, Clinkgrammar as clg

# NOTE(review): this ParseOptions(verbosity=1) value is never used; `po` is
# reassigned to ParseOptions(min_null_count=0, max_null_count=999) further
# down, before the first Sentence is built.
po = ParseOptions(verbosity=1)

def desc(lkg):
    """Print a short diagnostic dump of one linkage.

    Writes four lines to standard output, in this order: the unused-word
    cost, the disjunct cost, the list of words and the list of links.

    Args:
        lkg: A linkage object providing ``unused_word_cost()``,
            ``disjunct_cost()``, ``words()`` and ``links()``.
    """
    print(lkg.unused_word_cost())
    print(lkg.disjunct_cost())
    print(list(lkg.words()))
    # print must be called as a function here: the statement form
    # ``print [...]`` is a SyntaxError on Python 3, whereas this spelling
    # produces the same output on Python 2 and Python 3.
    print(list(lkg.links()))
    
# Parse with a null-word count anywhere between 0 and 999.
po = ParseOptions(
    min_null_count=0,
    max_null_count=999,
)

# Dictionary() with no arguments loads English; create it a single time.
en_dir = Dictionary()

text = 'This are an pen.'

sent = Sentence(text, en_dir, po)
linkages = sent.parse()

# A positive null count means the sentence has a grammatical error.
if sent.null_count() > 0:
    print('grammar error')

# Dump every linkage that the parser returned.
for linkage in linkages:
    desc(linkage)

The output is:

output.txt


grammar error
2
0.05
['LEFT-WALL', '[this]', '[are]', 'an', 'pen.n', '.', 'RIGHT-WALL']
[Link: LEFT-WALL-Xp-., Link: LEFT-WALL-Wa-pen.n, Link: an-Ds**v-Ds-pen.n, Link: .-RW-RIGHT-WALL]
2
0.05
['LEFT-WALL', 'this.d', '[are]', '[an]', 'pen.n', '.', 'RIGHT-WALL']
[Link: LEFT-WALL-Xp-., Link: LEFT-WALL-Wa-pen.n, Link: this.d-D*u-Ds-pen.n, Link: .-RW-RIGHT-WALL]
2
0.25
['LEFT-WALL', '[this]', '[are]', 'an', 'pen.n', '.', 'RIGHT-WALL']
[Link: LEFT-WALL-Xp-., Link: LEFT-WALL-Wa-pen.n, Link: an-Ds**x-Ds-pen.n, Link: .-RW-RIGHT-WALL]

If the sentence contains a grammatical error, `null_count()` is greater than 0, and the offending words are shown surrounded by `[]` in `lkg.words()`.

For ways to find English grammar mistakes in Python other than link-grammar, see [How to check whether a sentence is correct (simple grammar check in Python)](http://stackoverflow.com/questions/10252448/how-to-check-whether-a-sentence-is-correct-simple-grammar-check-in-python).

Use link-grammar English grammar check from Emacs

Save this program somewhere such as ~/bin/.

grammarlinkengfile.py


import os.path
import sys

from linkgrammar import Sentence, ParseOptions, Dictionary, Clinkgrammar as clg
import re

def is_no_link_ward(w):
    """Return the text inside ``[...]`` when *w* is a bracketed word.

    The result is the inner text (a non-empty string) when the whole of
    *w* is one bracket pair enclosing at least one non-bracket character.
    ``False`` is returned when *w* is a bracketed run of digits only, or
    when *w* is not a single bracket pair at all.

    >>> is_no_link_ward("[1234]")
    False
    >>> is_no_link_ward("[aBcD1234]")
    'aBcD1234'
    """
    # A word consisting solely of "[digits]" is rejected outright.
    numeric = re.match(r"\[\d+\]", w)
    if numeric is not None and numeric.group(0) == w:
        return False

    # Otherwise the bracket pair has to span the entire word.
    bracketed = re.match(r"\[[^\[\]]+\]", w)
    if bracketed is None or bracketed.group(0) != w:
        return False
    return w[1:-1]

def text_words2col_begin_end(text,words):
    """Locate each linkage word in *text* as a ``[begin, end]`` column pair.

    The first and last entries of *words* (the wall markers) are ignored.
    The remaining words are matched case-insensitively, left to right,
    against *text*; each one consumes the longest run of characters that
    agrees with the text at the current position.

    Args:
        text: The sentence that was parsed.
        words: Word strings of one linkage, including both wall entries.

    Returns:
        One ``[begin, end]`` list per non-wall word, holding 0-based
        offsets into ``text.lower()`` with ``end`` exclusive.

    >>> text='This is a the test of bfgiuing and xxxvfrg'
    >>> words=['LEFT-WALL', 'this.p', 'is.v', '[a]', 'the', 'test.n', 'of', 'bfgiuing[!].g', 'and.j-n', 'xxxvfrg[?].n', 'RIGHT-WALL']
    >>> text_words2col_begin_end(text,words)
    [[0, 4], [5, 7], [8, 9], [10, 13], [14, 18], [19, 21], [22, 30], [31, 34], [35, 42]]
    """
    lowered = text.lower()
    total = len(lowered)
    pos = 0
    spans = []
    for word in words[1:-1]:
        # Skip whitespace *before* the word.  Skipping only after a word
        # left an indented line permanently out of sync: the first word
        # matched nothing and every later span collapsed as well.
        while pos < total and lowered[pos].isspace():
            pos += 1
        begin = pos
        # Bracketed (unlinked) words are compared without their brackets.
        token = is_no_link_ward(word) or word
        # NOTE(review): the comparison runs over the whole word string,
        # including any suffix after the surface form (e.g. ".n"), so a
        # suffix character that happens to equal the next character of
        # the text is consumed as part of the span.
        for ch in token.lower():
            if pos >= total or lowered[pos] != ch:
                break
            pos += 1
        spans.append([begin, pos])
    return spans

def desc(lkg):
    """Print a short diagnostic dump of one linkage.

    Writes four lines to standard output, in this order: the unused-word
    cost, the disjunct cost, the list of words and the list of links.

    Args:
        lkg: A linkage object providing ``unused_word_cost()``,
            ``disjunct_cost()``, ``words()`` and ``links()``.
    """
    print(lkg.unused_word_cost())
    print(lkg.disjunct_cost())
    print(list(lkg.words()))
    # print must be called as a function here: the statement form
    # ``print [...]`` is a SyntaxError on Python 3, whereas this spelling
    # produces the same output on Python 2 and Python 3.
    print(list(lkg.links()))


def text_error_cols(text):
    """Return the column spans of the words that could not be linked.

    *text* is parsed with a null-word count between 0 and 999.  When the
    parse reports a null count of 0 the result is empty.  Otherwise every
    linkage is scanned for bracketed words, and each word index seen for
    the first time contributes its ``[begin, end]`` span, as computed by
    ``text_words2col_begin_end``.

    Args:
        text: One sentence (or line of text) to check.

    Returns:
        A list of ``[begin, end]`` column pairs in order of discovery;
        empty when nothing was left unlinked.

    >>> text='This is a the test of bfgiuing and xxxvfrg'
    >>> text_error_cols(text)
    [[8, 9], [10, 13]]
    """
    po = ParseOptions(min_null_count=0, max_null_count=999)
    # Open the dictionary only once: this function is called for every
    # line of the checked file, so the instance is kept on the function
    # object and reused by later calls.
    en_dir = getattr(text_error_cols, "_en_dir", None)
    if en_dir is None:
        en_dir = Dictionary()
        text_error_cols._en_dir = en_dir

    sent = Sentence(text, en_dir, po)
    linkages = sent.parse()
    if sent.null_count() == 0:
        return []

    error_cols = []
    seen = set()  # word indexes that have already been reported
    for lkg in linkages:
        words = list(lkg.words())
        spans = None  # computed at most once per linkage, on demand
        for k, w in enumerate(words):
            if not is_no_link_ward(w):
                continue
            if k in seen:
                # NOTE(review): this abandons the rest of the linkage, so
                # a later unlinked word of the same linkage is skipped.
                # Kept as in the original; confirm whether `continue`
                # was intended.
                break
            seen.add(k)
            if spans is None:
                spans = text_words2col_begin_end(text, words)
            # spans has no entry for the leading wall word, hence k - 1.
            error_cols.append(spans[k-1])
    return error_cols



if __name__ == '__main__':
    # Usage: python grammarlinkengfile.py FILE
    # Checks FILE line by line and prints one
    # "FILE:LINE:COLUMN: linkerror" record (1-based line and column) for
    # each word that text_error_cols reports.
    fname = sys.argv[1]
    with open(fname) as f:
        for lineno, text in enumerate(f, 1):
            # Lines read from a file keep their trailing newline, so a
            # plain length test never skips blank lines; test the
            # stripped text instead.
            if not text.strip():
                continue
            for col in text_error_cols(text):
                print(fname+":"+str(lineno)+":"+str(col[0]+1)+": linkerror")

Call grammarlinkengfile.py from Emacs with the following Elisp:

grammarlinkengfile.el


(require 'flycheck)

;; Flycheck checker that runs grammarlinkengfile.py with python and turns
;; every output line of the form "FILE:LINE:COLUMN: MESSAGE" into a warning.
(flycheck-define-checker link-grammar
  "link-grammar check"
  ;; Replace /path/to/ with the directory where the script was saved.
  ;; `source' is a Flycheck placeholder for the file to check (see the
  ;; Flycheck documentation for its exact substitution rules).
  :command ("python" "/path/to/grammarlinkengfile.py" source)
  :error-patterns
  ;; Matches the "fname:line:column: linkerror" lines the script prints.
  ((warning line-start (file-name) ":" line ":" column ": " (message) line-end))
  ;; Major modes in which this checker may be used.
  :modes (text-mode markdown-mode))

;; Register the checker in Flycheck's list of available checkers.
(add-to-list 'flycheck-checkers 'link-grammar)

;; Turn on flycheck-mode automatically in markdown-mode buffers.  No hook
;; is added for text-mode here, although the checker lists it in :modes.
(add-hook 'markdown-mode-hook 'flycheck-mode)

Evaluate `(load "/path/to/grammarlinkengfile")`, then open a .md file and its grammar will be checked.

Recommended Posts

Detect English grammar errors with link-grammar
Detect stoop with OpenCV
Get started with Python! ~ ② Grammar ~