[PYTHON] NLP100: Kapitel 1 – Aufwärmübungen

100 Übungen zur Sprachverarbeitung (NLP 100 Knock)
Da ich wenig Zeit hatte, sind viele Stellen nur grob gelöst.
# coding:utf-8
# 00 Reverse the order of the characters in a string
string = "stressed"
# Two equivalent approaches: an extended slice with step -1, and
# reversed() joined back into a string.
reversed_by_slice = string[::-1]
reversed_by_join = "".join(reversed(string))
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(reversed_by_slice)
print(reversed_by_join)


# 01 "Patatokukashi": take every other character of a string
# NOTE(review): this literal looks machine-translated; the exercise presumably
# used a different original string -- confirm before relying on the output.
string = u"Patatoku Kashii"
# Characters at indices 0, 2, 4, ... and then at indices 1, 3, 5, ...
chars_at_even_indices = string[0::2]
chars_at_odd_indices = string[1::2]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(chars_at_even_indices)
print(chars_at_odd_indices)


# 02 Interleave the characters of two strings, alternating between them
# NOTE(review): these literals look machine-translated; the exercise presumably
# used two strings of equal length -- confirm.
string1 = u"Pat Auto"
string2 = u"Taxi"
# zip() stops at the shorter input, so surplus characters of the longer
# string are dropped. join() avoids repeated string concatenation in a loop.
ret = "".join(s1 + s2 for s1, s2 in zip(string1, string2))

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(ret)


# 03 Pi: the word lengths of the sentence, concatenated in order
string = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
# str.replace returns a new string, so the result has to be kept; otherwise
# the punctuation is counted as part of the word lengths. Only the punctuation
# characters are removed (not the following space) so that splitting on " "
# still separates neighbouring words.
cleaned = string.replace(",", "").replace(".", "")
words = cleaned.split(" ")
pi_digits = "".join(str(len(word)) for word in words)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(pi_digits)


# 04 Element symbols: map each word position to the leading letters of the word
string = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
words = string.split(" ")
# 1-based positions of the words that contribute only their first character;
# every other word contributes its first two characters.
n = [1, 5, 6, 7, 8, 9, 15, 16, 19]
# Keys are 1-based word positions, values are the extracted prefixes.
d = {
    position: (word[0] if position in n else word[0:2])
    for position, word in enumerate(words, 1)
}

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(d)


# 05 n-gram
def ngram(seq, n=2):
    """Return every contiguous slice of length ``n`` from ``seq``, in order.

    Args:
        seq: A sequence supporting ``len()`` and slicing (for example a
            string for character n-grams or a list of words for word
            n-grams).
        n: Length of each slice. Defaults to 2 (bi-grams).

    Returns:
        A list of ``len(seq) - n + 1`` slices of ``seq``. Each slice has the
        type produced by slicing ``seq``. The list is empty when ``seq`` is
        shorter than ``n``.
    """
    # range() exists on both Python 2 and Python 3; xrange() is Python 2 only.
    return [seq[i:i + n] for i in range(len(seq) - n + 1)]

seq1 = "I am an NLPer"
seq2 = seq1.split(" ")

# Character bi-grams (string input), then word bi-grams (list input).
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(ngram(seq1))
print(ngram(seq2))


# 06 Sets: compare the character bi-grams of two words
seq1 = "paraparaparadise"
seq2 = "paragraph"
# Converting to sets drops duplicate bi-grams.
X = set(ngram(seq1))
Y = set(ngram(seq2))
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("X: %s" % X)
print("Y: %s" % Y)
print("X+Y: %s" % (X | Y))  # union
print("X-Y: %s" % (X - Y))  # difference
print("X&Y: %s" % (X & Y))  # intersection


# 07 Template-based sentence generation
def template(x=12, y="Temperatur", z=22.4):
    """Fill the fixed sentence template with ``x``, ``y`` and ``z``.

    Each argument is inserted with ``%s`` formatting, in the order
    ``x``, ``y``, ``z``.

    Returns:
        The formatted string.
    """
    pattern = "%s Zeit%s ist%s"
    values = (x, y, z)
    return pattern % values

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(template())


# 08 Cipher
string = u"Implementieren Sie die Funktionsverschlüsselung, die jedes Zeichen der angegebenen Zeichenfolge gemäß den folgenden Spezifikationen konvertiert."
def cipher(string):
    """Mirror every lowercase ASCII letter within the alphabet.

    Each character found in ``a``-``z`` is replaced by the character with
    code ``219 - ord(char)`` (so ``a`` becomes ``z``, ``b`` becomes ``y``,
    and so on). All other characters are passed through unchanged.

    Args:
        string: The text to convert.

    Returns:
        The converted text.
    """
    lowercase = "abcdefghijklmnopqrstuvwxyz"
    return "".join(
        chr(219 - ord(char)) if char in lowercase else char
        for char in string
    )

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(cipher(string))  # encode
print(cipher(cipher(string)))  # decode: applying the cipher twice restores the input


# 09 Typoglycemia
import random

def typoglycemia(string):
    """Randomly reorder the interior characters of each longer word.

    The input is split on single spaces. Words of four characters or fewer
    are kept as they are. For longer words the first and last characters
    stay in place and the characters in between are reordered with
    ``random.sample``.

    Args:
        string: The space-separated text to scramble.

    Returns:
        The processed words joined with single spaces.
    """
    def scramble(word):
        # Short words are passed through untouched.
        if len(word) <= 4:
            return word
        interior = word[1:-1]
        shuffled = "".join(random.sample(interior, len(interior)))
        return word[0] + shuffled + word[-1]

    return " ".join(scramble(word) for word in string.split(" "))

string = "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind."
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(typoglycemia(string))