[PYTHON] NLP100: Chapter 1 Warm-up

100 language processing knocks
I was short on time, so many parts are only roughly done.
# coding:utf-8
# 00 Reverse the order of the characters in a string.
string = "stressed"
# Two equivalent ways to reverse: an extended slice with step -1, and
# joining the characters yielded by reversed().
reversed_by_slice = string[::-1]
reversed_by_join = "".join(reversed(string))
# A parenthesised single-argument print behaves the same on Python 2 and 3;
# the bare print statement is a SyntaxError on Python 3.
print(reversed_by_slice)
print(reversed_by_join)


# 01 "Patatokukashi": split a string into its even-indexed and odd-indexed
# characters.
# NOTE(review): the literal below is presumably a translated stand-in for the
# exercise's original string -- confirm against the problem statement.
string = u"Patatoku Kashii"
even_chars = string[0::2]  # characters at indices 0, 2, 4, ...
odd_chars = string[1::2]   # characters at indices 1, 3, 5, ...
# A parenthesised single-argument print behaves the same on Python 2 and 3;
# the bare print statement is a SyntaxError on Python 3.
print(even_chars)
print(odd_chars)


# 02 "Police car" + "Taxi" = "Patatokukashi": interleave two strings
# character by character.
# NOTE(review): with the literals below the result is "Ptoalxii", not the
# string named in the heading; the literals are presumably translated
# stand-ins for the exercise's original strings -- confirm.
string1 = u"Police car"
string2 = u"taxi"
# zip() stops at the shorter input, so only the first len(string2)
# characters of string1 take part in the result.
ret = "".join(s1 + s2 for s1, s2 in zip(string1, string2))

# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(ret)


# 03 Pi: concatenate the letter count of every word, in order of appearance.
string = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
# str.replace() returns a new string, so its result has to be kept.
# Only the punctuation marks are removed; the spaces that separate the words
# stay so that split() still sees every word on its own.
stripped = string.replace(",", "").replace(".", "")
words = stripped.split(" ")
pi_digits = "".join(str(len(word)) for word in words)
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(pi_digits)


# 04 Element symbols: map each word's 1-based position to its leading
# letter(s).
string = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
words = string.split(" ")
# 1-based positions whose entry is only the first letter of the word;
# every other position keeps the first two letters.
n = [1, 5, 6, 7, 8, 9, 15, 16, 19]
d = {
    pos: (word[0] if pos in n else word[0:2])
    for pos, word in enumerate(words, 1)
}

# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(d)


# 05 n-gram
def ngram(seq, n=2):
    """Return every contiguous run of ``n`` items of ``seq``, in order.

    ``seq`` only needs to support ``len()`` and slicing: a string yields
    character n-grams (substrings), a list of words yields word n-grams
    (sub-lists).

    Args:
        seq: The sequence to slide the window over.
        n: Window size; must be at least 1. Defaults to 2 (bi-grams).

    Returns:
        A list of ``len(seq) - n + 1`` slices of ``seq``; an empty list when
        ``seq`` is shorter than ``n``.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))
    # range() exists on both Python 2 and 3 (xrange() is Python 2 only);
    # a non-positive count simply yields no windows.
    return [seq[i:i + n] for i in range(len(seq) - n + 1)]

# Character bi-grams of the sentence, then word bi-grams of its
# space-separated tokens.
seq1 = "I am an NLPer"
seq2 = seq1.split(" ")

# A parenthesised single-argument print behaves the same on Python 2 and 3;
# the bare print statement is a SyntaxError on Python 3.
print(ngram(seq1))
print(ngram(seq2))


# 06 Sets: character bi-grams of two words, and their union, difference and
# intersection.
seq1 = "paraparaparadise"
seq2 = "paragraph"
X = set(ngram(seq1))
Y = set(ngram(seq2))
# Each value is wrapped in a 1-tuple so the % operator always formats it as
# a single argument. A parenthesised single-argument print behaves the same
# on Python 2 and 3.
print("X: %s" % (X,))
print("Y: %s" % (Y,))
print("X+Y: %s" % (X | Y,))  # union
print("X-Y: %s" % (X - Y,))  # elements of X that are not in Y
print("X&Y: %s" % (X & Y,))  # intersection


# 07 Sentence generation by template
def template(x=12, y="temperature", z=22.4):
    """Return the sentence "<y> is <z> at <x>".

    Args:
        x: The time (or any value) placed at the end of the sentence.
        y: The name of the quantity being reported.
        z: The value of that quantity.

    Returns:
        The formatted sentence; every argument is rendered with ``%s``.
    """
    # The previous format string ran the words together
    # ("12 timetemperature is22.4"); each field is now space-separated.
    return "%s is %s at %s" % (y, z, x)


# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(template())


# 08 Ciphertext
string = u"Implement the function cipher that converts each character of the given character string according to the following specifications."


def cipher(string):
    """Mirror every lowercase ASCII letter and leave all other characters.

    ``ord("a") + ord("z") == 219``, so ``chr(219 - ord(c))`` maps ``a`` to
    ``z``, ``b`` to ``y`` and so on. Applying the function twice gives back
    the original text, so the same call both encodes and decodes.

    Args:
        string: The text to convert.

    Returns:
        The converted text, the same length as the input.
    """
    l = "abcdefghijklmnopqrstuvwxyz"
    return "".join(chr(219 - ord(s)) if s in l else s for s in string)


# A parenthesised single-argument print behaves the same on Python 2 and 3;
# the bare print statement is a SyntaxError on Python 3.
print(cipher(string))  # encode
print(cipher(cipher(string)))  # decode


# 09 Typoglycemia
import random

def typoglycemia(string):
    """Shuffle the inner letters of every word longer than four characters.

    The text is split on single spaces. A word of four characters or fewer
    is kept as is; for a longer word the first and last characters stay in
    place and the characters between them are put in a random order. The
    words are joined back together with single spaces.
    """
    def scramble(token):
        # Short words are left untouched.
        if len(token) <= 4:
            return token
        inner = token[1:-1]
        # Sampling len(inner) items without replacement is a random
        # permutation of the inner characters.
        mixed = "".join(random.sample(inner, len(inner)))
        return token[0] + mixed + token[-1]

    return " ".join(scramble(token) for token in string.split(" "))

# Sample sentence for the typoglycemia demo; the output differs from run to
# run because the inner letters are shuffled at random.
string = "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind."
# A parenthesised single-argument print behaves the same on Python 2 and 3;
# the bare print statement is a SyntaxError on Python 3.
print(typoglycemia(string))