[PYTHON] 100 language processing knocks (2020): 00-09

Chapter 1: Warm-up

00. Reverse order of strings

# Print the characters of s in reverse order.
s = "stressed"
reversed_s = "".join(reversed(s))
print(reversed_s)

01. "Patatokukashi"


"""
01. "Patatokukashi"
Take the 1st, 3rd, 5th, and 7th characters of the string "Patatokukashi" and get the concatenated string.
"""
s = "Patatoku Kashii"
# Keep every second character, starting with the first (indices 0, 2, 4, ...).
print("".join(s[i] for i in range(0, len(s), 2)))
# NOTE(review): with this literal the line above prints "Pttk ahi", not the
# exercise's intended answer ("Police car") - the literal looks
# machine-translated; confirm against the original post.

02. "Police car" + "Taxi" = "Patatokukashi"

from functools import reduce

s1 = "Police car"
s2 = "taxi"
# NOTE(review): the exercise expects these two words to interleave into
# "Patatokukashi"; the literals look machine-translated, so the result printed
# below is "Ptoalxii" instead - confirm against the original post.

# method 1: index-based. Stop at the shorter string so that inputs of unequal
# length do not raise IndexError.
res = [s1[i] + s2[i] for i in range(min(len(s1), len(s2)))]
print("".join(res))
# Ptoalxii

# method 2: zip pairs the characters up and also stops at the shorter string.
res = [a + b for a, b in zip(s1, s2)]
print("".join(res))
# Ptoalxii

# method 3: fold the character pairs into one string. The "" initial value keeps
# the accumulator a str; without it reduce concatenates the tuples themselves
# and a tuple is printed instead of a string.
res = reduce(lambda acc, pair: acc + pair[0] + pair[1], zip(s1, s2), "")
print(res)
# Ptoalxii


# reduce
def do_sum(x1, x2):
    """Return x1 + x2; a two-argument function suitable for functools.reduce."""
    total = x1 + x2
    return total


# reduce folds the list from the left: (((1 + 2) + 3) + 4) => 10
total = reduce(do_sum, [1, 2, 3, 4])
print(total)  # 10

03. Pi

"""
03.Pi
“Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics.”
Break the sentence down into words, and create a list of the number of alphabetic characters in each word, in order of appearance from the beginning.
"""
s = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
# Delete commas and periods in a single pass so they are not counted as letters.
s = s.translate(str.maketrans("", "", ",."))
words = s.split(" ")

# The word lengths spell out the leading digits of pi.
print(list(map(len, words)))
# [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9]

04. Element symbol

"""
04.Element symbol
“Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can.”
Decompose the sentence above into words; take only the first letter of the 1st, 5th, 6th, 7th, 8th, 9th, 15th, 16th, and 19th words, and the first two letters of every other word.
Create an associative array (dictionary type or map type) from the extracted character string to the position of the word (the number of the word from the beginning).
"""

# Get words
s = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
for c in [",", "."]:
    s = s.replace(c, "")
words = s.split(" ")

# Make index
# 1-based numbers of the words that contribute only their first letter;
# a set gives constant-time membership tests inside the loop.
one_character_pos = {1, 5, 6, 7, 8, 9, 15, 16, 19}
index = {}
# Count the words from 1: the task asks for "the number of the word from the
# beginning", so the first word must map to 1, not 0.
for i, word in enumerate(words, start=1):
    symbol = word[0] if i in one_character_pos else word[:2]
    index[symbol] = i
print(index)
# {'H': 1, 'He': 2, 'Li': 3, 'Be': 4, 'B': 5, 'C': 6, 'N': 7, 'O': 8, 'F': 9, 'Ne': 10, 'Na': 11, 'Mi': 12, 'Al': 13, 'Si': 14, 'P': 15, 'S': 16, 'Cl': 17, 'Ar': 18, 'K': 19, 'Ca': 20}

05. n-gram
"""
05. n-gram
Create a function that builds n-grams from a given sequence (string, list, etc.).
Using this function, get the word bi-grams and the character bi-grams from the sentence "I am an NLPer".
"""
from typing import List, Sequence


def word_bigram(words: List[str]):
    """Return every pair of adjacent items of words, in order, as two-item slices."""
    pairs = []
    for start in range(len(words) - 1):
        pairs.append(words[start : start + 2])
    return pairs


def char_bigram(chars: str):
    """Return every two-character window of chars, moving one position at a time."""
    last_start = len(chars) - 1
    windows = []
    start = 0
    while start < last_start:
        windows.append(chars[start : start + 2])
        start += 1
    return windows


s = "I am an NLPer"
# First the word bi-grams, then the character bi-grams (spaces included).
for bigrams in (word_bigram(s.split(" ")), char_bigram(s)):
    print(bigrams)
# [['I', 'am'], ['am', 'an'], ['an', 'NLPer']]
# ['I ', ' a', 'am', 'm ', ' a', 'an', 'n ', ' N', 'NL', 'LP', 'Pe', 'er']


# Generalization
def n_gram(seq: Sequence, n: int) -> list:
    """Return all contiguous length-n slices of seq, in order of appearance.

    Args:
        seq: Any sliceable sequence with a length (str, list, tuple, ...).
        n: Size of each slice; must be at least 1.

    Returns:
        A list of len(seq) - n + 1 slices, each of the type that slicing seq
        produces. The list is empty when seq has fewer than n items.

    Raises:
        ValueError: If n is smaller than 1.
    """
    if n < 1:
        # Without this guard n == 0 would yield len(seq) + 1 empty slices and
        # a negative n would yield meaningless partial slices.
        raise ValueError(f"n must be at least 1, got {n}")
    return [seq[i : i + n] for i in range(len(seq) - n + 1)]


# The generic function reproduces both bi-gram results: words first, then characters.
for sequence in (s.split(" "), s):
    print(n_gram(sequence, n=2))
# [['I', 'am'], ['am', 'an'], ['an', 'NLPer']]
# ['I ', ' a', 'am', 'm ', ' a', 'an', 'n ', ' N', 'NL', 'LP', 'Pe', 'er']

06. Set

"""
06. Set
Find the sets of character bi-grams contained in "paraparaparadise" and "paragraph" as X and Y, respectively,
and find the union, intersection, and difference of X and Y. In addition, find out whether the bi-gram 'se' is included in X and in Y.
https://www.javadrive.jp/python/set/index6.html#section1
"""

from typing import Sequence


# Generalization
def n_gram(seq: Sequence, n: int) -> list:
    """Return all contiguous length-n slices of seq, in order of appearance.

    Args:
        seq: Any sliceable sequence with a length (str, list, tuple, ...).
        n: Size of each slice; must be at least 1.

    Returns:
        A list of len(seq) - n + 1 slices, each of the type that slicing seq
        produces. The list is empty when seq has fewer than n items.

    Raises:
        ValueError: If n is smaller than 1.
    """
    if n < 1:
        # Without this guard n == 0 would yield len(seq) + 1 empty slices and
        # a negative n would yield meaningless partial slices.
        raise ValueError(f"n must be at least 1, got {n}")
    return [seq[i : i + n] for i in range(len(seq) - n + 1)]


x = set(n_gram("paraparaparadise", n=2))
y = set(n_gram("paragraph", n=2))
# x {'ar', 'ra', 'is', 'ap', 'pa', 'di', 'ad', 'se'}
# y {'ar', 'ra', 'ap', 'pa', 'ag', 'gr', 'ph'}

union_res = x.union(y)
intersection_res = x.intersection(y)
difference_res = x.difference(y)  # bi-grams that are in X but not in Y
print(union_res)
print(intersection_res)
print(difference_res)
# (sets are unordered, so the element order may differ between runs)
# {'ar', 'ad', 'se', 'ph', 'pa', 'is', 'gr', 'di', 'ap', 'ra', 'ag'}
# {'ra', 'ar', 'pa', 'ap'}
# {'se', 'di', 'ad', 'is'}

# The task also asks whether the bi-gram 'se' is included in X and in Y.
print("se" in x)
print("se" in y)
# True
# False

07. Sentence generation by template

"""
07. Sentence generation by template
Implement a function that takes the arguments x, y, and z and returns the string "y at x is z".
In addition, check the execution result with x=12, y="temperature", z=22.4.
"""


def generate(x, y, z):
    """Return the sentence "<y> at <x> is <z>" built from the three arguments.

    Args:
        x: Value placed after "at" (the time in the sample call).
        y: Value placed at the start of the sentence (the measured quantity).
        z: Value placed after "is" (the reading).

    Returns:
        The formatted string; each argument is rendered with its default
        str() formatting.
    """
    return f"{y} at {x} is {z}"


x = 12
y = "temperature"
z = 22.4

print(generate(x, y, z))
# temperature at 12 is 22.4

08. Ciphertext

"""
08.Cryptogram
Implement the function cipher that converts each character of the given character string according to the following specifications.

If the character is a lowercase letter, replace it with the character whose code is (219 - character code)
Output other characters as they are
Use this function to encrypt / decrypt English messages.

Method:
ord: Character → ASCII code
chr: ASCII code → character
"""


def cipher(string: str) -> str:
    """Mirror each ASCII lowercase letter of string within a-z; keep everything else.

    A lowercase letter with code c becomes chr(219 - c). Because
    ord("a") + ord("z") == 219, this maps a<->z, b<->y, ..., so applying the
    function twice restores the original text (encrypting and decrypting are
    the same operation).

    Args:
        string: Text to convert.

    Returns:
        The converted text, the same length as the input.
    """
    # Compare against "a".."z" rather than str.islower(): islower() is also
    # true for non-ASCII lowercase letters, whose code points would turn into
    # unrelated characters or make chr() raise on a negative value.
    return "".join(
        chr(219 - ord(ch)) if "a" <= ch <= "z" else ch for ch in string
    )


# Encrypt a message, then feed the ciphertext back in to decrypt it.
for message in ("This is a encrypted message", "Tsrh rh z vmxibkgvw nvhhztv"):
    print(cipher(message))
# Tsrh rh z vmxibkgvw nvhhztv
# This is a encrypted message

09. Typoglycemia
"""
09. Typoglycemia

For a string of words separated by spaces, create a program that randomly rearranges the order of the letters of each word while leaving its first and last letters in place.
However, words with a length of 4 or less are not rearranged. Give the program an appropriate English sentence
(for example, "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind .")
and check the execution result.
"""
import random


def generate_typoglycemia(words: list) -> list:
    """Shuffle the inner letters of every word longer than four characters.

    Args:
        words: Words (strings) to process, in order.

    Returns:
        A new list of the same length. Words of length 4 or less are copied
        unchanged; longer words keep their first and last characters while the
        characters in between are put in random order.
    """
    res = []
    for word in words:
        # The task leaves words of length 4 or less untouched, so the
        # boundary is inclusive.
        if len(word) <= 4:
            res.append(word)
            continue
        inner = word[1:-1]
        # random.sample returns a new shuffled list; random.shuffle works in
        # place and returns None, so its result cannot be joined directly.
        shuffled = "".join(random.sample(inner, k=len(inner)))
        res.append(word[0] + shuffled + word[-1])
    return res


s = "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
# split() with no argument breaks the sentence on whitespace.
shuffled_words = generate_typoglycemia(s.split())
print(" ".join(shuffled_words))
# The inner letters are shuffled randomly, so the printed sentence differs from
# run to run; only the first and last letter of each word are guaranteed to stay.

Recommended Posts

100 language processing knocks 03 ~ 05
100 language processing knocks (2020): 40
100 language processing knocks (2020): 32
100 language processing knocks (2020): 35
100 language processing knocks (2020): 47
100 language processing knocks (2020): 39
100 language processing knocks (2020): 26
100 language processing knocks (2020): 34
100 language processing knocks (2020): 42
100 language processing knocks (2020): 49
100 language processing knocks 06 ~ 09
100 language processing knocks (2020): 43
100 language processing knocks (2020): 24
100 language processing knocks (2020): 10-19
100 language processing knocks (2020): 30
100 language processing knocks (2020): 00-09
100 language processing knocks (2020): 31
100 language processing knocks (2020): 48
100 language processing knocks (2020): 44
100 language processing knocks (2020): 41
100 language processing knocks (2020): 37
100 language processing knocks (2020): 25
100 language processing knocks (2020): 23
100 language processing knocks (2020): 33
100 language processing knocks (2020): 20
100 language processing knocks (2020): 27
100 language processing knocks (2020): 46
100 language processing knocks (2020): 21
100 language processing knocks (2020): 36
100 amateur language processing knocks: 41
100 amateur language processing knocks: 71
100 amateur language processing knocks: 56
100 amateur language processing knocks: 50
100 amateur language processing knocks: 59
100 amateur language processing knocks: 70
100 amateur language processing knocks: 62
100 amateur language processing knocks: 60
100 amateur language processing knocks: 30
100 amateur language processing knocks: 06
100 amateur language processing knocks: 84
100 amateur language processing knocks: 81
100 amateur language processing knocks: 33
100 amateur language processing knocks: 46
100 amateur language processing knocks: 88
100 amateur language processing knocks: 89
100 amateur language processing knocks: 40
100 amateur language processing knocks: 45
100 amateur language processing knocks: 43
100 amateur language processing knocks: 55
100 amateur language processing knocks: 22
100 amateur language processing knocks: 61
100 amateur language processing knocks: 94
100 amateur language processing knocks: 54
100 amateur language processing knocks: 04
100 amateur language processing knocks: 63
100 amateur language processing knocks: 78
100 amateur language processing knocks: 12
100 amateur language processing knocks: 14
100 amateur language processing knocks: 08
100 amateur language processing knocks: 42
100 language processing knocks ~ Chapter 1