Nowadays, I can't let go of machine translation to read English literature. When this happens, I especially want a tool that automatically translates English PDF files with the touch of a button. However, PDF has footnotes and page numbers mixed in addition to the text, so it is difficult to extract only the necessary text data. I created a tool ** Text_translator ** that manually copies and pastes text data and interactively performs subsequent processing in the GUI.
--GUI created with tkinter --Google Translate API uses third party googletransx --DeepL Translator is obediently issued and used Regular API --The API key of the code below has been deleted from 630 yen + 2,500 yen = 3,130 yen per month. --Use python-docx to create a translation table for Word.
text_translator.py
# -*- coding: utf-8 -*-
import tkinter as tk
import tkinter.scrolledtext as S
from tkinter import messagebox as tkMessageBox
from tkinter import filedialog as tkFileDialog
from googletransx import Translator
from docx import Document
import os
import datetime
import requests
#What happens when button 1 is clicked
def ButtonClick1():
    """Reflow the pasted text into paragraphs and show it in the middle pane.

    Reads the whole input box, joins hard-wrapped lines, and writes the
    result to the formatting-result box, replacing its previous contents.

    Each line break in the input is handled as follows:

    * after ``.``, ``."``, ``.”`` or ``:`` it is treated as the end of a
      paragraph: the break is kept and one more is added after it;
    * after ``-`` both the hyphen and the break are dropped, rejoining a
      word that was split across two lines;
    * any other break becomes a single space.
    """
    paragraph_endings = (".", ".\"", ".”", ":")
    text = input_box.get('1.0', 'end -1c')  # Whole content of the input box

    # Decide per line break rather than swapping in placeholder tokens, so
    # input that happens to contain a placeholder-like string (for example
    # "XXX0") passes through unchanged.
    *wrapped, tail = text.split("\n")
    pieces = []
    for segment in wrapped:
        if segment.endswith(paragraph_endings):
            pieces.append(segment + "\n\n")   # Paragraph end: keep and widen
        elif segment.endswith("-"):
            pieces.append(segment[:-1])       # Hyphenated wrap: rejoin the word
        else:
            pieces.append(segment + " ")      # Ordinary wrap: join with a space
    pieces.append(tail)                       # Text after the last line break
    text = "".join(pieces)

    # Drop anything that cannot be encoded as UTF-8 (e.g. lone surrogates).
    text = text.encode('utf-8', "ignore").decode('utf-8')

    processed_box.delete('1.0', 'end')   # Clear the previous formatting result
    processed_box.insert('1.0', text)    # Show the reflowed text
#What happens when button 2 is clicked
def _split_for_translation(text, limit=5000):
    """Yield ``(chunk, separator)`` pairs with every chunk shorter than *limit*.

    While *text* has *limit* or more characters it is cut at the last blank
    line (``'\\n\\n'``) inside its first *limit* characters; the blank line
    itself is not part of the chunk and is returned as the separator.  If
    that window holds no usable blank line, the cut falls back to the last
    newline or space, and finally to a hard cut, with an empty separator.
    The final pair carries whatever text remains.

    *limit* is expected to be at least 2.
    """
    while len(text) >= limit:
        window = text[:limit]
        cut = window.rfind('\n\n')
        if cut > 0:
            yield text[:cut], '\n\n'
            text = text[cut + 2:]
            continue
        # No paragraph boundary in range: cut at whitespace, or hard-cut
        # when there is none, so the loop always makes progress.
        cut = max(window.rfind('\n'), window.rfind(' '))
        if cut <= 0:
            cut = max(limit - 1, 1)
        yield text[:cut], ''
        text = text[cut:]
    yield text, ''


def ButtonClick2():
    """Translate the formatted text into Japanese with Google Translate.

    The text returned by ``get_text()`` is sent in pieces of fewer than 5000
    characters, split at paragraph boundaries where possible, and each
    translated piece is appended to the translation result box.
    """
    lines = get_text()
    translator = Translator()  # One client is enough for every chunk
    for chunk, separator in _split_for_translation(lines):
        if chunk.strip():      # Nothing to translate in an empty/blank chunk
            chunk = translator.translate(chunk, dest='ja').text
        translate_box.insert('end', chunk + separator)
#What happens when button 3 is clicked
def ButtonClick3():
    """Translate the formatted text into Japanese with the DeepL API.

    Sends the text returned by ``get_text()`` to the DeepL ``/v2/translate``
    endpoint and appends the translation to the translation result box.
    Network failures, HTTP error statuses and unexpected response bodies are
    reported in an error dialog instead of escaping from the button callback.
    """
    lines = get_text()
    if not lines.strip():
        return  # Nothing to translate; skip the network round-trip

    # Enter the API key issued to you here.
    DEEPL_API_KEY = 'XXXXX'

    # Form fields sent in the POST body.
    params = {
        "text": lines,
        "target_lang": 'JA',  # Translate into Japanese
    }
    # Header-based authentication as documented by DeepL; this also keeps the
    # key out of the request body.
    headers = {"Authorization": "DeepL-Auth-Key " + DEEPL_API_KEY}

    try:
        response = requests.post(
            "https://api.deepl.com/v2/translate",
            data=params,
            headers=headers,
            timeout=30,  # Do not freeze the GUI forever on a stalled connection
        )
        response.raise_for_status()
        translated = response.json()["translations"][0]["text"]
    except (requests.RequestException, ValueError, KeyError, IndexError) as error:
        tkMessageBox.showerror(
            "DeepL Translator", "Translation failed: {!r}".format(error))
        return

    translate_box.insert('end', translated)  # Show the translation
#What happens when button 4 is clicked
def _fill_placeholders(paragraphs, replacements):
    """Replace placeholder strings in *paragraphs*.

    *replacements* is a sequence of ``(placeholder, text)`` pairs applied in
    order.  A paragraph is written back only when its text actually changed:
    assigning ``paragraph.text`` rebuilds the paragraph's content, so
    untouched paragraphs are left alone to keep their formatting.
    """
    for paragraph in paragraphs:
        original = paragraph.text
        updated = original
        for placeholder, text in replacements:
            updated = updated.replace(placeholder, text)
        if updated != original:
            paragraph.text = updated


def ButtonClick4():
    """Create a Word bilingual table from a template chosen by the user.

    Asks for a ``.docx`` template, replaces the two placeholder sentences in
    its body paragraphs and table cells with the formatted original text and
    the translation, saves the result next to the template as
    ``<name>_replace<HHMMSS><ext>`` and reports the saved file name.
    Does nothing if the file dialog is cancelled.
    """
    edit_text = processed_box.get('1.0', 'end -1c')       # Formatted original text
    translate_text = translate_box.get('1.0', 'end -1c')  # Translated text

    # Let the user pick the Word bilingual-table template.
    fTyp = [('word file', "*.docx")]
    iDir = '.'
    filename = tkFileDialog.askopenfilename(filetypes=fTyp, initialdir=iDir)
    if not filename:
        return  # Dialog was cancelled

    document = Document(filename)
    replacements = (
        ("The original text is given here.", edit_text),
        ("The translation is given here.", translate_text),
    )

    # Placeholders may sit in ordinary paragraphs or inside table cells.
    _fill_placeholders(document.paragraphs, replacements)
    for table in document.tables:
        for row in table.rows:
            for cell in row.cells:
                _fill_placeholders(cell.paragraphs, replacements)

    # Build the output name from the extension only, so a ".docx" elsewhere in
    # the path is untouched and the template itself is never overwritten.
    stem, extension = os.path.splitext(filename)
    dt_str = datetime.datetime.now().strftime("%H%M%S")
    savefilename = stem + "_replace" + dt_str + extension
    document.save(savefilename)

    # Report the name of the saved file.
    tkMessageBox.showinfo("Creation completed", os.path.basename(savefilename) + "I saved it in.")
_count_label = None  # Character-count label; created on first use, then reused


def get_text():
    """Return the formatted text and prepare the window for a translation.

    Reads the formatting-result box, shows the length of that text (number
    of characters) in the header row, clears the translation result box and
    returns the text.
    """
    global _count_label
    lines = processed_box.get('1.0', 'end -1c')  # Text in the formatting-result box

    # Update a single label instead of stacking a new widget on every call.
    caption = "word count" + str(len(lines)) + " "
    if _count_label is None:
        _count_label = tk.Label(root, font=("Helvetica", 14))
        _count_label.place(relx=0.58, y=20)
    _count_label.config(text=caption)

    translate_box.delete('1.0', 'end')  # Clear the previous translation
    return lines
#Main program
# Build the main window: a header row of captions and action buttons above
# three side-by-side text panes (input, formatting result, translation).
_HEADER_FONT = ("Helvetica", 14)
_PANE_FONT = ("Helvetica", 12)
_PANE_GEOMETRY = {"relheight": 0.89, "relwidth": 0.32, "y": 60}

root = tk.Tk()
root.geometry("1600x800")
root.title("Text_translator")

# Captions above the three panes
label1 = tk.Label(root, text="Enter text", font=_HEADER_FONT)
label1.place(x=20, y=20)
label2 = tk.Label(root, text="Plastic surgery result", font=_HEADER_FONT)
label2.place(relx=0.34, y=20)
label3 = tk.Label(root, text="Translation result", font=_HEADER_FONT)
label3.place(relx=0.67, y=20)

# Action buttons, each wired to its click handler
button1 = tk.Button(
    root, text="Plastic surgery", font=_HEADER_FONT, command=ButtonClick1)
button1.place(x=200, y=15)
button2 = tk.Button(
    root, text="Google translate", font=_HEADER_FONT, command=ButtonClick2)
button2.place(relx=0.42, y=15)
button3 = tk.Button(
    root, text="DeepL Translator", font=_HEADER_FONT, command=ButtonClick3)
button3.place(relx=0.50, y=15)
button4 = tk.Button(
    root, text="Word translation table creation", font=_HEADER_FONT,
    command=ButtonClick4)
button4.place(relx=0.75, y=15)

# Left pane: text pasted by the user
input_box = S.ScrolledText(root, font=_PANE_FONT)
input_box.place(relx=0.01, **_PANE_GEOMETRY)

# Middle pane: formatting result
processed_box = S.ScrolledText(root, font=_PANE_FONT)
processed_box.place(relx=0.34, **_PANE_GEOMETRY)

# Right pane: translation result
translate_box = S.ScrolledText(root, font=_PANE_FONT)
translate_box.place(relx=0.67, **_PANE_GEOMETRY)

root.mainloop()
--Copy and paste the text in the left window
--Press the Format
button to remove the extra line breaks in the text and display it in the middle window.
--Manually fix the parts that are not well shaped
--Press Google Translate
or DeepL Translator
and the translation result will be displayed in the right window.
--By pressing Create Word Bilingual Table
and selecting a separately created Word template, the Word Bilingual Table before and after translation will be created.
--In the Word template, write "The original text is written here." And "The translated text is written here." It replaces the original and translated texts, respectively.
Many people have created automated tools for Google Translate, but I don't see many tools that use the GUI. The GUI is easy to understand. ** If you like it, please LGTM. ** **
Recommended Posts