Google translation script
In my projects, I often have tons of one-liners to translate. The best translation service I have found so far is, well, Google Translate. But switching to the browser every few seconds becomes annoying rather fast. There does not seem to be any script automating this, so I made a quick one; maybe it'll help some people :p
Example
./tr.py de en hallo welt
Hello World
Code
#!/usr/bin/python
import sys, re
from htmlentitydefs import name2codepoint
from urllib import urlencode, FancyURLopener
from BeautifulSoup import BeautifulSoup
"""
Copyright (c) 2008 kang@insecure.ws
This program interacts with a web translation service to translate text between many languages.
It is distributed under the terms of the GPL version 3.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
class URLOpener(FancyURLopener):
    """URL opener that identifies itself with a browser-like user agent.

    According to the original author, Google refuses to serve requests
    that do not carry a browser user agent. Feel free to replace the
    string below with another one.
    """

    # Overrides the opener's ``version`` attribute.
    version = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.14) Gecko/20080404 Iceweasel/2.0.0.14"
# Address of the web translation form the requests are sent to.
url = 'http://translate.google.de/translate_t'

# Template for the POST form fields; translate() supplies the actual text.
post = dict(ie='UTF8', text='hallo welt')

# Template for the query-string parameters: source language (sl) and
# target language (tl); translate() supplies the actual codes.
get = dict(sl='de', tl='en')
def translate(text, sl="de", tl="en"):
    """Translate ``text`` with the web service and print the result.

    text -- the text to translate.
    sl   -- source language code (default "de").
    tl   -- target language code (default "en").

    The translation is printed to stdout. If the response contains no
    usable ``result_box`` element, a short message is written to stderr
    instead. Always returns None. Network errors raised by the opener
    propagate to the caller.
    """
    # Work on per-call copies so the module-level ``get``/``post``
    # templates are not modified as a side effect of translating.
    query = dict(get, sl=sl, tl=tl)
    form = dict(post, text=text)

    page = URLOpener().open(url + "?" + urlencode(query), urlencode(form))
    try:
        soup = BeautifulSoup(page.read())
    finally:
        # Release the connection even if reading or parsing fails.
        page.close()

    result = soup.find('div', id='result_box')
    # The element may be absent, or may not hold a single string child;
    # either way there is nothing that can safely be decoded and printed.
    if result is None or result.string is None:
        sys.stderr.write("No translation found in the response\n")
        return
    print(htmldecode(result.string))
def list2str(mylist):
    """Concatenate the strings in ``mylist``, each followed by one space.

    A non-empty list therefore yields a string ending in a trailing
    space; an empty list yields the empty string.
    """
    return "".join(word + " " for word in mylist)
def htmldecode(text):
    """Decode HTML entities in the given text.

    Numeric references (``&#233;``, ``&#xE9;``) and named references found
    in ``name2codepoint`` (``&amp;``) are replaced by the character they
    denote; the trailing ``;`` is optional. Anything else is left
    untouched, including unknown names and numeric references whose code
    point cannot be converted to a character.

    text -- a ``unicode`` string (or subclass) or a byte ``str``.

    Returns the text with the recognised references replaced.
    """
    # isinstance rather than an exact type comparison: instances of
    # unicode subclasses must be decoded with unichr as well, otherwise
    # code points <= 255 would be inserted as raw bytes.
    if isinstance(text, unicode):
        uchr = unichr
    else:
        # Byte string input: keep code points that fit in one byte as bytes.
        def uchr(value):
            return unichr(value) if value > 255 else chr(value)

    def entitydecode(match):
        entity = match.group(1)
        try:
            if entity.startswith('#x'):
                return uchr(int(entity[2:], 16))
            if entity.startswith('#'):
                return uchr(int(entity[1:]))
        except (ValueError, OverflowError):
            # Code point out of range for the interpreter: keep the
            # reference as written instead of aborting the whole decode.
            return match.group(0)
        if entity in name2codepoint:
            return uchr(name2codepoint[entity])
        return match.group(0)

    charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
    return charrefpat.sub(entitydecode, text)
# In-line translation: everything after the two language codes is the text.
if len(sys.argv) >= 4:
    translate(list2str(sys.argv[3:]), sys.argv[1], sys.argv[2])
    sys.exit()
# Stdin translation: only the language codes were given on the command line.
elif len(sys.argv) == 3:
    sl = sys.argv[1]
    tl = sys.argv[2]
else:
    # The usage line names the two mandatory language arguments that the
    # lines below describe.
    print("USAGE: " + sys.argv[0] + " <sl> <tl> [text]")
    print("\tsl: source language")
    print("\ttl: translate to language")
    print("")
    print("If no text is provided, will use stdin (abort with ^D or EOF)")
    print("Example:")
    print(sys.argv[0] + " de en hallo welt")
    sys.exit()

# Interactive mode: translate one line per prompt until end of input.
while True:
    try:
        text = raw_input("Text: ")
    except EOFError:
        # Finish the prompt line before leaving.
        print("")
        sys.exit()
    translate(text, sl, tl)
Comments