Python语言:
1 import urllib,urllib2
2
# Build and send the translation request (Python 2: urllib / urllib2).
# NOTE: `text` must already hold the sentence to translate before this runs.
values = {
    'hl': 'zh-CN',
    'ie': 'utf8',
    'text': text,
    'langpair': "en|zh-CN",  # "source|target" language pair
}
url = 'http://translate.google.cn/translate_t'
data = urllib.urlencode(values)
# Supplying `data` makes urllib2 send the form as a POST body.
req = urllib2.Request(url, data)
req.add_header('User-Agent', "Mozilla/5.0+(compatible;+Googlebot/2.1;http://www.google.com/bot.html)")
response = urllib2.urlopen(req)
上面最关键的是 text 这个变量,它的值就是想要翻译的句子;后面的 langpair 值是语言对,这里是把英文翻译成简体中文,可以自由改动。下面就要实现一个类来取出我们想要的翻译结果。这个类要从 SGMLParser 派生出来,SGMLParser 包含在 sgmllib.py 中。
Python语言:
01 from sgmllib import SGMLParser
02
03 URLLister(SGMLParser):
04 def re(self):
05 SGMLParser.re(self)
06 self.result =
07 self.open = False
08 def start_div(self, attrs):
09 id = [v for k, v in attrs k'id']
10 'result_box' in id:
11 self.open = True
12 def handle_data(self, text):
13 self.open:
14 self.result.append(text)
15 self.open = False
当调用 feed 方法时,解析器就会寻找开始标记为 div 的片段;找到时,它会调用一个自身内部的方法,其实最终也就是调用到 start_div 和 handle_data 这两个方法,来找出我们想要的翻译结果。具体细节就不说了,下面是完整的代码:
Python语言:
01 import urllib,urllib2
02 from sgmllib import SGMLParser
03
04 URLLister(SGMLParser):
05 def re(self):
06 SGMLParser.re(self)
07 self.result =
08 self.open = False
09 def start_div(self, attrs):
10 id = [v for k, v in attrs k'id']
11 'result_box' in id:
12 self.open = True
13 def handle_data(self, text):
14 self.open:
15 self.result.append(text)
16 self.open = False
17
# Interactive loop: read an English sentence, post it to the translation
# page, and print the text captured from the result_box div.
while True:
    text = raw_input("请输入要翻译英文(退出输入q):")
    if text == 'q':
        break
    values = {
        'hl': 'zh-CN',
        'ie': 'utf8',
        'text': text,
        'langpair': "en|zh-CN",  # "source|target" language pair
    }
    url = 'http://translate.google.cn/translate_t'
    data = urllib.urlencode(values)
    # Supplying `data` makes urllib2 send the form as a POST body.
    req = urllib2.Request(url, data)
    req.add_header('User-Agent', "Mozilla/5.0+(compatible;+Googlebot/2.1;http://www.google.com/bot.html)")
    response = urllib2.urlopen(req)
    try:
        page = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    parser = URLLister()
    parser.feed(page)
    parser.close()
    # Parenthesised single-argument print behaves the same as the print
    # statement on Python 2.
    print("翻译结果:")
    for chunk in parser.result:
        # Decode the captured bytes as UTF-8 and re-encode as GBK before
        # printing -- presumably for a GBK console; adjust for other terminals.
        print(unicode(chunk, 'utf-8').encode('gbk'))
查看原图(大图)
最新评论