98 lines
3.7 KiB
Python
98 lines
3.7 KiB
Python
import json
|
|
import logging
|
|
import re
|
|
import logging as logger
|
|
|
|
import execjs
|
|
import requests
|
|
import selenium
|
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
from selenium.webdriver.firefox.options import Options
|
|
|
|
|
|
class BaiduTranslator:
    """Client for the Baidu Translate web endpoints, bootstrapped via a browser.

    On construction a remote Selenium WebDriver session loads
    ``https://fanyi.baidu.com/translate``; the browser's cookies are copied
    into a ``Cookie`` request header and the ``token`` / ``gtk`` values are
    scraped from the page's HTML. Translation requests themselves are sent
    with ``requests`` and signed by calling function ``e`` from ``./gen.js``
    through ``execjs``.

    The instance owns a browser session: call :meth:`end_session` when done,
    or use the instance as a context manager.
    """

    # Extracts the ``token`` value from the inline ``window['common']`` object.
    pattern = re.compile(
        r"window\['common'\]\W*?=\W*?{\W*?.*?token.*?:.*?'(\w+)',")
    # Extracts the double-quoted value assigned to ``window.gtk``.
    pattern_gtk = re.compile(r'window.gtk\W*?=\W*?"(.*?)"')

    def __init__(self, command_executor="http://39.101.194.63:28050"):
        """Start the browser session and collect cookies, token and gtk.

        Args:
            command_executor: URL of the remote WebDriver server. Defaults to
                the address that was previously hard-coded.

        Raises:
            ValueError: If token or gtk cannot be found in the loaded page.
                Any other exception raised while loading the page propagates
                unchanged. In both cases the browser session is closed first.
        """
        driver_options = Options()
        driver_options.add_argument('-headless')
        # NOTE(review): Firefox ``Options`` are passed together with EDGE
        # desired capabilities -- confirm this combination is intended.
        self.browser = selenium.webdriver.remote.webdriver.WebDriver(
            command_executor=command_executor,
            desired_capabilities=DesiredCapabilities.EDGE,
            options=driver_options)
        self.trans_str = ''
        self.trans_url = "https://fanyi.baidu.com/v2transapi"
        self._js_ctx = None  # compiled gen.js, created lazily by get_sign()
        try:
            logger.info('Starting browser session...')
            self.browser.get('https://fanyi.baidu.com/translate')
            logger.info('Browser session started')
            self.dict_cookies = self.browser.get_cookies()
            self.cookies_str = self._format_cookies(self.dict_cookies)
            logger.info('Set cookies as %s', self.cookies_str)
            self.headers = {
                "user-agent": (
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/75.0.3770.100 Safari/537.36"
                ),
                "Cookie": self.cookies_str,
            }
            # get_token_or_gtk() loads the page a second time; presumably the
            # token is only usable once the cookies above exist -- TODO confirm.
            self.token, self.gtk = self.get_token_or_gtk()
        except Exception:
            # The caller never receives the instance when __init__ fails, so
            # nobody else could close the remote browser session.
            self.browser.quit()
            raise
        logger.info('Set token as %s', self.token)
        logger.info('Set gtk as %s', self.gtk)
        logger.info('Baidu translate api started success')

    def __enter__(self):
        """Return the translator itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the browser session when leaving the ``with`` block."""
        self.end_session()

    @staticmethod
    def _format_cookies(cookies):
        """Render WebDriver cookie dicts as ``name=value; `` pairs."""
        return ''.join(
            '%s=%s; ' % (item['name'], item['value']) for item in cookies)

    @staticmethod
    def _build_trans_data(query, lang):
        """Build the base payload: zh -> en for Chinese input, else en -> zh."""
        source, target = ("zh", "en") if lang == "zh" else ("en", "zh")
        return {"query": query, "from": source, "to": target}

    @staticmethod
    def _collect_translation(dict_response):
        """Concatenate every ``dst`` entry of a translation response."""
        return ''.join(
            entry['dst'] for entry in dict_response['trans_result']['data'])

    def _extract_token_and_gtk(self, page_str):
        """Pull ``(token, gtk)`` out of page HTML, or raise ``ValueError``."""
        token_match = self.pattern.search(page_str)
        if token_match is None:
            raise ValueError('token not found in Baidu Translate page source')
        gtk_match = self.pattern_gtk.search(page_str)
        if gtk_match is None:
            raise ValueError('gtk not found in Baidu Translate page source')
        return token_match.group(1), gtk_match.group(1)

    def parse_url(self, data, url="https://fanyi.baidu.com/langdetect",
                  timeout=30):
        """POST form data with the session headers and decode the JSON reply.

        Args:
            data: Form fields to send.
            url: Endpoint to post to; defaults to the language-detect URL.
            timeout: Seconds to wait for the server, so that a stalled
                connection cannot block the caller forever.

        Returns:
            The decoded JSON body of the response.
        """
        response = requests.post(
            url, data=data, headers=self.headers, timeout=timeout)
        return json.loads(response.content.decode())

    def get_token_or_gtk(self, url="https://fanyi.baidu.com/translate"):
        """Load ``url`` in the browser and scrape token and gtk from its HTML.

        Args:
            url: Page to load.

        Returns:
            A ``(token, gtk)`` tuple of strings.

        Raises:
            ValueError: If either value is missing from the page.
        """
        self.browser.get(url)
        page_str = self.browser.find_element_by_xpath("//*").get_attribute(
            "outerHTML")
        return self._extract_token_and_gtk(page_str)

    def get_sign(self, gtk):
        """Compute the request signature for the current ``self.trans_str``.

        Calls function ``e`` from ``./gen.js`` with the text and ``gtk``. The
        script is read and compiled once and then reused for later calls.

        Args:
            gtk: The gtk value scraped from the translate page.

        Returns:
            Whatever the JavaScript function ``e`` returns.
        """
        ctx = getattr(self, '_js_ctx', None)
        if ctx is None:
            # NOTE(review): the path is relative to the working directory and
            # the file is read with the platform default encoding.
            with open("./gen.js", 'r') as f:
                ctx = execjs.compile(f.read())
            self._js_ctx = ctx
        return ctx.call("e", self.trans_str, gtk)

    def run(self, trans_str):
        """Translate ``trans_str``, print the result and return it.

        The language is detected first; text detected as ``zh`` is translated
        to English, anything else is treated as English and translated to
        Chinese. When detection yields no ``lan`` field, ``en`` is assumed.

        Args:
            trans_str: Text to translate.

        Returns:
            The translated text, or ``None`` when the response does not
            contain a translation (the raw response is printed instead).
        """
        self.trans_str = trans_str
        lang = self.parse_url({"query": self.trans_str}).get("lan", "en")

        trans_data = self._build_trans_data(self.trans_str, lang)
        trans_data.update({
            "sign": self.get_sign(self.gtk),
            "token": self.token,
            "transtype": "translang",
            "simple_means_flag": 3,
        })
        dict_response = self.parse_url(trans_data, self.trans_url)

        try:
            result_sentence = self._collect_translation(dict_response)
        except (KeyError, TypeError) as e:
            # Error payloads lack 'trans_result'; show the raw response.
            print('[ERROR {}] {}'.format(
                repr(e), json.dumps(dict_response, ensure_ascii=False)))
            return None
        print(result_sentence)
        return result_sentence

    def end_session(self):
        """Quit the remote browser session."""
        self.browser.quit()