import json
import logging
import logging as logger
import re

import execjs
import requests
import selenium
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.firefox.options import Options


class BaiduTranslator:
    """Translate text between Chinese and English through fanyi.baidu.com.

    A remote WebDriver session loads the translate page so that its cookies
    and the ``token`` / ``gtk`` values embedded in the page can be collected.
    Those values, plus a signature computed by a local JavaScript file, are
    then sent with plain ``requests`` calls to the translation endpoint.

    The instance owns a remote browser session; call :meth:`end_session`
    (or use the instance as a context manager) to release it.
    """

    # Remote WebDriver endpoint used when the caller does not supply one.
    DEFAULT_COMMAND_EXECUTOR = "http://39.101.194.63:28050"
    # Seconds to wait for each HTTP request before giving up.
    REQUEST_TIMEOUT = 10
    # JavaScript file that must expose a function named ``e`` (see get_sign).
    SIGN_SCRIPT_PATH = "./gen.js"

    def __init__(self, command_executor=DEFAULT_COMMAND_EXECUTOR):
        """Open the browser session and collect cookies, token and gtk.

        Args:
            command_executor: URL of the remote WebDriver server.

        Raises:
            RuntimeError: if token or gtk cannot be found in the page.
            Exception: anything raised by the WebDriver while loading the
                page; the browser session is closed before re-raising.
        """
        driver_options = Options()
        driver_options.add_argument('-headless')
        # NOTE(review): Firefox options are sent together with EDGE desired
        # capabilities, exactly as before -- confirm this pairing is intended.
        self.browser = selenium.webdriver.remote.webdriver.WebDriver(
            command_executor=command_executor,
            desired_capabilities=DesiredCapabilities.EDGE,
            options=driver_options,
        )
        # Compiled signing script, filled lazily by get_sign().
        self._sign_ctx = None
        try:
            self._load_session_state()
        except Exception:
            # Do not leave an orphaned session on the remote server when
            # setup fails half-way through.
            try:
                self.browser.quit()
            except Exception:
                logger.exception('Failed to close browser session')
            raise
        logger.info('Baidu translate api started success')

    def __enter__(self):
        """Return the translator itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the browser session when leaving the ``with`` block."""
        self.end_session()

    def _load_session_state(self):
        """Visit the translate page and populate cookies, headers, token, gtk."""
        logger.info('Starting browser session...')
        self.browser.get('https://fanyi.baidu.com/translate')
        logger.info('Browser session started')

        self.trans_str = ''
        self.trans_url = "https://fanyi.baidu.com/v2transapi"

        self.dict_cookies = self.browser.get_cookies()
        self.cookies_str = ''.join(
            '%s=%s; ' % (item['name'], item['value'])
            for item in self.dict_cookies
        )
        # NOTE(review): cookies, token and gtk are written to the log in
        # clear text (unchanged from the previous behaviour).
        logger.info('Set cookies as %s', self.cookies_str)

        self.headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
            "Cookie": self.cookies_str,
        }

        self.pattern = re.compile(
            r"window\['common'\]\W*?=\W*?{\W*?.*?token.*?:.*?'(\w+)',")
        self.pattern_gtk = re.compile(r'window.gtk\W*?=\W*?"(.*?)"')

        self.token, self.gtk = self.get_token_or_gtk()
        logger.info('Set token as %s', self.token)
        logger.info('Set gtk as %s', self.gtk)

    def parse_url(self, data, url="https://fanyi.baidu.com/langdetect",
                  timeout=REQUEST_TIMEOUT):
        """POST ``data`` as a form to ``url`` and return the decoded JSON body.

        Args:
            data: form fields to send.
            url: endpoint to post to.
            timeout: seconds to wait for the server; passed to
                ``requests.post`` so a stalled connection cannot block forever.

        Returns:
            The parsed JSON response.

        Raises:
            requests.RequestException: on network failure or timeout.
            ValueError: if the response body is not valid JSON.
        """
        response = requests.post(
            url, data=data, headers=self.headers, timeout=timeout)
        return json.loads(response.content.decode())

    def get_token_or_gtk(self, url="https://fanyi.baidu.com/translate"):
        """Load ``url`` in the browser and extract ``(token, gtk)`` from its HTML.

        Raises:
            RuntimeError: if either value cannot be found in the page source.
        """
        self.browser.get(url)
        # NOTE(review): find_element_by_xpath matches the Selenium API
        # generation implied by the desired_capabilities argument used in
        # __init__; newer Selenium releases spell this differently.
        page_str = self.browser.find_element_by_xpath(
            "//*").get_attribute("outerHTML")

        token_match = self.pattern.search(page_str)
        if token_match is None:
            raise RuntimeError('token not found in page %s' % url)
        gtk_match = self.pattern_gtk.search(page_str)
        if gtk_match is None:
            raise RuntimeError('gtk not found in page %s' % url)
        return token_match.group(1), gtk_match.group(1)

    def get_sign(self, gtk):
        """Return the request signature for ``self.trans_str``.

        The signature is produced by calling the JavaScript function ``e``
        from ``./gen.js`` with the current text and ``gtk``. The script is
        read and compiled once per instance and reused afterwards.
        """
        ctx = getattr(self, '_sign_ctx', None)
        if ctx is None:
            with open(self.SIGN_SCRIPT_PATH, 'r', encoding='utf-8') as f:
                ctx = execjs.compile(f.read())
            self._sign_ctx = ctx
        return ctx.call("e", self.trans_str, gtk)

    def run(self, trans_str):
        """Translate ``trans_str`` and print the result.

        The language-detect endpoint is queried first: text reported as
        ``zh`` is translated to English, anything else is treated as English
        and translated to Chinese.

        Returns:
            The translated text, or ``None`` when the response does not have
            the expected shape (in which case the raw response is printed).
        """
        self.trans_str = trans_str

        try:
            lang = self.parse_url({"query": self.trans_str})["lan"]
        except KeyError:
            # No language reported: fall back to English as the source.
            lang = 'en'
        source, target = ("zh", "en") if lang == "zh" else ("en", "zh")

        trans_data = {
            "query": self.trans_str,
            "from": source,
            "to": target,
            "sign": self.get_sign(self.gtk),
            "token": self.token,
            "transtype": "translang",
            "simple_means_flag": 3,
        }
        dict_response = self.parse_url(trans_data, self.trans_url)

        try:
            result_sentence = ''.join(
                d['dst'] for d in dict_response['trans_result']['data'])
        except (KeyError, TypeError) as e:
            # The service answered with something other than a translation
            # (e.g. an error payload); show it instead of crashing.
            print('[ERROR {}] {}'.format(
                repr(e), json.dumps(dict_response, ensure_ascii=False)))
            return None

        print(result_sentence)
        return result_sentence

    def end_session(self):
        """Close the remote browser session."""
        self.browser.quit()