From 36fa73bd12835ae62327b62329e4b41b260a4706 Mon Sep 17 00:00:00 2001 From: yuxin-pc Date: Tue, 14 Apr 2026 09:50:18 +0800 Subject: [PATCH] =?UTF-8?q?=E5=90=8C=E6=AD=A5=E6=9C=AC=E5=9C=B0=E7=9A=84?= =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + .../utils/wechat_links_fetcher.py | 2 +- .../ShipSpiders/spiders/shipxy_track.py | 108 ++++++++++++------ 3 files changed, 75 insertions(+), 36 deletions(-) diff --git a/.gitignore b/.gitignore index df21a1e..3bd4c62 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ # Distribution / packaging .Python .vscode/ +/.venv/ env/ build/ develop-eggs/ diff --git a/spiders/MediaSpiders/MediaSpiders/utils/wechat_links_fetcher.py b/spiders/MediaSpiders/MediaSpiders/utils/wechat_links_fetcher.py index 82eb7ce..fe9259c 100644 --- a/spiders/MediaSpiders/MediaSpiders/utils/wechat_links_fetcher.py +++ b/spiders/MediaSpiders/MediaSpiders/utils/wechat_links_fetcher.py @@ -16,7 +16,7 @@ from MediaSpiders.utils.http_utils import http_post, UA chrome_options = Options() # 指定 chrome.exe 的完整路径 -chrome_options.binary_location = r"C:\Users\DELL\Downloads\chrome-win64\chrome.exe" +chrome_options.binary_location = "D:/chrome-win64/chrome.exe" # chrome_options.use_chromium = True driver = webdriver.Chrome( executable_path=r"D:\chromedriver.exe", diff --git a/spiders/ShipSpiders/ShipSpiders/spiders/shipxy_track.py b/spiders/ShipSpiders/ShipSpiders/spiders/shipxy_track.py index 7447924..c9c4da6 100644 --- a/spiders/ShipSpiders/ShipSpiders/spiders/shipxy_track.py +++ b/spiders/ShipSpiders/ShipSpiders/spiders/shipxy_track.py @@ -19,6 +19,16 @@ from ShipSpiders.utils.http_utils import http_post from ShipSpiders.utils.time_utils import to_unix_timestamp +def cookie_dict_to_str(dict_cookie): + str_cookie = "" + for line in dict_cookie: + str_cookie += line['name'] + str_cookie += "=" + str_cookie += line['value'] + str_cookie += ";" + return str_cookie[:-1] + + class TrackpointsSpider(scrapy.Spider): name = 'shipxy_track' settings = get_project_settings() @@ -71,21 +81,22 @@ class TrackpointsSpider(scrapy.Spider): self.driver.get('https://www.shipxy.com/Home/Login') time.sleep(2) logger.info('Logging in with user_id and password...') - pwdbutton = self.driver.find_element_by_xpath("//a[text()='密码登录']") - pwdbutton.click() + pwd_button = self.driver.find_element_by_xpath("//a[text()='密码登录']") + pwd_button.click() time.sleep(0.5) - userName = self.driver.find_element_by_id('userName') - userPWD = self.driver.find_element_by_id('userPWD') + user_name = self.driver.find_element_by_id('userName') + user_pwd = self.driver.find_element_by_id('userPWD') button = self.driver.find_element_by_id('loginBtn') - userName.send_keys(self.settings['SHIPXY_LOGIN_ACCOUNT']) - userPWD.send_keys(self.settings['SHIPXY_LOGIN_PASSWD']) + user_name.send_keys(self.settings['SHIPXY_LOGIN_ACCOUNT']) + user_pwd.send_keys(self.settings['SHIPXY_LOGIN_PASSWD']) button.click() time.sleep(5) - self.driver.get('https://www.shipxy.com/IHS') - logger.info('Logged in! Updating cookies...') + self.driver.get('https://www.shipxy.com/') + logger.info(f"Logged in as {self.settings['SHIPXY_LOGIN_ACCOUNT']}, Updating cookies...") except: pass self.dict_cookie = self.driver.get_cookies() + logger.info(self.dict_cookie) # logger.info('Getting normal ship trackpoints...') # slat, elat = -90 * 1000000, 90 * 1000000 @@ -128,41 +139,68 @@ class TrackpointsSpider(scrapy.Spider): mmsi = sensitive_target['targetValue'] track_api = f'https://www.shipxy.com/Ship/GetTrackAll' \ f'?shipid={mmsi}&btime={btime}&etime={etime}&limit=1&enc=0' + get_md5_databody = { + "shipid": mmsi, + "btime": btime, + "etime": etime, + "limit": "1", + "enc": "0" + } + md5_token = self.get_md5_token_from_webpage(get_md5_databody) + cookie_str = cookie_dict_to_str(self.dict_cookie) yield scrapy.Request(url=track_api, callback=self.parse_sensitive_ship, cookies=self.dict_cookie, - meta={'mmsi': mmsi}) + meta={'mmsi': mmsi}, + headers={ + # "Cookie": cookie_str, + "S": md5_token['sign'], + "T": md5_token['timestamp'] + }) + + def get_md5_token_from_webpage(self, data): + js_script = f'return window.R0VOQ1NJR04({data});' + result = self.driver.execute_script(js_script) + return result + + def decode_track(self, data): + js_script = f'return analyseAisTrack("{data}").data;' + result = self.driver.execute_script(js_script) + return result def parse_sensitive_ship(self, response): mmsi = response.meta['mmsi'] rsp_obj = json.loads(response.text) if rsp_obj['status'] != 0: - # logger.info('[SENSITIVE SHIP] No track data of sensitive ship MMSI: %s' % mmsi) + logger.info('[SENSITIVE SHIP] No track data of sensitive ship MMSI: %s' % mmsi) + logger.info(response.text) return data = rsp_obj['data'] - tracks = [] - track_decode_api = self.settings['TRACK_DECODE_SERVICE'] - retry_times = 1 - while retry_times <= 3: - try: - decode_data = requests.post(track_decode_api, data=data) - if decode_data.content == b'500': # 解码器返回错误值为 b'500' - logger.warning( - "解析服务错误!重启服务中... 第 %d 次" % retry_times) - time.sleep(3) - retry_times += 1 - continue - else: - tracks = json.loads(decode_data.content) - break - except: - logger.warning( - "解析失败!3 秒后重试第 %d 次..." % retry_times) - time.sleep(3) - retry_times += 1 - - if retry_times > 3: - logger.warning( - "[SENSITIVE SHIP] 数据丢失 MMSI: %s" % mmsi) - return + # logger.info(f">>>>>>>>>>>>> data: {data}") + # tracks = [] + # track_decode_api = self.settings['TRACK_DECODE_SERVICE'] + # retry_times = 1 + # while retry_times <= 3: + # try: + # decode_data = requests.post(track_decode_api, data=data) + # if decode_data.content == b'500': # 解码器返回错误值为 b'500' + # logger.warning( + # "解析服务错误!重启服务中... 第 %d 次" % retry_times) + # time.sleep(3) + # retry_times += 1 + # continue + # else: + # tracks = json.loads(decode_data.content) + # break + # except: + # logger.warning( + # "解析失败!3 秒后重试第 %d 次..." % retry_times) + # time.sleep(3) + # retry_times += 1 + # + # if retry_times > 3: + # logger.warning( + # "[SENSITIVE SHIP] 数据丢失 MMSI: %s" % mmsi) + # return + tracks = self.decode_track(data) logger.info('[SENSITIVE SHIP] MMSI %s 共 %d 个轨迹点 ' % (mmsi, len(tracks))) for track in tracks: last_time = track['utc'] * 1000