同步本地的更新

This commit is contained in:
yuxin-pc 2026-04-14 09:50:18 +08:00
parent 734cab960b
commit 36fa73bd12
3 changed files with 75 additions and 36 deletions

1
.gitignore vendored
View File

@@ -8,6 +8,7 @@ __pycache__/
# Distribution / packaging
.Python
.vscode/
/.venv/
env/
build/
develop-eggs/

View File

@@ -16,7 +16,7 @@ from MediaSpiders.utils.http_utils import http_post, UA
chrome_options = Options()
# 指定 chrome.exe 的完整路径
chrome_options.binary_location = r"C:\Users\DELL\Downloads\chrome-win64\chrome.exe"
chrome_options.binary_location = "D:/chrome-win64/chrome.exe"
# chrome_options.use_chromium = True
driver = webdriver.Chrome(
executable_path=r"D:\chromedriver.exe",

View File

@@ -19,6 +19,16 @@ from ShipSpiders.utils.http_utils import http_post
from ShipSpiders.utils.time_utils import to_unix_timestamp
def cookie_dict_to_str(dict_cookie):
    """Serialize Selenium-style cookie dicts into a Cookie-header string.

    Args:
        dict_cookie: iterable of dicts each having at least ``'name'`` and
            ``'value'`` keys (the shape produced by ``driver.get_cookies()``,
            which is what the caller feeds in).

    Returns:
        str: ``name=value`` pairs joined with ``;`` and no trailing
        separator; an empty string for empty input (same as the original
        ``str_cookie[:-1]`` on ``""``).
    """
    # str.join replaces the quadratic += concatenation loop and removes the
    # need to slice off a trailing ";".
    return ";".join(f"{c['name']}={c['value']}" for c in dict_cookie)
class TrackpointsSpider(scrapy.Spider):
name = 'shipxy_track'
settings = get_project_settings()
@@ -71,21 +81,22 @@ class TrackpointsSpider(scrapy.Spider):
self.driver.get('https://www.shipxy.com/Home/Login')
time.sleep(2)
logger.info('Logging in with user_id and password...')
pwdbutton = self.driver.find_element_by_xpath("//a[text()='密码登录']")
pwdbutton.click()
pwd_button = self.driver.find_element_by_xpath("//a[text()='密码登录']")
pwd_button.click()
time.sleep(0.5)
userName = self.driver.find_element_by_id('userName')
userPWD = self.driver.find_element_by_id('userPWD')
user_name = self.driver.find_element_by_id('userName')
user_pwd = self.driver.find_element_by_id('userPWD')
button = self.driver.find_element_by_id('loginBtn')
userName.send_keys(self.settings['SHIPXY_LOGIN_ACCOUNT'])
userPWD.send_keys(self.settings['SHIPXY_LOGIN_PASSWD'])
user_name.send_keys(self.settings['SHIPXY_LOGIN_ACCOUNT'])
user_pwd.send_keys(self.settings['SHIPXY_LOGIN_PASSWD'])
button.click()
time.sleep(5)
self.driver.get('https://www.shipxy.com/IHS')
logger.info('Logged in! Updating cookies...')
self.driver.get('https://www.shipxy.com/')
logger.info(f"Logged in as {self.settings['SHIPXY_LOGIN_ACCOUNT']}, Updating cookies...")
except:
pass
self.dict_cookie = self.driver.get_cookies()
logger.info(self.dict_cookie)
# logger.info('Getting normal ship trackpoints...')
# slat, elat = -90 * 1000000, 90 * 1000000
@@ -128,41 +139,68 @@ class TrackpointsSpider(scrapy.Spider):
mmsi = sensitive_target['targetValue']
track_api = f'https://www.shipxy.com/Ship/GetTrackAll' \
f'?shipid={mmsi}&btime={btime}&etime={etime}&limit=1&enc=0'
get_md5_databody = {
"shipid": mmsi,
"btime": btime,
"etime": etime,
"limit": "1",
"enc": "0"
}
md5_token = self.get_md5_token_from_webpage(get_md5_databody)
cookie_str = cookie_dict_to_str(self.dict_cookie)
yield scrapy.Request(url=track_api, callback=self.parse_sensitive_ship, cookies=self.dict_cookie,
meta={'mmsi': mmsi})
meta={'mmsi': mmsi},
headers={
# "Cookie": cookie_str,
"S": md5_token['sign'],
"T": md5_token['timestamp']
})
def get_md5_token_from_webpage(self, data):
    """Have the loaded shipxy page sign *data* with its ``window.R0VOQ1NJR04`` JS routine.

    Args:
        data: dict of request parameters to sign (shipid, btime, etime, ...)
            as built by the caller before issuing the GetTrackAll request.

    Returns:
        The object returned by the page's signer; the caller reads the
        ``'sign'`` and ``'timestamp'`` keys from it to fill the ``S`` and
        ``T`` request headers.
    """
    # json.dumps emits a valid JS object literal. Interpolating the raw
    # Python dict repr would break on None/True/False (not valid JS) or on
    # values containing quotes; json escaping prevents both.
    js_script = f'return window.R0VOQ1NJR04({json.dumps(data)});'
    return self.driver.execute_script(js_script)
def decode_track(self, data):
    """Decode an encoded AIS track blob via the page's ``analyseAisTrack`` JS.

    Args:
        data: encoded track payload (the ``'data'`` field of the GetTrackAll
            response) — presumably a string; TODO confirm against the API.

    Returns:
        The ``.data`` attribute of ``analyseAisTrack``'s result; the caller
        iterates it as a list of track-point dicts (each with a ``'utc'`` key).
    """
    # json.dumps quotes and escapes the payload, so embedded '"' or
    # backslashes cannot break out of the JS string literal the way the
    # hand-built f'..."{data}"...' form could.
    js_script = f'return analyseAisTrack({json.dumps(data)}).data;'
    return self.driver.execute_script(js_script)
def parse_sensitive_ship(self, response):
mmsi = response.meta['mmsi']
rsp_obj = json.loads(response.text)
if rsp_obj['status'] != 0:
# logger.info('[SENSITIVE SHIP] No track data of sensitive ship MMSI: %s' % mmsi)
logger.info('[SENSITIVE SHIP] No track data of sensitive ship MMSI: %s' % mmsi)
logger.info(response.text)
return
data = rsp_obj['data']
tracks = []
track_decode_api = self.settings['TRACK_DECODE_SERVICE']
retry_times = 1
while retry_times <= 3:
try:
decode_data = requests.post(track_decode_api, data=data)
if decode_data.content == b'500': # 解码器返回错误值为 b'500'
logger.warning(
"解析服务错误!重启服务中... 第 %d" % retry_times)
time.sleep(3)
retry_times += 1
continue
else:
tracks = json.loads(decode_data.content)
break
except:
logger.warning(
"解析失败3 秒后重试第 %d 次..." % retry_times)
time.sleep(3)
retry_times += 1
if retry_times > 3:
logger.warning(
"[SENSITIVE SHIP] 数据丢失 MMSI: %s" % mmsi)
return
# logger.info(f">>>>>>>>>>>>> data: {data}")
# tracks = []
# track_decode_api = self.settings['TRACK_DECODE_SERVICE']
# retry_times = 1
# while retry_times <= 3:
# try:
# decode_data = requests.post(track_decode_api, data=data)
# if decode_data.content == b'500': # 解码器返回错误值为 b'500'
# logger.warning(
# "解析服务错误!重启服务中... 第 %d 次" % retry_times)
# time.sleep(3)
# retry_times += 1
# continue
# else:
# tracks = json.loads(decode_data.content)
# break
# except:
# logger.warning(
# "解析失败3 秒后重试第 %d 次..." % retry_times)
# time.sleep(3)
# retry_times += 1
#
# if retry_times > 3:
# logger.warning(
# "[SENSITIVE SHIP] 数据丢失 MMSI: %s" % mmsi)
# return
tracks = self.decode_track(data)
logger.info('[SENSITIVE SHIP] MMSI %s%d 个轨迹点 ' % (mmsi, len(tracks)))
for track in tracks:
last_time = track['utc'] * 1000