"""Decode Base64 parameters from Rodong Sinmun article URLs into ``es_tags``.

The script selects rows whose ``es_tags`` column is empty, Base64-decodes the
query part of each row's ``es_urlname`` and writes the decoded text back into
``es_tags``.
"""
import base64
import binascii
import os
import re
import urllib.parse
from typing import Optional

# === Database configuration ===
# NOTE(security): the fallback values below are the credentials that were
# hard-coded in this script. They are kept only so existing deployments keep
# working; prefer supplying DSP_DB_* environment variables and removing the
# password from source control.
DB_CONFIG = {
    'host': os.environ.get('DSP_DB_HOST', '47.113.231.200'),
    'port': int(os.environ.get('DSP_DB_PORT', '28089')),
    'user': os.environ.get('DSP_DB_USER', 'root'),
    'password': os.environ.get('DSP_DB_PASSWORD', 'passok123A'),
    'database': os.environ.get('DSP_DB_NAME', 'dsp'),
    'charset': 'utf8mb4',
}

DEFAULT_TABLE = 'indeximos'

# Table names cannot be bound as SQL parameters, so they are validated against
# a strict identifier pattern before being interpolated into a statement.
_TABLE_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')

# Fallback used when urlparse() yields no query component.
_QUERY_FALLBACK_RE = re.compile(r'index\.php\?([A-Za-z0-9+/=]+)')


def decode_rodong_url(url: Optional[str]) -> Optional[str]:
    """Extract and Base64-decode the parameter part of an ``index.php?`` URL.

    Example:
        >>> decode_rodong_url(
        ...     'http://www.rodong.rep.kp/cn/index.php?'
        ...     'MTJAMjAyNi0wMS0wNS0wMDJAMUAxQEAwQDNA==')
        '12@2026-01-05-002@1@1@@0@3@'

    Args:
        url: The article URL. ``None`` or an empty string is accepted.

    Returns:
        The decoded UTF-8 text, or ``None`` when the URL is empty, does not
        contain ``index.php?``, carries no payload, or the payload cannot be
        decoded. Decoding failures are reported on stdout and never raised.
    """
    if not url or 'index.php?' not in url:
        return None

    try:
        query = urllib.parse.urlparse(url).query
        if not query:
            # Unusual URLs (e.g. the payload sits after a '#') leave the query
            # component empty; pull the payload out with a regex instead.
            match = _QUERY_FALLBACK_RE.search(url)
            if match is None:
                return None
            query = match.group(1)

        # Padding may arrive percent-encoded ("%3D"); urlparse does not
        # unquote the query. unquote() leaves '+' untouched, which matters
        # because '+' is a Base64 alphabet character.
        payload = urllib.parse.unquote(query)

        try:
            decoded_bytes = base64.b64decode(payload)
        except binascii.Error:
            # Payloads with stripped or wrong '=' padding are rejected by
            # b64decode; rebuild the padding and retry once.
            stripped = payload.rstrip('=')
            decoded_bytes = base64.b64decode(
                stripped + '=' * (-len(stripped) % 4)
            )

        return decoded_bytes.decode('utf-8')
    except ValueError as e:
        # binascii.Error and UnicodeDecodeError are both ValueError
        # subclasses, as are the errors urlparse raises for malformed URLs.
        # Report and carry on so one bad row does not stop the whole run.
        print(f" 解码失败 (URL: {url[:60]}...): {e}")
        return None


def _update_tags(select_sql: str, update_sql: str,
                 banner: Optional[str] = None) -> None:
    """Run *select_sql*, decode each row's URL and store it via *update_sql*.

    Args:
        select_sql: Statement returning ``(es_sid, es_urlname)`` rows.
        update_sql: Statement with two ``%s`` placeholders, bound to
            ``(decoded_text, es_sid)``.
        banner: Optional message printed once the connection is open.

    Database and unexpected errors are printed, not raised; the cursor and
    connection are always closed.
    """
    # Imported here rather than at module level so that decode_rodong_url()
    # can be imported and tested on machines without the MySQL driver.
    import mysql.connector

    conn = None
    cursor = None
    try:
        conn = mysql.connector.connect(**DB_CONFIG)
        cursor = conn.cursor(buffered=True)

        if banner is not None:
            print(banner)
        cursor.execute(select_sql)
        records = cursor.fetchall()

        if not records:
            print("没有找到需要处理的记录。")
            return

        total = len(records)
        print(f"共找到 {total} 条待处理记录。")

        updated_count = 0
        for i, (es_sid, es_urlname) in enumerate(records, 1):
            print(f"[{i}/{total}] 处理 ID={es_sid} ...", end=" ")

            decoded = decode_rodong_url(es_urlname)
            if decoded is None:
                print("跳过(无法解码)")
                continue

            cursor.execute(update_sql, (decoded, es_sid))
            # Committed per row: an interrupted run keeps the rows that were
            # already updated.
            conn.commit()
            updated_count += 1
            print(f"成功 → {decoded[:50]}{'...' if len(decoded) > 50 else ''}")

        print(f"\n✅ 完成!共更新 {updated_count} 条记录。")

    except mysql.connector.Error as db_err:
        print(f"❌ 数据库错误: {db_err}")
    except Exception as e:
        # Top-level boundary of the script: report instead of a traceback.
        print(f"❌ 脚本执行出错: {e}")
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None and conn.is_connected():
            conn.close()
            print("数据库连接已关闭。")


def main() -> None:
    """Fill ``es_tags`` for rows of ``indeximos`` whose site name is 劳动新闻.

    Rows are selected by ``es_sitename`` and an empty/NULL ``es_tags``; rows
    whose URL cannot be decoded are skipped.
    """
    _update_tags(
        """
        SELECT es_sid, es_urlname
        FROM indeximos
        WHERE es_sitename = '劳动新闻'
          AND (es_tags IS NULL OR es_tags = '')
        """,
        "UPDATE indeximos SET es_tags = %s WHERE es_sid = %s",
        banner="正在查询待处理的新闻记录...",
    )


def main_with_table(table_name: str) -> None:
    """Fill ``es_tags`` in *table_name* for rows whose URL has ``index.php?``.

    Args:
        table_name: Target table. Must be a plain SQL identifier because it is
            interpolated into the statements (identifiers cannot be bound as
            parameters).

    Raises:
        ValueError: If *table_name* contains characters outside
            ``[A-Za-z0-9_]`` or starts with a digit.
    """
    if not _TABLE_NAME_RE.match(table_name):
        raise ValueError("表名包含非法字符!")

    _update_tags(
        f"""
        SELECT es_sid, es_urlname
        FROM `{table_name}`
        WHERE es_urlname LIKE '%index.php?%'
          AND (es_tags IS NULL OR es_tags = '')
        """,
        f"UPDATE `{table_name}` SET es_tags = %s WHERE es_sid = %s",
    )


if __name__ == "__main__":
    main_with_table(DEFAULT_TABLE)