mirror of
https://github.com/zc-zhangchen/any-auto-register.git
synced 2026-05-15 10:56:45 +08:00
@@ -139,7 +139,72 @@ class BaseMailbox(ABC):
|
||||
def get_current_ids(self, account: MailboxAccount) -> set:
|
||||
"""返回当前邮件 ID 集合(用于过滤旧邮件)"""
|
||||
...
|
||||
def _yyds_safe_extract(self, text: str, pattern: str = None) -> Optional[str]:
|
||||
"""通用验证码提取逻辑:若有捕获组则返回 group(1),否则返回 group(0)"""
|
||||
import re
|
||||
|
||||
text = str(text or "")
|
||||
if not text:
|
||||
return None
|
||||
|
||||
# [修复点 1]:优先过滤掉所有 URL 链接,直接从根源防止提取到追踪链接(如 SendGrid)里的随机数字
|
||||
text = re.sub(r"https?://\S+", "", text)
|
||||
|
||||
patterns = []
|
||||
if pattern:
|
||||
# [修复点 2]:如果外部传入了纯 \d{6} 的粗糙正则,自动为其加上字母数字边界
|
||||
if pattern in (r"\d{6}", r"(\d{6})"):
|
||||
patterns.append(r"(?<![a-zA-Z0-9])(\d{6})(?![a-zA-Z0-9])")
|
||||
else:
|
||||
patterns.append(pattern)
|
||||
|
||||
# 先匹配带明显语义的验证码,避免误提取 MIME boundary、时间戳等 6 位数字。
|
||||
patterns.extend(
|
||||
[
|
||||
r"(?is)(?:verification\s+code|one[-\s]*time\s+(?:password|code)|security\s+code|login\s+code|验证码|校验码|动态码|認證碼|驗證碼)[^0-9]{0,30}(\d{6})",
|
||||
r"(?is)\bcode\b[^0-9]{0,12}(\d{6})",
|
||||
# [修复点 3]:修改兜底正则,严格要求 6 位数字前后不能有字母或数字(防止匹配 u20216706)
|
||||
r"(?<![a-zA-Z0-9])(\d{6})(?![a-zA-Z0-9])",
|
||||
]
|
||||
)
|
||||
|
||||
for regex in patterns:
|
||||
m = re.search(regex, text)
|
||||
if m:
|
||||
# 兼容逻辑:若 pattern 中有捕获组则取 group(1),否则取 group(0)
|
||||
return m.group(1) if m.groups() else m.group(0)
|
||||
return None
|
||||
|
||||
def _yyds_decode_raw_content(self, raw: str) -> str:
|
||||
"""解析邮件原始文本 (借鉴自 Fugle),处理 Quoted-Printable 和 HTML 实体"""
|
||||
import quopri, html, re
|
||||
|
||||
text = str(raw or "")
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
# [修复点 4]:只有在明确包含常见邮件 Header 时,才进行 \r\n\r\n 切分。
|
||||
# 否则会误删 MaliAPI 等直接返回的已解析 JSON 正文内容(遇到普通的正文换行就错误截断了)
|
||||
if re.search(r"(?im)^(?:Return-Path|Received|Date|From|To|Subject|Content-Type):", text):
|
||||
if "\r\n\r\n" in text:
|
||||
text = text.split("\r\n\r\n", 1)[1]
|
||||
elif "\n\n" in text:
|
||||
text = text.split("\n\n", 1)[1]
|
||||
|
||||
try:
|
||||
# 处理 Quoted-Printable
|
||||
decoded_bytes = quopri.decodestring(text)
|
||||
text = decoded_bytes.decode("utf-8", errors="ignore")
|
||||
except Exception:
|
||||
pass
|
||||
# 清除 HTML 标签并反转义
|
||||
text = html.unescape(text)
|
||||
text = re.sub(r"(?im)^content-(?:type|transfer-encoding):.*$", " ", text)
|
||||
text = re.sub(r"(?im)^--+[_=\w.-]+$", " ", text)
|
||||
text = re.sub(r"(?i)----=_part_[\w.]+", " ", text)
|
||||
text = re.sub(r"<[^>]+>", " ", text)
|
||||
text = re.sub(r"\s+", " ", text).strip()
|
||||
return text
|
||||
|
||||
def create_mailbox(
|
||||
provider: str, extra: dict = None, proxy: str = None
|
||||
@@ -970,7 +1035,7 @@ class MaliAPIMailbox(BaseMailbox):
|
||||
str(message.get("snippet") or ""),
|
||||
]
|
||||
).strip()
|
||||
search_text = self._decode_raw_content(search_text) or search_text
|
||||
search_text = self._yyds_decode_raw_content(search_text) or search_text
|
||||
search_text = re.sub(
|
||||
r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
|
||||
"",
|
||||
@@ -979,7 +1044,7 @@ class MaliAPIMailbox(BaseMailbox):
|
||||
if keyword and keyword.lower() not in search_text.lower():
|
||||
continue
|
||||
|
||||
code = self._safe_extract(search_text, code_pattern)
|
||||
code = self._yyds_safe_extract(search_text, code_pattern)
|
||||
if code:
|
||||
self._log(f"[MaliAPI] 收到验证码: {code}")
|
||||
return code
|
||||
|
||||
Reference in New Issue
Block a user