Files
2025-04-09 17:18:30 +08:00

107 lines
3.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import hashlib
import os
import smtplib
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import schedule
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# SMTP account settings used by send_email().
# Each value may be overridden through an environment variable so that the
# credential does not have to live in the source tree; the literals are kept
# only as backward-compatible fallbacks (an unset or empty variable falls
# back to the literal).
# NOTE(review): the fallback password is a secret committed in plain text --
# rotate it and drop the literal once the environment variables are deployed.
from_email = os.environ.get("JIN10_FROM_EMAIL") or "240884432@qq.com"
from_password = os.environ.get("JIN10_FROM_PASSWORD") or "osjyjmbqrzxtbjbf"
to_email = os.environ.get("JIN10_TO_EMAIL") or "240884432@qq.com"
# E-mail notification helper
def send_email(content):
    """Send ``content`` as an HTML e-mail via ``smtp.qq.com`` (SMTP over SSL).

    The message uses the fixed subject '金十数据更新通知', is sent from the
    module-level ``from_email`` to ``to_email`` and authenticates with
    ``from_password``.  Delivery is best-effort: any failure is printed and
    swallowed, so the function never raises for SMTP/network problems.

    Args:
        content: Message body; attached with the ``html`` MIME subtype, so
            it is interpreted as HTML by the mail client.
    """
    msg = MIMEMultipart('alternative')
    msg['Subject'] = '金十数据更新通知'
    msg['From'] = from_email
    msg['To'] = to_email
    msg.attach(MIMEText(content, 'html'))
    try:
        # The context manager issues QUIT and closes the socket even when
        # login() or sendmail() raises, so a failed send no longer leaks the
        # connection.  The timeout keeps an unresponsive server from blocking
        # the caller indefinitely.
        with smtplib.SMTP_SSL('smtp.qq.com', 465, timeout=30) as server:
            server.login(from_email, from_password)
            server.sendmail(from_email, to_email, msg.as_string())
        print("邮件发送成功")
    except Exception as e:
        # Deliberately broad: a notification failure must not stop the caller.
        print(f"邮件发送失败: {e}")
# Module-level state shared across fetch_news() runs: the MD5 hex digest of
# the text matched on the previous run (None until a run has matched something).
last_matched_hash = None
# Main scraping routine
def fetch_news():
    """Scrape https://www.jin10.com/ once and e-mail the matched text if it changed.

    Steps:
      1. Start headless Chrome and load the page.
      2. Wait (up to 20 s) for the element at ``target_xpath`` and set its
         class to ``hot-filter_item is-active`` via JavaScript
         (presumably to switch on the site's "hot" filter -- confirm).
      3. Wait for the element at ``update_xpath``, scroll it into view and
         sleep 5 s (presumably to let the list render -- confirm).
      4. Parse the page and look at the first 40
         ``div.jin-flash-item-container.is-normal`` items; for each item
         whose text contains the marker '默认火热沸爆爆', keep the text that
         follows '默认火热沸爆'.  The last such item wins.
      5. Hash the kept text with MD5 and compare it with
         ``last_matched_hash``; send an e-mail only when a previous hash
         exists and differs.  The stored hash is then updated.

    Any exception (including a failure to start the browser) is printed and
    swallowed so that the scheduler loop keeps running.  The browser is
    always shut down if it was started.
    """
    global last_matched_hash
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # Created inside the try block below so that a browser start-up failure
    # is reported like every other error instead of escaping to the caller.
    driver = None
    try:
        driver = webdriver.Chrome(options=options)
        driver.get("https://www.jin10.com/")

        target_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'
        # Only the wait matters here; the located element itself is re-resolved
        # by XPath inside the JavaScript snippet.
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, target_xpath))
        )
        script = '''
        var element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
        element.className = "hot-filter_item is-active";
        '''
        driver.execute_script(script, target_xpath)

        update_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[3]'
        update_element = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, update_xpath))
        )
        driver.execute_script('arguments[0].scrollIntoView(true);', update_element)
        time.sleep(5)

        soup = BeautifulSoup(driver.page_source, 'lxml')
        items = soup.find_all('div', class_='jin-flash-item-container is-normal')

        matched_count = 0
        modified_text = ''
        for item in items[:40]:
            text_content = item.get_text(strip=True)
            if '默认火热沸爆爆' not in text_content:
                continue
            matched_count += 1
            # NOTE(review): the filter marker above ends in two '爆' while the
            # split marker has one, so the kept text starts with '爆'.
            # Looks intentional (keeps the item's tag) -- confirm.
            parts = text_content.split('默认火热沸爆', 1)
            tail = parts[1].strip() if len(parts) > 1 else ''
            if tail:
                # NOTE(review): the "" separator is a no-op as written; a
                # non-printing separator character may have been lost from
                # this file -- confirm before simplifying.
                modified_text = tail[0] + "" + tail[1:]
                print(modified_text)

        # NOTE(review): the original indentation of this file was lost; the
        # comparison is placed after the loop so that at most one e-mail is
        # sent per run -- confirm against the original layout.
        if modified_text:
            current_hash = hashlib.md5(modified_text.encode()).hexdigest()
            # No e-mail on the very first match: there is nothing to compare to.
            if last_matched_hash is not None and current_hash != last_matched_hash:
                send_email(modified_text)
                print(modified_text)
            last_matched_hash = current_hash
            print("last_matched_hash", last_matched_hash)
            print("current_hash", current_hash)
        print(f"\n共找到 {matched_count} 条匹配'默认火热沸爆爆'的信息")
    except Exception as e:
        print(f"执行出错: {e}")
    finally:
        # driver stays None when Chrome itself failed to start.
        if driver is not None:
            driver.quit()
# Scheduling setup: register fetch_news to run once every minute, then poll
# the scheduler once per second forever.
every_minute = schedule.every(1).minutes
every_minute.do(fetch_news)
print("开始定时监控...")
while True:
    schedule.run_pending()
    time.sleep(1)