Quant_Code/3.新闻抓取与通知/jin10_new.py

import hashlib
import os
import smtplib
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import schedule
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# SMTP account settings used by send_email().
# Each value can be overridden through an environment variable so the
# credentials do not have to be edited in (or committed with) this file; the
# literals below are only the backward-compatible defaults.
# SECURITY NOTE(review): the default password is a secret stored in source
# control -- rotate it and supply JIN10_FROM_PASSWORD from the environment.
from_email = os.environ.get("JIN10_FROM_EMAIL", "240884432@qq.com")
from_password = os.environ.get("JIN10_FROM_PASSWORD", "osjyjmbqrzxtbjbf")
to_email = os.environ.get("JIN10_TO_EMAIL", "240884432@qq.com")

# Email notification helper
def send_email(content):
    """Send ``content`` as an HTML email via smtp.qq.com (SMTP over SSL, port 465).

    Sender, password and recipient are taken from the module-level
    ``from_email``, ``from_password`` and ``to_email`` settings.  Any failure
    is printed and swallowed, so a mail problem never propagates to the
    caller.

    Args:
        content: Message body; attached with the ``html`` MIME subtype.

    Returns:
        None.
    """
    msg = MIMEMultipart('alternative')
    msg['Subject'] = '金十数据更新通知'
    msg['From'] = from_email
    msg['To'] = to_email
    msg.attach(MIMEText(content, 'html'))

    try:
        # The context manager sends QUIT and closes the socket even when
        # login() or sendmail() raises, so a failed attempt no longer leaks
        # the connection.  The timeout keeps an unresponsive server from
        # blocking the caller indefinitely.
        with smtplib.SMTP_SSL('smtp.qq.com', 465, timeout=30) as server:
            server.login(from_email, from_password)
            server.sendmail(from_email, to_email, msg.as_string())
        print("邮件发送成功")
    except Exception as e:
        # Best-effort notification: report the failure and carry on.
        print(f"邮件发送失败: {e}")

# Module-level state shared across fetch_news() runs: the MD5 hex digest of
# the text matched on the most recent run that found one.  It stays None
# until fetch_news() first stores a digest.
last_matched_hash = None

# Text-parsing helper for fetch_news()
def _extract_hot_news(item_texts):
    """Pick the notification text out of the flash items' plain text.

    Args:
        item_texts: Iterable of strings, one per flash item, tags stripped.

    Returns:
        A ``(matched_count, modified_text)`` tuple: how many texts contain
        the marker '默认火热沸爆爆', and the reformatted text of the LAST such
        item that has content after the marker ('' when there is none).
    """
    matched_count = 0
    modified_text = ''

    for text_content in item_texts:
        if '默认火热沸爆爆' not in text_content:
            continue

        matched_count += 1
        # Cut one character short of the marker on purpose: the final '爆'
        # stays at the front of the tail and becomes the prefix before '：'.
        _, _, tail = text_content.partition('默认火热沸爆')
        tail = tail.strip()
        if tail:
            # NOTE(review): every match overwrites the previous one, so only
            # the last match within the inspected items is kept -- confirm
            # that this (rather than the first match) is the intended item.
            modified_text = tail[0] + "：" + tail[1:]

    return matched_count, modified_text


# Main scraping routine
def fetch_news():
    """Scrape jin10.com once and email the matched text when it has changed.

    Steps: start headless Chrome, load the home page, set the class of one
    filter element to "hot-filter_item is-active" (presumably to switch on
    that hot-news filter -- verify against the live page), scroll the update
    area into view, wait 5 seconds, then parse the first 40 flash items.

    The matched text is MD5-hashed and compared with the module-level
    ``last_matched_hash``.  An email is sent only when a previous hash exists
    and differs, so the first successful run just records a baseline.

    All errors -- including a failure to start the browser -- are printed
    and swallowed so that one bad cycle cannot stop the scheduler loop.

    Returns:
        None.
    """
    global last_matched_hash

    # Created inside the try block below; stays None if Chrome fails to start.
    driver = None

    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        driver = webdriver.Chrome(options=options)

        driver.get("https://www.jin10.com/")

        # Absolute XPath of the filter element; wait until it is in the DOM.
        target_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, target_xpath))
        )

        # Re-resolve the element in the page and overwrite its class list.
        script = '''
        var element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
        element.className = "hot-filter_item is-active";
        '''
        driver.execute_script(script, target_xpath)

        update_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[3]'
        update_element = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, update_xpath))
        )
        driver.execute_script('arguments[0].scrollIntoView(true);', update_element)
        # Fixed pause to give the page time to render after scrolling.
        time.sleep(5)

        soup = BeautifulSoup(driver.page_source, 'lxml')
        items = soup.find_all('div', class_='jin-flash-item-container is-normal')

        # Only the first 40 items on the page are inspected.
        matched_count, modified_text = _extract_hot_news(
            item.get_text(strip=True) for item in items[:40]
        )

        print(modified_text)
        if modified_text:
            current_hash = hashlib.md5(modified_text.encode()).hexdigest()
            # No email on the very first match: there is nothing to compare
            # against yet, so that run only records the baseline hash.
            if last_matched_hash and current_hash != last_matched_hash:
                send_email(modified_text)
                print(modified_text)
            last_matched_hash = current_hash
            print("last_matched_hash", last_matched_hash)
            print("current_hash", current_hash)

        print(f"\n共找到 {matched_count} 条匹配'默认火热沸爆爆'的信息")

    except Exception as e:
        print(f"执行出错: {e}")
    finally:
        # Always release the browser, but only if it was actually started,
        # and do not let a failing shutdown escape into the scheduler loop.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                print(f"执行出错: {e}")

# Scheduler setup: register fetch_news to run once every minute.
every_minute = schedule.every(1).minutes
every_minute.do(fetch_news)

print("开始定时监控...")

# Poll forever: run whatever jobs are due, then pause one second before
# checking again.
while True:
    schedule.run_pending()
    time.sleep(1)