# Quant_Code/3.新闻抓取与通知/jin10_new - 副本.py

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time

# Scrape the jin10.com flash-news feed with headless Chrome and print every
# feed item whose text contains KEYWORD.
#
# Flow: open the site, wait for the hot-filter node, overwrite its class via
# JavaScript, scroll the update container into view, pause, then parse the
# rendered HTML with BeautifulSoup and print the matching items.

# Absolute XPaths into the live page. They are tied to the current DOM layout
# and will stop matching if the site's markup changes.
HOT_FILTER_XPATH = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'
UPDATE_XPATH = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[3]'

# Re-resolves the hot-filter node by XPath inside the page and overwrites its
# class attribute.
# NOTE(review): this only changes the class; no click is dispatched.
# Whether the page actually re-filters the feed on a class change alone is
# not visible from this script - confirm against the live site.
ACTIVATE_FILTER_JS = '''
var element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
element.className = "hot-filter_item is-active";
'''

WAIT_TIMEOUT_S = 20     # upper bound for each explicit wait
RENDER_PAUSE_S = 5      # fixed pause to let the page content update
MAX_ITEMS = 40          # only the first 40 feed items are inspected
ITEM_CLASS = 'jin-flash-item-container is-normal'
# Marker an item's text must contain; everything after its first occurrence
# is treated as the message body.
# NOTE(review): presumably the concatenated labels of the hot-filter widget
# rendered inside each item - confirm.
KEYWORD = '默认火热沸爆'

# Initialise the WebDriver.
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # headless mode: no browser window
driver = webdriver.Chrome(options=options)

try:
    driver.get("https://www.jin10.com/")
    wait = WebDriverWait(driver, WAIT_TIMEOUT_S)

    # Block until the hot-filter node is present (raises TimeoutException
    # otherwise). The element handle itself is not needed because the script
    # below looks the node up again by XPath.
    wait.until(EC.presence_of_element_located((By.XPATH, HOT_FILTER_XPATH)))
    driver.execute_script(ACTIVATE_FILTER_JS, HOT_FILTER_XPATH)

    # Scroll the update container into view, then give the page a fixed
    # amount of time to refresh before taking the HTML snapshot.
    update_element = wait.until(
        EC.presence_of_element_located((By.XPATH, UPDATE_XPATH))
    )
    driver.execute_script('arguments[0].scrollIntoView(true);', update_element)
    time.sleep(RENDER_PAUSE_S)

    # Parse the rendered page. A multi-class string passed to class_ matches
    # the class attribute as that exact string.
    soup = BeautifulSoup(driver.page_source, 'lxml')
    items = soup.find_all('div', class_=ITEM_CLASS)

    matched_count = 0
    for item in items[:MAX_ITEMS]:
        text_content = item.get_text(strip=True)

        # partition() splits on the first occurrence only; an empty
        # separator means the keyword is absent from this item.
        _, found, tail = text_content.partition(KEYWORD)
        if not found:
            continue

        matched_count += 1
        print(f"匹配信息 {matched_count}:")

        body = tail.strip()
        if body:
            # Insert a full-width colon after the first character of the body.
            print(body[0] + "：" + body[1:])
        else:
            print("")
    print(f"\n共找到 {matched_count} 条匹配'{KEYWORD}'的信息")

finally:
    # Always shut the browser down, even if a wait timed out.
    driver.quit()