Files
Quant_Code/3.新闻抓取与通知/jin10_feishu.py
2025-04-09 17:18:30 +08:00

133 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
# import smtplib
# from email.mime.text import MIMEText
# from email.mime.multipart import MIMEMultipart
import hashlib
import schedule
import requests
# Mail settings for the e-mail notifier. In the visible part of this file they
# are referenced only by the commented-out send_email() code.
from_email = '240884432@qq.com'
# NOTE(review): this looks like a live SMTP authorization code committed to
# source control - consider rotating it and loading it from the environment.
from_password = 'osjyjmbqrzxtbjbf'
# The notification is sent to the same mailbox it is sent from.
to_email = from_email
# 邮件发送函数
# def send_email(content):
# msg = MIMEMultipart('alternative')
# msg['Subject'] = '金十数据更新通知'
# msg['From'] = from_email
# msg['To'] = to_email
# msg.attach(MIMEText(content, 'html'))
# try:
# server = smtplib.SMTP_SSL('smtp.qq.com', 465)
# server.login(from_email, from_password)
# server.sendmail(from_email, to_email, msg.as_string())
# server.quit()
# print("邮件发送成功")
# except Exception as e:
# print(f"邮件发送失败: {e}")
# 飞书消息发送函数
def send_feishu_message(text, webhook_url=None, timeout=10):
    """Post *text* to a Feishu bot webhook as a plain text message.

    Args:
        text: Message body. It is converted with ``str()`` before sending.
        webhook_url: Webhook endpoint to post to. When ``None`` the URL that
            this script has always used is taken.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the caller
            (and therefore the whole scheduler loop) indefinitely.

    Raises:
        requests.RequestException: If the request cannot be completed
            (including a timeout). The error is deliberately not swallowed
            here so that the caller can decide whether to retry.
    """
    if webhook_url is None:
        # NOTE(review): the hook id in this URL is effectively a secret;
        # consider moving it to configuration instead of source code.
        webhook_url = "https://open.feishu.cn/open-apis/bot/v2/hook/094b85fb-4fc3-46f3-9673-ddb9702f7885"

    headers = {
        "Content-Type": "application/json"
    }
    data = {
        "msg_type": "text",
        "content": {
            "text": str(text)
        }
    }
    response = requests.post(webhook_url, headers=headers, json=data, timeout=timeout)
    if response.status_code != 200:
        print(f"飞书消息发送失败,状态码: {response.status_code}, 响应内容: {response.text}")
# Module-level state shared between fetch_news() runs: the MD5 hex digest of
# the text matched on the previous run, or None until a run has found a match.
last_matched_hash = None
# 主抓取函数
def fetch_news():
    """Scrape jin10.com once and notify Feishu when the matched text changed.

    Steps performed on every call:

    1. Start a headless Chrome, open https://www.jin10.com/ and wait (up to
       20 s each) for the filter element and the flash-list container.
    2. Set the filter element's class to ``hot-filter_item is-active``,
       scroll the list container into view and sleep 5 s before reading the
       page source.
    3. Parse the page with BeautifulSoup and inspect the first 40
       ``div.jin-flash-item-container.is-normal`` elements, keeping the text
       that follows the marker ``默认火热沸爆`` in each of them.
    4. Hash the last kept text with MD5. If a hash from a previous run exists
       and differs, send the text via ``send_feishu_message``. The very first
       run only records the hash and sends nothing.

    Reads and updates the module-level ``last_matched_hash``.

    Any exception raised after the browser has started is caught and
    printed; the browser is always closed. Returns ``None``.
    """
    global last_matched_hash

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get("https://www.jin10.com/")

        # Wait for the filter element, then force its CSS class.
        # NOTE(review): presumably this switches the page to the "hot" filter;
        # confirm that changing the class alone is what the page reacts to.
        target_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, target_xpath))
        )
        script = '''
        var element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
        element.className = "hot-filter_item is-active";
        '''
        driver.execute_script(script, target_xpath)

        # Bring the flash list into view and give the page time to settle.
        update_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[3]'
        update_element = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, update_xpath))
        )
        driver.execute_script('arguments[0].scrollIntoView(true);', update_element)
        time.sleep(5)

        soup = BeautifulSoup(driver.page_source, 'lxml')
        items = soup.find_all('div', class_='jin-flash-item-container is-normal')
        matched_count, hot_texts = _extract_hot_texts(
            item.get_text(strip=True) for item in items[:40]
        )

        # The change detection follows the LAST match among the inspected
        # items, exactly as before.
        # NOTE(review): if the page lists the newest item first, the first
        # match may be the one that should be tracked - confirm.
        modified_text = hot_texts[-1] if hot_texts else ''

        if modified_text:
            current_hash = hashlib.md5(modified_text.encode()).hexdigest()
            # Only notify when there is a previous hash to compare against,
            # so starting the script does not trigger a message by itself.
            if last_matched_hash and current_hash != last_matched_hash:
                send_feishu_message(modified_text)
                print(modified_text)
            last_matched_hash = current_hash
            print("last_matched_hash", last_matched_hash)
            print("current_hash", current_hash)
        print(f"\n共找到 {matched_count} 条匹配'默认火热沸爆'的信息")
    except Exception as e:
        # Best-effort job: report the problem and let the next scheduled run retry.
        print(f"执行出错: {e}")
    finally:
        driver.quit()


def _extract_hot_texts(raw_texts, marker='默认火热沸爆'):
    """Return ``(matched_count, texts)`` for the entries containing *marker*.

    ``matched_count`` counts every entry that contains the marker; ``texts``
    holds, in input order, the stripped text after the first occurrence of
    the marker, skipping entries where nothing follows it.
    """
    matched_count = 0
    texts = []
    for raw in raw_texts:
        if marker not in raw:
            continue
        matched_count += 1
        tail = raw.partition(marker)[2].strip()
        if tail:
            texts.append(tail)
    return matched_count, texts
# Scheduler setup: register fetch_news to run once every minute.
schedule.every(1).minutes.do(fetch_news)
print("开始定时监控...")

# Poll the scheduler forever, checking for due jobs once per second.
while True:
    schedule.run_pending()
    time.sleep(1)