20250408修改

This commit is contained in:
2025-04-09 17:18:30 +08:00
parent f925dff46b
commit aaf2224484
146 changed files with 157794 additions and 5718 deletions

View File

@@ -0,0 +1,133 @@
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
# import smtplib
# from email.mime.text import MIMEText
# from email.mime.multipart import MIMEMultipart
import hashlib
import schedule
import requests
# Mail account settings. In the visible code they are referenced only by the
# commented-out send_email helper below.
# NOTE(review): from_password is a credential committed in plain text -
# consider rotating it and loading it from the environment instead.
from_email = to_email = "240884432@qq.com"
from_password = "osjyjmbqrzxtbjbf"
# Email sending function (currently disabled; kept commented out for reference)
# def send_email(content):
# msg = MIMEMultipart('alternative')
# msg['Subject'] = '金十数据更新通知'
# msg['From'] = from_email
# msg['To'] = to_email
# msg.attach(MIMEText(content, 'html'))
# try:
# server = smtplib.SMTP_SSL('smtp.qq.com', 465)
# server.login(from_email, from_password)
# server.sendmail(from_email, to_email, msg.as_string())
# server.quit()
# print("邮件发送成功")
# except Exception as e:
# print(f"邮件发送失败: {e}")
# Feishu message sending function
def send_feishu_message(text):
    """Post ``text`` as a plain-text message to the Feishu bot webhook.

    Args:
        text: Message content; it is formatted into a string before sending.

    Returns:
        None. A non-200 HTTP status is reported on stdout, not raised.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed, including when the timeout below expires.
    """
    # NOTE(review): the webhook URL embeds the bot token in source - consider
    # moving it to configuration.
    webhook_url = "https://open.feishu.cn/open-apis/bot/v2/hook/094b85fb-4fc3-46f3-9673-ddb9702f7885"
    payload = {
        "msg_type": "text",
        "content": {
            "text": f'{text}',
        },
    }
    # Without a timeout requests waits indefinitely, which would stall the
    # scheduler loop that drives this script.
    response = requests.post(
        webhook_url,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=10,
    )
    if response.status_code != 200:
        print(f"飞书消息发送失败,状态码: {response.status_code}, 响应内容: {response.text}")
# Module-level state: MD5 hex digest of the text matched on the previous run.
# Starts as None; fetch_news reads it to detect a change and then updates it.
last_matched_hash = None
# Main scraping function
def fetch_news():
    """Scrape jin10.com once and notify Feishu when the matched text changes.

    Loads https://www.jin10.com/ in headless Chrome, sets the class of the
    filter node at a fixed XPath to the active state, scrolls the update
    container into view and parses the rendered page. Among the first 40
    flash items, those whose text contains the marker '默认火热沸爆' are
    counted, and the non-empty text following the marker is collected.

    The MD5 of the last collected text is compared with the module global
    ``last_matched_hash``. When a previous hash exists and differs, the text
    is sent through ``send_feishu_message``. The global is then updated, so
    the first successful run only records a baseline and sends nothing.

    Returns:
        None. Every error, including a browser start-up failure, is caught
        and printed so that the scheduler loop calling this keeps running.
    """
    global last_matched_hash
    marker = '默认火热沸爆'
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = None
    try:
        # Started inside the try so a browser start-up failure is reported
        # like any other error instead of propagating to the caller.
        driver = webdriver.Chrome(options=options)
        driver.get("https://www.jin10.com/")

        target_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'
        # Only wait for the node to exist; the script below looks it up again
        # by XPath.
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, target_xpath))
        )
        script = '''
            var element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
            element.className = "hot-filter_item is-active";
        '''
        driver.execute_script(script, target_xpath)

        update_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[3]'
        update_element = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, update_xpath))
        )
        driver.execute_script('arguments[0].scrollIntoView(true);', update_element)
        # Presumably gives the list time to finish rendering after the scroll.
        time.sleep(5)

        soup = BeautifulSoup(driver.page_source, 'lxml')
        items = soup.find_all('div', class_='jin-flash-item-container is-normal')

        matched_count = 0
        modified_text = ''
        text_data = {}
        text_list = []
        for item in items[:40]:
            text_content = item.get_text(strip=True)
            if marker not in text_content:
                continue
            matched_count += 1
            # Keep only what follows the first occurrence of the marker.
            tail = text_content.partition(marker)[2].strip()
            if tail:
                modified_text = tail
                text_data[matched_count] = tail
                text_list.append(tail)

        # Debug output. The previous `.iloc[0]` access on a str always raised
        # AttributeError, and the list indexing failed when nothing matched.
        print(text_data)
        print(text_list)
        if text_list:
            print(text_list[0])
            print(text_list[-1])

        if modified_text:
            current_hash = hashlib.md5(modified_text.encode()).hexdigest()
            # A falsy previous hash means no baseline yet, so nothing is sent.
            if last_matched_hash and current_hash != last_matched_hash:
                send_feishu_message(modified_text)
                print(modified_text)
            last_matched_hash = current_hash
            print("last_matched_hash", last_matched_hash)
            print("current_hash", current_hash)
        print(f"\n共找到 {matched_count} 条匹配'默认火热沸爆'的信息")
    except Exception as e:
        print(f"执行出错: {e}")
    finally:
        # driver stays None when Chrome failed to start.
        if driver is not None:
            driver.quit()
# Scheduled task configuration: register fetch_news to run every minute, then
# poll the scheduler once per second forever.
every_minute = schedule.every(1).minutes
every_minute.do(fetch_news)
print("开始定时监控...")
while True:
    schedule.run_pending()
    time.sleep(1)