133 lines
4.5 KiB
Python
133 lines
4.5 KiB
Python
import hashlib
import os
import time
# import smtplib
# from email.mime.text import MIMEText
# from email.mime.multipart import MIMEMultipart

import requests
import schedule
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
|
||
|
||
# Mail settings for the e-mail notifier (its only consumer, send_email, is
# currently commented out further down in this file).
#
# The SMTP authorization code used to be stored here in plain text. Secrets
# must not live in source control, so it is now read from the environment and
# has no in-source default. NOTE(review): the previously committed code should
# be treated as leaked and revoked in the mailbox settings.
from_email = os.environ.get("JIN10_FROM_EMAIL", "240884432@qq.com")
from_password = os.environ.get("JIN10_FROM_PASSWORD", "")
to_email = os.environ.get("JIN10_TO_EMAIL", "240884432@qq.com")
|
||
|
||
# Email sending function (disabled: the implementation below is commented out)
|
||
# def send_email(content):
|
||
# msg = MIMEMultipart('alternative')
|
||
# msg['Subject'] = '金十数据更新通知'
|
||
# msg['From'] = from_email
|
||
# msg['To'] = to_email
|
||
# msg.attach(MIMEText(content, 'html'))
|
||
|
||
# try:
|
||
# server = smtplib.SMTP_SSL('smtp.qq.com', 465)
|
||
# server.login(from_email, from_password)
|
||
# server.sendmail(from_email, to_email, msg.as_string())
|
||
# server.quit()
|
||
# print("邮件发送成功")
|
||
# except Exception as e:
|
||
# print(f"邮件发送失败: {e}")
|
||
|
||
# Feishu (Lark) message sender
FEISHU_WEBHOOK_URL = "https://open.feishu.cn/open-apis/bot/v2/hook/094b85fb-4fc3-46f3-9673-ddb9702f7885"
FEISHU_TIMEOUT_SECONDS = 10


def send_feishu_message(text, webhook_url=FEISHU_WEBHOOK_URL, timeout=FEISHU_TIMEOUT_SECONDS):
    """Post ``text`` as a plain-text message to a Feishu bot webhook.

    Args:
        text: Message content; converted with ``str()`` before sending.
        webhook_url: Webhook endpoint to post to. Defaults to the URL this
            script has always used, so existing one-argument calls behave
            as before.
        timeout: Seconds ``requests`` may wait on the server before raising.
            Without a timeout a stalled connection would block the caller
            (and therefore the scheduler loop) indefinitely.

    Raises:
        requests.RequestException: On connection failure or timeout. This is
            deliberately not caught here so the caller can decide whether the
            message counts as delivered.
    """
    payload = {
        "msg_type": "text",
        "content": {"text": str(text)},
    }
    response = requests.post(
        webhook_url,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=timeout,
    )
    # Only the HTTP status is checked; a non-200 reply is reported, not raised.
    if response.status_code != 200:
        print(f"飞书消息发送失败,状态码: {response.status_code}, 响应内容: {response.text}")
|
||
|
||
# Module-level record of the previously matched content (stored as its MD5 hex digest)
|
||
last_matched_hash = None  # stays None until fetch_news() records its first match
|
||
|
||
# Main scraping routine
def fetch_news():
    """Scrape jin10.com flash items and send a Feishu alert when the tracked text changes.

    Steps:
      1. Open https://www.jin10.com/ in headless Chrome.
      2. Wait for the filter element, overwrite its ``className`` with
         ``"hot-filter_item is-active"``, scroll the update area into view and
         give the page five seconds to render.
      3. Parse the page and, among the first 40 flash-item containers, keep
         those whose text contains the marker ``默认火热沸爆``. The text after
         the marker gets a ``:`` inserted after its first character.
      4. Hash the last such text. If a hash from an earlier run exists and
         differs, send the text via ``send_feishu_message``. The stored hash
         is then updated, so the very first run only records a baseline.

    Reads and writes the module-level ``last_matched_hash``. Returns ``None``.
    Every exception (including a failed browser launch) is reported with
    ``print`` and swallowed so that one bad run does not stop the scheduler.
    """
    global last_matched_hash

    marker = '默认火热沸爆'
    target_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'
    update_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[3]'

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')

    # Created inside the try block so a failed Chrome launch is reported like
    # any other error instead of propagating out of the scheduled job.
    driver = None
    try:
        driver = webdriver.Chrome(options=options)
        driver.get("https://www.jin10.com/")

        # Block until the filter element exists; the script below looks it up
        # again by XPath, so the returned handle itself is not needed.
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, target_xpath))
        )

        script = '''
            var element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
            element.className = "hot-filter_item is-active";
        '''
        driver.execute_script(script, target_xpath)

        update_element = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, update_xpath))
        )
        driver.execute_script('arguments[0].scrollIntoView(true);', update_element)
        # Fixed pause; presumably lets the page finish loading items — TODO confirm.
        time.sleep(5)

        soup = BeautifulSoup(driver.page_source, 'lxml')
        items = soup.find_all('div', class_='jin-flash-item-container is-normal')

        matched_count = 0
        modified_text = ''
        text_data = {}
        text_list = []
        for item in items[:40]:
            text_content = item.get_text(strip=True)
            if marker not in text_content:
                continue

            matched_count += 1
            remainder = text_content.split(marker, 1)[1].strip()
            if remainder:
                # Insert ':' after the first character of the trailing text.
                modified_text = remainder[0] + ":" + remainder[1:]
                text_data[matched_count] = modified_text
                text_list.append(modified_text)

        # Debug output. The former ``text_data[matched_count].iloc[0]`` print
        # was removed: the value is a plain str (no ``.iloc``), so it raised on
        # every run with a match and the notification code below never ran.
        print(text_data)
        print(text_list)
        if text_list:
            print(text_list[0])
            print(text_list[-1])

        # ``modified_text`` holds the last matching item seen in the loop.
        if modified_text:
            current_hash = hashlib.md5(modified_text.encode()).hexdigest()
            if last_matched_hash and current_hash != last_matched_hash:
                # send_email(modified_text)
                send_feishu_message(modified_text)
                print(modified_text)
            last_matched_hash = current_hash
            print("last_matched_hash", last_matched_hash)
            print("current_hash", current_hash)

        print(f"\n共找到 {matched_count} 条匹配'默认火热沸爆'的信息")

    except Exception as e:
        print(f"执行出错: {e}")
    finally:
        if driver is not None:
            driver.quit()
|
||
|
||
# Scheduler configuration: register fetch_news to run once every minute.
schedule.every(1).minutes.do(fetch_news)

print("开始定时监控...")

# Poll the scheduler once per second, forever; run_pending() executes any job
# whose time has come.
while True:
    schedule.run_pending()
    time.sleep(1)