20250408修改

This commit is contained in:
2025-04-09 17:18:30 +08:00
parent f925dff46b
commit aaf2224484
146 changed files with 157794 additions and 5718 deletions

View File

@@ -0,0 +1,134 @@
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import schedule
import time
import hashlib
import csv # 导入csv模块
# Mail settings: sender address, its SMTP credential, and the recipient.
# NOTE(review): the credential below is committed in plain text — consider
# rotating it and loading it from the environment or a secrets store.
from_email: str = "240884432@qq.com"
from_password: str = "osjyjmbqrzxtbjbf"
to_email: str = "240884432@qq.com"

# Page that is monitored for news items.
url: str = "https://www.jjin10.com/"

# Browser-like request headers.
# NOTE(review): not referenced by the code visible in this file — confirm
# whether it is still needed.
headers: dict = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/90.0.4430.93 Safari/537.36"
    ),
}

# Already-notified items, keyed by item id: {id: (content_hash, timestamp)}.
last_records: dict = {}
# Send an HTML e-mail through smtp.qq.com.
def send_email(subject, content, to_email):
    """Send ``content`` as an HTML e-mail and report the outcome on stdout.

    The message is sent from the module-level ``from_email`` account,
    authenticated with ``from_password``, over SMTP-over-SSL.

    Args:
        subject: Value for the ``Subject`` header.
        content: HTML markup used as the message body.
        to_email: Recipient address.

    Returns:
        None. Failures are caught and printed rather than raised, so a mail
        problem does not interrupt the caller.
    """
    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = from_email
    msg['To'] = to_email
    msg.attach(MIMEText(content, 'html'))
    try:
        # The context manager closes the connection even when login() or
        # sendmail() raises, so a failed send no longer leaks the socket.
        with smtplib.SMTP_SSL('smtp.qq.com', 465) as server:  # 465: SMTP over SSL
            server.login(from_email, from_password)
            server.sendmail(from_email, to_email, msg.as_string())
        print("邮件发送成功")
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and carry on.
        print(f"邮件发送失败: {e}")
def _scrape_hot_items():
    """Render the page in headless Chrome and return the "hot" news nodes.

    Returns:
        A list of BeautifulSoup tags, one per item container that carries
        the hot marker. The browser is always shut down before returning.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        # Click the "hot" filter button once it becomes clickable.
        button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '.flash-hot_text.is-bao'))
        )
        # Click through JavaScript rather than a native click.
        driver.execute_script("arguments[0].click();", button)
        time.sleep(3)  # give the filtered list time to load
        page_source = driver.page_source
    finally:
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    containers = soup.find_all('div', class_='jin-flash-item-container is-normal')
    return [
        node for node in containers
        if node.find('i', class_='flash-hot_text is-bao')
    ]


def _extract_record(item):
    """Convert one news node into a record dict, or None if it is malformed.

    Returns:
        ``{'id', 'time', 'content', 'hash'}`` where ``hash`` is the MD5 hex
        digest of the content text, or ``None`` when the node lacks the
        text or time element.
    """
    text_node = item.find('div', class_='flash-text')
    time_node = item.find('div', class_='item-time')
    if text_node is None or time_node is None:
        # find() returns None when the markup differs; skip instead of
        # crashing on .get_text().
        return None
    content = text_node.get_text(strip=True)
    return {
        'id': item.get('id'),
        'time': time_node.get_text(strip=True),
        'content': content,
        'hash': hashlib.md5(content.encode()).hexdigest(),
    }


# Scrape the page and e-mail any news items that have not been sent yet.
def fetch_and_notify():
    """Scrape "hot" news items and e-mail the ones not notified before.

    An item counts as new when its id is absent from ``last_records`` or
    its content hash differs from the recorded one.

    Side effects:
        * Sends one e-mail listing the new items, or a "nothing new" notice
          when hot items exist but none of them is new.
        * Updates the module-level ``last_records`` and drops entries older
          than 24 hours (only on runs that found new items).
        * Overwrites ``news.csv`` with the new items of this run.

    Returns:
        None. Returns without sending anything when the page yields no hot
        items at all.
    """
    global last_records
    # Local import keeps this block self-contained; used to escape scraped
    # text before it is embedded in the HTML mail body.
    from html import escape

    hot_items = _scrape_hot_items()
    if not hot_items:
        return

    # Keep only items that are unseen or whose content changed.
    new_items = []
    for node in hot_items:
        record = _extract_record(node)
        if record is None:
            continue
        known = last_records.get(record['id'])
        if known is None or known[0] != record['hash']:
            new_items.append(record)

    if not new_items:
        # Nothing new this round: send a short notice instead.
        send_email("金十数据市场快讯", "<p>没有新的市场快讯信息。</p>", to_email)
        return

    parts = []
    for i, item in enumerate(new_items, 1):
        parts.append(
            f"<p><strong>消息 {i}:</strong><br>"
            f"时间: {escape(item['time'])}<br>"
            f"内容: {escape(item['content'])}</p>"
        )
        parts.append('<hr>')
        # Remember what was notified so the next run can skip it.
        last_records[item['id']] = (item['hash'], time.time())
    send_email("金十数据市场快讯", "".join(parts), to_email)

    # Drop records older than 24 hours.
    expire_time = time.time() - 86400
    last_records = {k: v for k, v in last_records.items() if v[1] > expire_time}

    # Write this run's new items to the CSV file (replaces previous content).
    with open('news.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
        fieldnames = ['id', 'time', 'content', 'hash']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(new_items)
# Register the scraper to run every 5 minutes.
_five_minute_interval = schedule.every(5).minutes
_five_minute_interval.do(fetch_and_notify)

print("开始监控市场快讯信息...")

# Poll the scheduler forever, checking for pending jobs once per second.
while True:
    schedule.run_pending()
    time.sleep(1)