# File: Quant_Code/3.新闻抓取与通知/jin10_send_mail.py
# Last modified: 2025-04-09 17:18:30 +08:00 (134 lines, 4.4 KiB, Python)
#
# NOTE: the repository viewer flagged this file as containing ambiguous
# Unicode characters, i.e. characters that might be confused with other
# characters. If that is intentional, the warning can be safely ignored.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import schedule
import time
import hashlib
import csv # 导入csv模块
import os  # only needed to read the optional overrides below

# Mail settings.
# SECURITY NOTE(review): the SMTP authorization code used to live only as a
# literal in this file. Each value can now be overridden from the environment
# (JIN10_FROM_EMAIL / JIN10_FROM_PASSWORD / JIN10_TO_EMAIL); the literals are
# kept as defaults purely for backward compatibility. The committed
# authorization code should be rotated and the default removed.
from_email = os.environ.get("JIN10_FROM_EMAIL", "240884432@qq.com")
from_password = os.environ.get("JIN10_FROM_PASSWORD", "osjyjmbqrzxtbjbf")
to_email = os.environ.get("JIN10_TO_EMAIL", "240884432@qq.com")

# Target page to scrape (an earlier comment says it was changed to a new URL).
url = 'https://www.jjin10.com/'

# Browser-like request headers. Nothing in the visible part of this file reads
# this dict (the page is loaded through Selenium); kept in case other code
# refers to it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
}

# Messages already handled, used for de-duplication.
# Layout: {item id: (content hash, timestamp of when it was recorded)}
last_records = {}
# Send a single HTML e-mail.
def send_email(subject, content, to_email):
    """Send an HTML e-mail through smtp.qq.com over implicit SSL (port 465).

    The sender address and credentials are taken from the module-level
    ``from_email`` / ``from_password`` settings.

    Args:
        subject: Value for the ``Subject`` header.
        content: Message body; attached as a ``text/html`` part.
        to_email: Recipient address (also used for the ``To`` header).

    Returns:
        None. Sending is best-effort: any failure is printed, never raised,
        so that a mail problem cannot stop the caller.
    """
    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = from_email
    msg['To'] = to_email
    msg.attach(MIMEText(content, 'html'))
    try:
        # The context manager sends QUIT and closes the socket even when
        # login/sendmail raises (the previous code skipped quit() on error).
        # The timeout keeps a stalled server from blocking the caller forever.
        with smtplib.SMTP_SSL('smtp.qq.com', 465, timeout=30) as server:
            server.login(from_email, from_password)
            server.sendmail(from_email, to_email, msg.as_string())
        print("邮件发送成功")
    except Exception as e:
        # Deliberately broad: this is a best-effort notification boundary.
        print(f"邮件发送失败: {e}")
# Scrape the flash-news page and e-mail whatever is new.
def fetch_and_notify():
    """Scrape the hot ("爆") flash-news items and e-mail the ones not seen yet.

    Steps:
      1. Open ``url`` in headless Chrome, click the hot-items filter and grab
         the rendered page source.
      2. Keep the item containers that carry the hot marker.
      3. Treat an item as new when its id is not in ``last_records`` or its
         content hash differs from the recorded one.
      4. If there are new items: record them, send one e-mail listing them,
         drop records older than 24 hours and write the new items to
         ``news.csv``. Otherwise send a "nothing new" e-mail.

    If the page yields no hot items at all, the function returns without
    sending anything.

    Side effects: rebinds/mutates the module-level ``last_records``, sends
    e-mail via ``send_email`` and overwrites ``news.csv``.

    Raises:
        Whatever Selenium raises while loading the page or waiting for the
        filter button; the browser is always closed first.
    """
    global last_records
    # Local import so this function brings its own dependency into scope.
    from html import escape

    # Start a headless browser.
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        # Click the hot-items filter button.
        # (Original note: make sure this selector still applies to the new page.)
        button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '.flash-hot_text.is-bao'))
        )
        # Click through JavaScript rather than a native WebDriver click.
        driver.execute_script("arguments[0].click();", button)
        time.sleep(3)  # give the filtered content time to load
        page_source = driver.page_source
    finally:
        # Always release the browser, even if the page failed to load.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')
    items = soup.find_all('div', class_='jin-flash-item-container is-normal')
    filtered_items = [
        item for item in items
        if item.find('i', class_='flash-hot_text is-bao')
    ]
    if not filtered_items:
        return

    # Hash each item's text and keep only the ones not already recorded.
    new_items = []
    for item in filtered_items:
        text_node = item.find('div', class_='flash-text')
        if text_node is None:
            # No text block in this container: nothing to report for it.
            continue
        content = text_node.get_text(strip=True)
        content_hash = hashlib.md5(content.encode()).hexdigest()
        # Items without an id would all share the key None and overwrite each
        # other's record, so fall back to the content hash as the key.
        item_id = item.get('id') or content_hash
        time_node = item.find('div', class_='item-time')
        # New when the id is unknown, or known but with different content.
        if item_id not in last_records or last_records[item_id][0] != content_hash:
            new_items.append({
                'id': item_id,
                'time': time_node.get_text(strip=True) if time_node is not None else '',
                'content': content,
                'hash': content_hash
            })

    if new_items:
        # Scraped text is untrusted: escape it before embedding it in HTML.
        email_content = "".join(
            f"<p><strong>消息 {i}:</strong><br>时间: {escape(item['time'])}<br>内容: {escape(item['content'])}</p>"
            '<hr>'
            for i, item in enumerate(new_items, 1)
        )
        # Record the items before sending, as before.
        for item in new_items:
            last_records[item['id']] = (item['hash'], time.time())
        send_email("金十数据市场快讯", email_content, to_email)
        # Drop records older than 24 hours.
        expire_time = time.time() - 86400
        last_records = {k: v for k, v in last_records.items() if v[1] > expire_time}
        # Write the new items to the CSV file.
        # NOTE(review): mode 'w' replaces the file on every run, so it only
        # ever holds the latest batch. Confirm whether appending was intended.
        with open('news.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
            fieldnames = ['id', 'time', 'content', 'hash']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(new_items)
    else:
        # Nothing new: send a short notice instead.
        send_email("金十数据市场快讯", "<p>没有新的市场快讯信息。</p>", to_email)
# Run the scrape-and-notify job every 5 minutes.
def _run_fetch_job():
    """Run ``fetch_and_notify`` and log, rather than propagate, any failure.

    ``schedule`` does not catch exceptions raised by a job, so an unhandled
    error (for example a Selenium timeout) would end the monitoring loop
    below. Catching it inside the job lets the job return normally, so the
    regular 5-minute cadence continues.
    """
    try:
        fetch_and_notify()
    except Exception as e:
        # Top-level boundary of a long-running monitor: report and carry on.
        print(f"抓取任务执行失败: {e}")


schedule.every(5).minutes.do(_run_fetch_job)
print("开始监控市场快讯信息...")
# Poll the scheduler once per second; runs until the process is interrupted.
while True:
    schedule.run_pending()
    time.sleep(1)