20250408修改
This commit is contained in:
134
3.新闻抓取与通知/jin10_send_mail.py
Normal file
134
3.新闻抓取与通知/jin10_send_mail.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import csv  # persists each batch of new items to news.csv
import hashlib
import html
import os
import smtplib
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import schedule
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
|
||||
|
||||
# E-mail settings.
# Each value can be overridden through an environment variable; the literals
# are kept only as fallbacks so existing deployments keep working unchanged.
from_email = os.environ.get("JIN10_FROM_EMAIL", "240884432@qq.com")
# SECURITY: this SMTP credential is committed to the repository. Rotate it,
# supply the new value via JIN10_SMTP_PASSWORD, and then delete this fallback.
from_password = os.environ.get("JIN10_SMTP_PASSWORD", "osjyjmbqrzxtbjbf")
to_email = os.environ.get("JIN10_TO_EMAIL", "240884432@qq.com")
|
||||
|
||||
# Target URL (updated to the new address); this is the page loaded on each run.
url = 'https://www.jjin10.com/'

# Request headers that imitate a desktop browser.
# NOTE(review): nothing in the visible code reads `headers` -- confirm whether
# it is still needed.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/90.0.4430.93 Safari/537.36'
    ),
}

# Tracking record of already-seen messages.
# Layout: {id: (hash, timestamp)}
last_records = dict()
|
||||
|
||||
def send_email(subject, content, to_email):
    """Send an HTML e-mail over SMTP-SSL via smtp.qq.com:465.

    The sender address and password come from the module-level
    ``from_email`` / ``from_password`` settings.

    Args:
        subject: Value for the Subject header.
        content: HTML markup attached as the message body.
        to_email: Recipient address; also used for the To header.

    Returns:
        None. Any failure is printed instead of raised, so a mail problem
        never propagates to the caller.
    """
    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = from_email
    msg['To'] = to_email
    msg.attach(MIMEText(content, 'html'))

    try:
        # The context manager issues QUIT and closes the socket even when
        # login() or sendmail() raises, so a failed send no longer leaks the
        # connection. The timeout keeps an unresponsive server from blocking
        # the caller indefinitely.
        with smtplib.SMTP_SSL('smtp.qq.com', 465, timeout=30) as server:
            server.login(from_email, from_password)
            server.sendmail(from_email, to_email, msg.as_string())
        print("邮件发送成功")
    except Exception as e:
        # Deliberately broad: sending is best-effort and must not raise.
        print(f"邮件发送失败: {e}")
|
||||
|
||||
def fetch_and_notify():
    """Scrape the page at ``url`` and e-mail any new or changed "bao" items.

    Steps:
      1. Load ``url`` in headless Chrome, click the ``.flash-hot_text.is-bao``
         filter button and parse the resulting page source.
      2. Keep the item containers that carry the ``flash-hot_text is-bao``
         marker. If there are none, return without sending anything.
      3. Treat an item as new when its id is not in ``last_records`` or its
         content hash differs from the stored one.
      4. If there are new items: record them, e-mail them, drop records older
         than 24 hours and write the batch to ``news.csv``. Otherwise send a
         "nothing new" e-mail.

    Returns:
        None.

    Raises:
        Exceptions raised by Selenium (for example when the filter button
        does not become clickable within 10 seconds) are not caught here;
        the browser is still shut down before they propagate.
    """
    global last_records

    # Start a headless browser session.
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)

    try:
        driver.get(url)

        # Click the "bao" filter button.
        # NOTE(review): confirm this selector still matches the current page.
        button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '.flash-hot_text.is-bao'))
        )
        driver.execute_script("arguments[0].click();", button)
        time.sleep(3)  # give the filtered content time to load

        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        driver.quit()

    # The parsed tree is independent of the browser, so filtering can happen
    # after the driver has been released.
    items = soup.find_all('div', class_='jin-flash-item-container is-normal')
    filtered_items = [
        item for item in items
        if item.find('i', class_='flash-hot_text is-bao')
    ]
    if not filtered_items:
        return

    # Hash each item's text and keep only the unseen or changed ones.
    new_items = []
    for item in filtered_items:
        text_node = item.find('div', class_='flash-text')
        if text_node is None:
            # Without a text element there is nothing to hash or send; skip
            # the item instead of failing the whole run with AttributeError.
            continue

        item_id = item.get('id')
        content = text_node.get_text(strip=True)
        content_hash = hashlib.md5(content.encode()).hexdigest()

        # Double check: the id is unknown, or known with a different hash.
        known = last_records.get(item_id)
        if known is not None and known[0] == content_hash:
            continue

        time_node = item.find('div', class_='item-time')
        new_items.append({
            'id': item_id,
            # A missing time element yields an empty string, not a crash.
            'time': time_node.get_text(strip=True) if time_node is not None else '',
            'content': content,
            'hash': content_hash,
        })

    if not new_items:
        # Nothing new this round: send the notice e-mail instead.
        send_email("金十数据市场快讯", "<p>没有新的市场快讯信息。</p>", to_email)
        return

    now = time.time()
    sections = []
    for i, item in enumerate(new_items, 1):
        # The text comes from an external page, so escape it before embedding
        # it in HTML. The raw text is still what goes into the CSV below.
        sections.append(
            f"<p><strong>消息 {i}:</strong><br>"
            f"时间: {html.escape(item['time'])}<br>"
            f"内容: {html.escape(item['content'])}</p>"
            "<hr>"
        )
        # Remember the item so it is not sent again.
        last_records[item['id']] = (item['hash'], now)

    send_email("金十数据市场快讯", "".join(sections), to_email)

    # Drop records older than 24 hours.
    expire_time = time.time() - 86400
    last_records = {k: v for k, v in last_records.items() if v[1] > expire_time}

    # Write this batch to the CSV file.
    # NOTE(review): mode 'w' replaces the file on every run, so it only ever
    # holds the latest batch; switch to append if a running log is intended.
    with open('news.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
        fieldnames = ['id', 'time', 'content', 'hash']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(new_items)
|
||||
|
||||
def _run_fetch_safely():
    """Run one fetch_and_notify cycle and report, rather than raise, failures.

    ``schedule`` does not catch exceptions raised by a job, so an uncaught
    error from a single cycle would propagate out of ``run_pending()`` and end
    the monitoring loop. Catching it here lets the job return normally, which
    also lets ``schedule`` plan the next run as usual.
    """
    try:
        fetch_and_notify()
    except Exception as e:
        print(f"本轮抓取失败: {e}")


# Run the job every 5 minutes.
schedule.every(5).minutes.do(_run_fetch_safely)

print("开始监控市场快讯信息...")
while True:
    schedule.run_pending()
    time.sleep(1)
|
||||
Reference in New Issue
Block a user