# Listing metadata from the source viewer: 134 lines, 4.4 KiB, Python
import csv  # CSV output for the scraped news
import hashlib
import html
import smtplib
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import schedule
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Mailbox configuration
# NOTE(review): the SMTP authorization code below is committed in plaintext.
# It should be rotated and loaded from the environment or a secrets store.
from_email = "240884432@qq.com"
from_password = "osjyjmbqrzxtbjbf"
to_email = "240884432@qq.com"

# Target URL (updated to point at the new target site)
# NOTE(review): the host is spelled "jjin10" - confirm this is intentional.
url = 'https://www.jjin10.com/'

# Request headers that imitate a desktop browser.
# Not referenced by the Selenium-based scraper visible in this file.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
}

# Records of messages already reported
last_records = {}  # format: {id: (hash, timestamp)}

# Email-sending helper
def send_email(subject, content, to_email):
    """Send an HTML email through smtp.qq.com using SMTP over SSL.

    The message is a ``multipart/alternative`` container with a single HTML
    part. The sender address and login credentials come from the module-level
    ``from_email`` and ``from_password``.

    Args:
        subject: Value for the ``Subject`` header.
        content: HTML markup used as the message body.
        to_email: Recipient address (shadows the module-level ``to_email``).

    Returns:
        None. Delivery failures are reported with ``print`` and are not
        raised, so a failed send never interrupts the caller.
    """
    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = from_email
    msg['To'] = to_email
    msg.attach(MIMEText(content, 'html'))

    try:
        # The context manager closes the connection even when login or
        # sendmail raises; the timeout keeps a stalled server from blocking
        # the caller indefinitely. SSL endpoint, usually port 465.
        with smtplib.SMTP_SSL('smtp.qq.com', 465, timeout=30) as server:
            server.login(from_email, from_password)
            server.sendmail(from_email, to_email, msg.as_string())
        print("邮件发送成功")
    except Exception as e:
        # Best-effort delivery: report the failure and carry on.
        print(f"邮件发送失败: {e}")

# Scrape the page and email anything new
def fetch_and_notify():
    """Scrape the "hot" flash-news items and email those not reported yet.

    Steps:
      1. Open ``url`` in headless Chrome, click the hot-item filter button and
         parse the rendered page with BeautifulSoup.
      2. Keep the item containers that carry the hot marker.
      3. Treat an item as new when its id is absent from ``last_records`` or
         the MD5 of its text differs from the stored hash.
      4. If there are new items: email them, record them in ``last_records``,
         drop records older than 24 hours, and overwrite ``news.csv`` with
         this run's new items. Otherwise email a "nothing new" notice.

    Nothing is sent when the page contains no hot items at all.

    Returns:
        None.

    Raises:
        Whatever Selenium raises while loading the page or waiting for the
        filter button; the browser is always closed first.
    """
    global last_records

    # Start a headless Chrome session
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)

    try:
        driver.get(url)

        # Click the hot-item filter button
        button = WebDriverWait(driver, 10).until(
            # Make sure this selector still applies to the new page
            EC.element_to_be_clickable((By.CSS_SELECTOR, '.flash-hot_text.is-bao'))
        )
        # Click via JavaScript rather than a native WebDriver click
        driver.execute_script("arguments[0].click();", button)
        time.sleep(3)  # wait for the content to load

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        items = soup.find_all('div', class_='jin-flash-item-container is-normal')

        # Keep only containers that carry the hot marker
        filtered_items = [
            item for item in items
            if item.find('i', class_='flash-hot_text is-bao')
        ]
    finally:
        driver.quit()

    if not filtered_items:
        return

    # Hash each item's text and keep the items that are new or changed
    new_items = []
    for item in filtered_items:
        text_node = item.find('div', class_='flash-text')
        if text_node is None:
            # No text block to report or hash; skip instead of crashing.
            continue

        item_id = item.get('id')
        content = text_node.get_text(strip=True)
        content_hash = hashlib.md5(content.encode()).hexdigest()

        # Double check: unknown id, or known id whose content hash changed
        if item_id not in last_records or last_records[item_id][0] != content_hash:
            time_node = item.find('div', class_='item-time')
            new_items.append({
                'id': item_id,
                'time': time_node.get_text(strip=True) if time_node is not None else '',
                'content': content,
                'hash': content_hash,
            })

    if new_items:
        parts = []
        for i, item in enumerate(new_items, 1):
            # Scraped text is untrusted: escape it before embedding in HTML.
            parts.append(
                f"<p><strong>消息 {i}:</strong><br>时间: {html.escape(item['time'])}"
                f"<br>内容: {html.escape(item['content'])}</p>"
            )
            parts.append('<hr>')
            # Update the record for this item
            last_records[item['id']] = (item['hash'], time.time())
        email_content = "".join(parts)

        send_email("金十数据市场快讯", email_content, to_email)

        # Drop expired records (keep 24 hours)
        expire_time = time.time() - 86400
        last_records = {k: v for k, v in last_records.items() if v[1] > expire_time}

        # Write this run's new items to the CSV file (replaces the old file)
        with open('news.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
            fieldnames = ['id', 'time', 'content', 'hash']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            writer.writerows(new_items)
    else:
        # Send a notice email when there is nothing new
        send_email("金十数据市场快讯", "<p>没有新的市场快讯信息。</p>", to_email)

# Run the scraper every 5 minutes
schedule.every(5).minutes.do(fetch_and_notify)

print("开始监控市场快讯信息...")
while True:
    # Poll once per second; run_pending() runs the job when it is due.
    schedule.run_pending()
    time.sleep(1)