1088 lines
44 KiB
Plaintext
1088 lines
44 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import requests\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"\n",
|
||
"# 目标URL\n",
|
||
"url = 'https://www.jin10.com/'\n",
|
||
"\n",
|
||
"# 设置请求头,模拟浏览器访问\n",
|
||
"headers = {\n",
|
||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'\n",
|
||
"}\n",
|
||
"\n",
|
||
"# 发送GET请求\n",
|
||
"response = requests.get(url, headers=headers)\n",
|
||
"response.raise_for_status() # 如果请求失败则抛出异常\n",
|
||
"\n",
|
||
"# 解析HTML内容\n",
|
||
"soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||
"\n",
|
||
"# 使用CSS选择器查找特定元素\n",
|
||
"selector = '#JinFlashList > div.flash-top > div.flash-top_right > div.flash-top_tool.tw-change-box > div.tool-setting.hide-dot > span:nth-child(2) > div.setting-popup > div:nth-child(3) > div > div.hot-filter_bot > div:nth-child(3)'\n",
|
||
"element = soup.select_one(selector)\n",
|
||
"\n",
|
||
"# 检查元素是否存在并打印内容\n",
|
||
"if element:\n",
|
||
" print(element.prettify())\n",
|
||
"else:\n",
|
||
" print(\"未找到指定的元素\")\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import requests\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"\n",
|
||
"# 目标URL\n",
|
||
"url = 'https://www.jin10.com/'\n",
|
||
"\n",
|
||
"# 设置请求头,模拟浏览器访问\n",
|
||
"headers = {\n",
|
||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'\n",
|
||
"}\n",
|
||
"\n",
|
||
"# 发送GET请求\n",
|
||
"response = requests.get(url, headers=headers)\n",
|
||
"response.raise_for_status() # 如果请求失败则抛出异常\n",
|
||
"\n",
|
||
"# 解析HTML内容\n",
|
||
"soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||
"\n",
|
||
"# 查找所有符合条件的元素\n",
|
||
"items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
"# 提取满足条件的元素\n",
|
||
"filtered_items = []\n",
|
||
"for item in items:\n",
|
||
" hot_labels = item.select('.hot-filter_item_label')\n",
|
||
" for label in hot_labels:\n",
|
||
" if label.get_text(strip=True) == '爆':\n",
|
||
" filtered_items.append(item)\n",
|
||
" break\n",
|
||
"\n",
|
||
"# 打印提取的元素内容\n",
|
||
"for item in filtered_items:\n",
|
||
" time = item.find('div', class_='item-time').get_text(strip=True)\n",
|
||
" content = item.find('div', class_='flash-text').get_text(strip=True)\n",
|
||
" print(f\"时间: {time}\")\n",
|
||
" print(f\"内容: {content}\")\n",
|
||
" print('-' * 40)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import requests\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"\n",
|
||
"# 目标URL\n",
|
||
"url = 'https://www.jin10.com/'\n",
|
||
"\n",
|
||
"# 设置请求头,模拟浏览器访问\n",
|
||
"headers = {\n",
|
||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'\n",
|
||
"}\n",
|
||
"\n",
|
||
"# 发送GET请求\n",
|
||
"response = requests.get(url, headers=headers)\n",
|
||
"response.raise_for_status() # 如果请求失败则抛出异常\n",
|
||
"\n",
|
||
"# 解析HTML内容\n",
|
||
"soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||
"\n",
|
||
"# 查找所有符合条件的元素\n",
|
||
"items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
"# 提取满足条件的元素\n",
|
||
"filtered_items = []\n",
|
||
"for item in items:\n",
|
||
" hot_labels = item.select('.hot-filter_item_label')#hot-filter_item_label\n",
|
||
" for label in hot_labels:\n",
|
||
" if label.get_text(strip=True) == '火':\n",
|
||
" filtered_items.append(item)\n",
|
||
" if len(filtered_items) >= 20:\n",
|
||
" break\n",
|
||
" if len(filtered_items) >= 20:\n",
|
||
" break\n",
|
||
"\n",
|
||
"# 打印提取的20条元素内容\n",
|
||
"for i, item in enumerate(filtered_items, 1):\n",
|
||
" time = item.find('div', class_='item-time').get_text(strip=True)\n",
|
||
" content = item.find('div', class_='flash-text').get_text(strip=True)\n",
|
||
" print(f\"消息 {i}:\")\n",
|
||
" print(f\"时间: {time}\")\n",
|
||
" print(f\"内容: {content}\")\n",
|
||
" print('-' * 40)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import requests\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"\n",
|
||
"# 目标URL\n",
|
||
"url = 'https://www.jin10.com/'\n",
|
||
"\n",
|
||
"# 设置请求头,模拟浏览器访问\n",
|
||
"headers = {\n",
|
||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'\n",
|
||
"}\n",
|
||
"\n",
|
||
"# 发送GET请求\n",
|
||
"response = requests.get(url, headers=headers)\n",
|
||
"response.raise_for_status() # 如果请求失败则抛出异常\n",
|
||
"\n",
|
||
"# 解析HTML内容\n",
|
||
"soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||
"\n",
|
||
"# 查找所有符合条件的元素\n",
|
||
"items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
"# 提取满足条件的元素\n",
|
||
"filtered_items = []\n",
|
||
"for item in items:\n",
|
||
" if item.find('i', class_='jin-icon iconfont icon-huo is-huo'):\n",
|
||
" filtered_items.append(item)\n",
|
||
" if len(filtered_items) >= 20:\n",
|
||
" break\n",
|
||
"\n",
|
||
"# 打印提取的20条元素内容\n",
|
||
"for i, item in enumerate(filtered_items, 1):\n",
|
||
" time = item.find('div', class_='item-time').get_text(strip=True)\n",
|
||
" content = item.find('div', class_='flash-text').get_text(strip=True)\n",
|
||
" print(f\"消息 {i}:\")\n",
|
||
" print(f\"时间: {time}\")\n",
|
||
" print(f\"内容: {content}\")\n",
|
||
" print('-' * 40)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import requests\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import smtplib\n",
|
||
"from email.mime.multipart import MIMEMultipart\n",
|
||
"from email.mime.text import MIMEText\n",
|
||
"import schedule\n",
|
||
"import time\n",
|
||
"\n",
|
||
"# 发件人邮箱和密码\n",
|
||
"from_email = \"240884432@qq.com\"\n",
|
||
"from_password = \"osjyjmbqrzxtbjbf\"\n",
|
||
"to_email = \"240884432@qq.com\"\n",
|
||
"\n",
|
||
"# 目标URL\n",
|
||
"url = 'https://www.jin10.com/'\n",
|
||
"\n",
|
||
"# 设置请求头,模拟浏览器访问\n",
|
||
"headers = {\n",
|
||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'\n",
|
||
"}\n",
|
||
"\n",
|
||
"# 上一次获取的信息\n",
|
||
"last_items = []\n",
|
||
"\n",
|
||
"# 发送邮件的函数\n",
|
||
"def send_email(subject, content, to_email):\n",
|
||
" msg = MIMEMultipart('alternative')\n",
|
||
" msg['Subject'] = subject\n",
|
||
" msg['From'] = from_email\n",
|
||
" msg['To'] = to_email\n",
|
||
" msg.attach(MIMEText(content, 'html'))\n",
|
||
"\n",
|
||
" try:\n",
|
||
" server = smtplib.SMTP_SSL('smtp.qq.com', 465) # 这里使用SSL,端口通常是465\n",
|
||
" server.login(from_email, from_password)\n",
|
||
" server.sendmail(from_email, to_email, msg.as_string())\n",
|
||
" server.quit()\n",
|
||
" print(\"邮件发送成功\")\n",
|
||
" except Exception as e:\n",
|
||
" print(f\"邮件发送失败: {e}\")\n",
|
||
"\n",
|
||
"# 爬取并发送邮件的函数\n",
|
||
"def fetch_and_notify():\n",
|
||
" global last_items\n",
|
||
" # 发送GET请求\n",
|
||
" response = requests.get(url, headers=headers)\n",
|
||
" response.raise_for_status() # 如果请求失败则抛出异常\n",
|
||
"\n",
|
||
" # 解析HTML内容\n",
|
||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||
"\n",
|
||
" # 查找所有符合条件的元素\n",
|
||
" items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
" filtered_items = []\n",
|
||
" for item in items:\n",
|
||
" if item.find('i', class_='jin-icon iconfont icon-huo is-huo'):\n",
|
||
" filtered_items.append(item)\n",
|
||
" if len(filtered_items) >= 20:\n",
|
||
" break\n",
|
||
"\n",
|
||
" if not filtered_items:\n",
|
||
" return\n",
|
||
"\n",
|
||
" new_items = [item for item in filtered_items if item not in last_items]\n",
|
||
" print(filtered_items)\n",
|
||
" print(new_items)\n",
|
||
"\n",
|
||
" if new_items:\n",
|
||
" email_content = \"\"\n",
|
||
" for i, item in enumerate(new_items, 1):\n",
|
||
" time = item.find('div', class_='item-time').get_text(strip=True)\n",
|
||
" content = item.find('div', class_='flash-text').get_text(strip=True)\n",
|
||
" email_content += f\"<p><strong>消息 {i}:</strong><br>时间: {time}<br>内容: {content}</p>\"\n",
|
||
" email_content += '<hr>'\n",
|
||
"\n",
|
||
" send_email(\"金十数据市场快讯\", email_content, to_email)\n",
|
||
" last_items = filtered_items\n",
|
||
" else:\n",
|
||
" send_email(\"金十数据市场快讯\", \"<p>没有新的市场快讯信息。</p>\", to_email)\n",
|
||
"\n",
|
||
"# 每1分钟运行一次\n",
|
||
"schedule.every(1).minutes.do(fetch_and_notify)\n",
|
||
"\n",
|
||
"print(\"开始监控市场快讯信息...\")\n",
|
||
"while True:\n",
|
||
" schedule.run_pending()\n",
|
||
" time.sleep(1)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import requests\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import smtplib\n",
|
||
"from email.mime.multipart import MIMEMultipart\n",
|
||
"from email.mime.text import MIMEText\n",
|
||
"import schedule\n",
|
||
"import time\n",
|
||
"\n",
|
||
"# 邮箱配置\n",
|
||
"from_email = \"240884432@qq.com\"\n",
|
||
"from_password = \"osjyjmbqrzxtbjbf\"\n",
|
||
"to_email = \"240884432@qq.com\"\n",
|
||
"\n",
|
||
"# 目标URL\n",
|
||
"url = 'https://www.jin10.com/'\n",
|
||
"\n",
|
||
"# 设置请求头,模拟浏览器访问\n",
|
||
"headers = {\n",
|
||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'\n",
|
||
"}\n",
|
||
"\n",
|
||
"# 上一次获取的信息\n",
|
||
"last_items = []\n",
|
||
"\n",
|
||
"# 发送邮件的函数\n",
|
||
"def send_email(subject, content, to_email):\n",
|
||
" msg = MIMEMultipart('alternative')\n",
|
||
" msg['Subject'] = subject\n",
|
||
" msg['From'] = from_email\n",
|
||
" msg['To'] = to_email\n",
|
||
" msg.attach(MIMEText(content, 'html'))\n",
|
||
"\n",
|
||
" try:\n",
|
||
" server = smtplib.SMTP_SSL('smtp.qq.com', 465) # 这里使用SSL,端口通常是465\n",
|
||
" server.login(from_email, from_password)\n",
|
||
" server.sendmail(from_email, to_email, msg.as_string())\n",
|
||
" server.quit()\n",
|
||
" print(\"邮件发送成功\")\n",
|
||
" except Exception as e:\n",
|
||
" print(f\"邮件发送失败: {e}\")\n",
|
||
"\n",
|
||
"# 爬取并发送邮件的函数\n",
|
||
"def fetch_and_notify():\n",
|
||
" global last_items\n",
|
||
" response = requests.get(url, headers=headers)\n",
|
||
" response.raise_for_status() # 如果请求失败则抛出异常\n",
|
||
"\n",
|
||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||
" items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
" filtered_items = []\n",
|
||
" for item in items:\n",
|
||
" if item.find('i', class_='jin-icon iconfont icon-huo is-huo'):\n",
|
||
" filtered_items.append(item)\n",
|
||
" if len(filtered_items) >= 10:\n",
|
||
" break\n",
|
||
"\n",
|
||
" if not filtered_items:\n",
|
||
" return\n",
|
||
"\n",
|
||
" new_items = [item for item in filtered_items if item not in last_items]\n",
|
||
"\n",
|
||
" if new_items:\n",
|
||
" email_content = \"\"\n",
|
||
" for i, item in enumerate(new_items, 1):\n",
|
||
" time_text = item.find('div', class_='item-time').get_text(strip=True)\n",
|
||
" content_text = item.find('div', class_='flash-text').get_text(strip=True)\n",
|
||
" email_content += f\"<p><strong>消息 {i}:</strong><br>时间: {time_text}<br>内容: {content_text}</p>\"\n",
|
||
" email_content += '<hr>'\n",
|
||
"\n",
|
||
" send_email(\"金十数据市场快讯\", email_content, to_email)\n",
|
||
" last_items = filtered_items\n",
|
||
" else:\n",
|
||
" send_email(\"金十数据市场快讯\", \"<p>没有新的市场快讯信息。</p>\", to_email)\n",
|
||
"\n",
|
||
"# 每1分钟运行一次\n",
|
||
"schedule.every(1).minutes.do(fetch_and_notify)\n",
|
||
"\n",
|
||
"print(\"开始监控市场快讯信息...\")\n",
|
||
"while True:\n",
|
||
" schedule.run_pending()\n",
|
||
" time.sleep(1)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import requests\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import smtplib\n",
|
||
"from email.mime.multipart import MIMEMultipart\n",
|
||
"from email.mime.text import MIMEText\n",
|
||
"import schedule\n",
|
||
"import time\n",
|
||
"\n",
|
||
"# 邮箱配置\n",
|
||
"from_email = \"240884432@qq.com\"\n",
|
||
"from_password = \"osjyjmbqrzxtbjbf\"\n",
|
||
"to_email = \"240884432@qq.com\"\n",
|
||
"\n",
|
||
"# 目标URL\n",
|
||
"url = 'https://www.jin10.com/'\n",
|
||
"\n",
|
||
"# 设置请求头,模拟浏览器访问\n",
|
||
"headers = {\n",
|
||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'\n",
|
||
"}\n",
|
||
"\n",
|
||
"# 上一次获取的消息ID列表\n",
|
||
"last_item_ids = []\n",
|
||
"\n",
|
||
"# 发送邮件的函数\n",
|
||
"def send_email(subject, content, to_email):\n",
|
||
" msg = MIMEMultipart('alternative')\n",
|
||
" msg['Subject'] = subject\n",
|
||
" msg['From'] = from_email\n",
|
||
" msg['To'] = to_email\n",
|
||
" msg.attach(MIMEText(content, 'html'))\n",
|
||
"\n",
|
||
" try:\n",
|
||
" server = smtplib.SMTP_SSL('smtp.qq.com', 465) # 使用SSL,端口通常是465\n",
|
||
" server.login(from_email, from_password)\n",
|
||
" server.sendmail(from_email, to_email, msg.as_string())\n",
|
||
" server.quit()\n",
|
||
" print(\"邮件发送成功\")\n",
|
||
" except Exception as e:\n",
|
||
" print(f\"邮件发送失败: {e}\")\n",
|
||
"\n",
|
||
"# 爬取并发送邮件的函数\n",
|
||
"def fetch_and_notify():\n",
|
||
" global last_item_ids\n",
|
||
" response = requests.get(url, headers=headers)\n",
|
||
" response.raise_for_status() # 如果请求失败则抛出异常\n",
|
||
"\n",
|
||
" soup = BeautifulSoup(response.content, 'html.parser')\n",
|
||
" items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
" filtered_items = []\n",
|
||
" new_item_ids = []\n",
|
||
" \n",
|
||
" for item in items:\n",
|
||
" item_id = item.get('id')\n",
|
||
" if item.find('i', class_='flash-hot_text is-fei'):# jin-icon iconfont icon-huo is-huo\n",
|
||
" filtered_items.append(item)\n",
|
||
" new_item_ids.append(item_id)\n",
|
||
" if len(filtered_items) >= 5:\n",
|
||
" break\n",
|
||
"\n",
|
||
" if not filtered_items:\n",
|
||
" return\n",
|
||
"\n",
|
||
" new_items = [item for item in filtered_items if item.get('id') not in last_item_ids]\n",
|
||
"\n",
|
||
" if new_items:\n",
|
||
" email_content = \"\"\n",
|
||
" for i, item in enumerate(new_items, 1):\n",
|
||
" time_text = item.find('div', class_='item-time').get_text(strip=True)\n",
|
||
" content_text = item.find('div', class_='flash-text').get_text(strip=True)\n",
|
||
" email_content += f\"<p><strong>消息 {i}:</strong><br>时间: {time_text}<br>内容: {content_text}</p>\"\n",
|
||
" email_content += '<hr>'\n",
|
||
"\n",
|
||
" send_email(\"金十数据市场快讯\", email_content, to_email)\n",
|
||
" last_item_ids = new_item_ids\n",
|
||
" else:\n",
|
||
" send_email(\"金十数据市场快讯\", \"<p>没有新的市场快讯信息。</p>\", to_email)\n",
|
||
"\n",
|
||
"# 每1分钟运行一次\n",
|
||
"schedule.every(1).minutes.do(fetch_and_notify)\n",
|
||
"\n",
|
||
"print(\"开始监控市场快讯信息...\")\n",
|
||
"while True:\n",
|
||
" schedule.run_pending()\n",
|
||
" time.sleep(1)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from selenium import webdriver\n",
|
||
"from selenium.webdriver.common.by import By\n",
|
||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import requests\n",
|
||
"import time\n",
|
||
"\n",
|
||
"# 初始化 WebDriver\n",
|
||
"options = webdriver.ChromeOptions()\n",
|
||
"options.add_argument('--headless') # 无头模式,不打开浏览器界面\n",
|
||
"driver = webdriver.Chrome(options=options)\n",
|
||
"\n",
|
||
"try:\n",
|
||
" # 打开金十数据网站\n",
|
||
" driver.get(\"https://www.jin10.com/\")\n",
|
||
"\n",
|
||
" # 等待网页加载并找到目标元素\n",
|
||
" target_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'\n",
|
||
" target_element = WebDriverWait(driver, 10).until(\n",
|
||
" EC.presence_of_element_located((By.XPATH, target_xpath))\n",
|
||
" )\n",
|
||
" # print(target_element)\n",
|
||
"\n",
|
||
" # 使用 JavaScript 将元素的 class 修改为 \"hot-filter_item is-active\"\n",
|
||
" driver.execute_script('arguments[0].className = \"hot-filter_item is-active\";', target_element)\n",
|
||
"\n",
|
||
" # 等待内容更新(这里等待时间可以根据需要调整)\n",
|
||
" time.sleep(5)\n",
|
||
"\n",
|
||
" # 获取页面内容\n",
|
||
" page_source = driver.page_source\n",
|
||
"\n",
|
||
" # 使用 BeautifulSoup 解析 HTML\n",
|
||
" soup = BeautifulSoup(page_source, 'lxml')\n",
|
||
"\n",
|
||
" # 提取class为'jin-flash-item-container is-normal'的div标签\n",
|
||
" items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
" # 只爬取前5条信息\n",
|
||
" for index, item in enumerate(items[:5], start=1):\n",
|
||
" # print(f\"Information {index}:\")\n",
|
||
" # print(item.get_text(strip=True)) # 打印提取的文本内容\n",
|
||
" time = item.find('div', class_='item-time').get_text(strip=True)\n",
|
||
" content = item.find('div', class_='flash-text').get_text(strip=True)\n",
|
||
" print(f\"消息 {index}:\")\n",
|
||
" print(f\"时间: {time}\")\n",
|
||
" print(f\"内容: {content}\")\n",
|
||
" print('-' * 40)\n",
|
||
"\n",
|
||
"finally:\n",
|
||
" # 关闭 WebDriver\n",
|
||
" driver.quit()\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from selenium import webdriver\n",
|
||
"from selenium.webdriver.common.by import By\n",
|
||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import time\n",
|
||
"\n",
|
||
"# 初始化 WebDriver\n",
|
||
"options = webdriver.ChromeOptions()\n",
|
||
"options.add_argument('--headless') # 无头模式,不打开浏览器界面\n",
|
||
"driver = webdriver.Chrome(options=options)\n",
|
||
"\n",
|
||
"try:\n",
|
||
" # 打开金十数据网站\n",
|
||
" driver.get(\"https://www.jin10.com/\")\n",
|
||
"\n",
|
||
" # 等待网页加载并找到目标元素\n",
|
||
" target_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'\n",
|
||
" WebDriverWait(driver, 20).until(\n",
|
||
" EC.presence_of_element_located((By.XPATH, target_xpath))\n",
|
||
" )\n",
|
||
"\n",
|
||
" # 使用 JavaScript 修改元素的 class\n",
|
||
" script = '''\n",
|
||
" var element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;\n",
|
||
" element.className = \"hot-filter_item is-active\";\n",
|
||
" '''\n",
|
||
" driver.execute_script(script, target_xpath)\n",
|
||
"\n",
|
||
" # 等待内容更新\n",
|
||
" time.sleep(5)\n",
|
||
"\n",
|
||
" # 获取页面内容\n",
|
||
" page_source = driver.page_source\n",
|
||
"\n",
|
||
" # 使用 BeautifulSoup 解析 HTML\n",
|
||
" soup = BeautifulSoup(page_source, 'lxml')\n",
|
||
"\n",
|
||
" # 提取class为'jin-flash-item-container is-normal'的div标签\n",
|
||
" items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
" # 只爬取前5条信息\n",
|
||
" for index, item in enumerate(items[:5], start=1):\n",
|
||
" print(f\"Information {index}:\")\n",
|
||
" print(item.get_text(strip=True)) # 打印提取的文本内容\n",
|
||
"\n",
|
||
"finally:\n",
|
||
" # 关闭 WebDriver\n",
|
||
" driver.quit()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from selenium import webdriver\n",
|
||
"from selenium.webdriver.common.by import By\n",
|
||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import time\n",
|
||
"\n",
|
||
"# 初始化 WebDriver\n",
|
||
"options = webdriver.ChromeOptions()\n",
|
||
"options.add_argument('--headless') # 无头模式,不打开浏览器界面\n",
|
||
"driver = webdriver.Chrome(options=options)\n",
|
||
"\n",
|
||
"try:\n",
|
||
" # 打开金十数据网站\n",
|
||
" driver.get(\"https://www.jin10.com/\")\n",
|
||
"\n",
|
||
" # 等待网页加载并找到目标元素\n",
|
||
" target_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'\n",
|
||
" target_element = WebDriverWait(driver, 20).until(\n",
|
||
" EC.presence_of_element_located((By.XPATH, target_xpath))\n",
|
||
" )\n",
|
||
"\n",
|
||
" # 使用 JavaScript 修改元素的 class\n",
|
||
" script = '''\n",
|
||
" var element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;\n",
|
||
" element.className = \"hot-filter_item is-active\";\n",
|
||
" '''\n",
|
||
" driver.execute_script(script, target_xpath)\n",
|
||
"\n",
|
||
" # 立即更新内容,假设更新内容的位置是已知的\n",
|
||
" update_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[3]'\n",
|
||
" update_element = WebDriverWait(driver, 20).until(\n",
|
||
" EC.presence_of_element_located((By.XPATH, update_xpath))\n",
|
||
" )\n",
|
||
" \n",
|
||
" # 触发页面更新,这里假设更新内容是通过某种操作,比如点击按钮或其他方式\n",
|
||
" # 请根据实际情况调整这里的操作\n",
|
||
" driver.execute_script('arguments[0].scrollIntoView(true);', update_element)\n",
|
||
" time.sleep(5) # 等待页面内容更新\n",
|
||
"\n",
|
||
" # 获取页面内容\n",
|
||
" page_source = driver.page_source\n",
|
||
"\n",
|
||
" # 使用 BeautifulSoup 解析 HTML\n",
|
||
" soup = BeautifulSoup(page_source, 'lxml')\n",
|
||
"\n",
|
||
" # 提取 class 为 'jin-flash-item-container is-normal' 的 div 标签\n",
|
||
" items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
" # 只爬取前40条信息\n",
|
||
" for index, item in enumerate(items[:40], start=1):\n",
|
||
" print(f\"Information {index}:\")\n",
|
||
" print(item.get_text(strip=True)) # 打印提取的文本内容\n",
|
||
"\n",
|
||
"finally:\n",
|
||
" # 关闭 WebDriver\n",
|
||
" driver.quit()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from selenium import webdriver\n",
|
||
"from selenium.webdriver.common.by import By\n",
|
||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import time\n",
|
||
"\n",
|
||
"# 初始化 WebDriver\n",
|
||
"options = webdriver.ChromeOptions()\n",
|
||
"options.add_argument('--headless') # 无头模式,不打开浏览器界面\n",
|
||
"driver = webdriver.Chrome(options=options)\n",
|
||
"\n",
|
||
"try:\n",
|
||
" # 打开金十数据网站\n",
|
||
" driver.get(\"https://www.jin10.com/\")\n",
|
||
"\n",
|
||
" # 等待网页加载并找到目标按钮\n",
|
||
" button_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'\n",
|
||
" button_element = WebDriverWait(driver, 20).until(\n",
|
||
" EC.element_to_be_clickable((By.XPATH, button_xpath))\n",
|
||
" )\n",
|
||
"\n",
|
||
" # 点击按钮\n",
|
||
" button_element.click()\n",
|
||
"\n",
|
||
" # 等待页面内容更新\n",
|
||
" time.sleep(30) # 根据需要调整等待时间\n",
|
||
"\n",
|
||
" # 获取页面内容\n",
|
||
" page_source = driver.page_source\n",
|
||
"\n",
|
||
" # 使用 BeautifulSoup 解析 HTML\n",
|
||
" soup = BeautifulSoup(page_source, 'lxml')\n",
|
||
"\n",
|
||
" # 提取 class 为 'jin-flash-item-container is-normal' 的 div 标签\n",
|
||
" items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
" # 只爬取前5条信息\n",
|
||
" for index, item in enumerate(items[:5], start=1):\n",
|
||
" print(f\"Information {index}:\")\n",
|
||
" print(item.get_text(strip=True)) # 打印提取的文本内容\n",
|
||
"\n",
|
||
"finally:\n",
|
||
" # 关闭 WebDriver\n",
|
||
" driver.quit()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from selenium import webdriver\n",
|
||
"from selenium.webdriver.common.by import By\n",
|
||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import time\n",
|
||
"\n",
|
||
"# 初始化 WebDriver\n",
|
||
"options = webdriver.ChromeOptions()\n",
|
||
"options.add_argument('--headless') # 无头模式,不打开浏览器界面\n",
|
||
"driver = webdriver.Chrome(options=options)\n",
|
||
"\n",
|
||
"try:\n",
|
||
" # 打开金十数据网站\n",
|
||
" driver.get(\"https://www.jin10.com/\")\n",
|
||
"\n",
|
||
" # 等待网页加载并找到目标元素\n",
|
||
" target_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'\n",
|
||
" target_element = WebDriverWait(driver, 20).until(\n",
|
||
" EC.presence_of_element_located((By.XPATH, target_xpath))\n",
|
||
" )\n",
|
||
"\n",
|
||
" # 使用 JavaScript 修改元素的 class\n",
|
||
" script_modify_class = '''\n",
|
||
" var element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;\n",
|
||
" element.className = \"hot-filter_item is-active\";\n",
|
||
" '''\n",
|
||
" driver.execute_script(script_modify_class, target_xpath)\n",
|
||
"\n",
|
||
" # 触发页面内容更新\n",
|
||
" update_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[3]'\n",
|
||
" update_element = WebDriverWait(driver, 20).until(\n",
|
||
" EC.presence_of_element_located((By.XPATH, update_xpath))\n",
|
||
" )\n",
|
||
"\n",
|
||
" # 使用 JavaScript 来调用更新方法\n",
|
||
" script_update_content = '''\n",
|
||
" var updateElement = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;\n",
|
||
" updateElement.classList.add(\"el-loading-parent--relative\");\n",
|
||
" // 触发实际的内容更新逻辑,这可能需要根据实际页面的JavaScript实现来进行调整\n",
|
||
" '''\n",
|
||
" driver.execute_script(script_update_content, update_xpath)\n",
|
||
"\n",
|
||
" # 等待页面内容更新\n",
|
||
" time.sleep(5) # 根据需要调整等待时间\n",
|
||
"\n",
|
||
" # 获取页面内容\n",
|
||
" page_source = driver.page_source\n",
|
||
"\n",
|
||
" # 使用 BeautifulSoup 解析 HTML\n",
|
||
" soup = BeautifulSoup(page_source, 'lxml')\n",
|
||
"\n",
|
||
" # 提取 class 为 'jin-flash-item-container is-normal' 的 div 标签\n",
|
||
" items = soup.find_all('div', class_='jin-flash-item-container is-normal')\n",
|
||
"\n",
|
||
" # 只爬取前5条信息\n",
|
||
" for index, item in enumerate(items[:5], start=1):\n",
|
||
" print(f\"Information {index}:\")\n",
|
||
" print(item.get_text(strip=True)) # 打印提取的文本内容\n",
|
||
"\n",
|
||
"finally:\n",
|
||
" # 关闭 WebDriver\n",
|
||
" driver.quit()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from selenium import webdriver\n",
|
||
"from selenium.webdriver.common.by import By\n",
|
||
"from selenium.webdriver.chrome.service import Service\n",
|
||
"from selenium.webdriver.chrome.options import Options\n",
|
||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import time\n",
|
||
"\n",
|
||
"# 初始化 WebDriver\n",
|
||
"options = Options()\n",
|
||
"options.add_argument('--headless') # 无头模式,不打开浏览器界面\n",
|
||
"options.add_argument('--disable-gpu')\n",
|
||
"# service = Service('path/to/chromedriver') # 替换为你的ChromeDriver路径\n",
|
||
"driver = webdriver.Chrome(options=options)# (service=service, options=options)\n",
|
||
"\n",
|
||
"try:\n",
|
||
" # 打开金十数据网站\n",
|
||
" driver.get(\"https://www.jin10.com/\")\n",
|
||
"\n",
|
||
" # 等待页面加载\n",
|
||
" time.sleep(5)\n",
|
||
"\n",
|
||
" # 模拟点击滑块\n",
|
||
" switch_element_xpath = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[3]'\n",
|
||
" switch_element = driver.find_element(By.XPATH, switch_element_xpath)\n",
|
||
" switch_element.click()\n",
|
||
"\n",
|
||
" # 等待新内容加载\n",
|
||
" time.sleep(5) # 可以调整时间,确保新内容加载完成\n",
|
||
"\n",
|
||
" # 获取新的页面内容\n",
|
||
" new_page_source = driver.page_source\n",
|
||
"\n",
|
||
" # 使用 BeautifulSoup 解析新的 HTML\n",
|
||
" soup = BeautifulSoup(new_page_source, 'lxml')\n",
|
||
"\n",
|
||
" # 提取 class 为 'jin-flash-item-container is-important' 的 div 标签\n",
|
||
" items = soup.find_all('div', class_='jin-flash-item-container is-important')\n",
|
||
"\n",
|
||
" # 只爬取前5条信息\n",
|
||
" for index, item in enumerate(items[:5], start=1):\n",
|
||
" print(f\"Information {index}:\")\n",
|
||
" print(item.get_text(strip=True)) # 打印提取的文本内容\n",
|
||
" print('-' * 50)\n",
|
||
"\n",
|
||
"finally:\n",
|
||
" # 关闭 WebDriver\n",
|
||
" driver.quit()\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"开始定时监控...\n",
|
||
"火:现货黄金日内走低1.00%,现报3102.30美元/盎司。\n",
|
||
"last_matched_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"current_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"\n",
|
||
"共找到 2 条匹配'默认火热沸爆'的信息\n",
|
||
"火:现货黄金日内走低1.00%,现报3102.30美元/盎司。\n",
|
||
"last_matched_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"current_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"\n",
|
||
"共找到 2 条匹配'默认火热沸爆'的信息\n",
|
||
"火:现货黄金日内走低1.00%,现报3102.30美元/盎司。\n",
|
||
"last_matched_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"current_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"\n",
|
||
"共找到 2 条匹配'默认火热沸爆'的信息\n",
|
||
"火:现货黄金日内走低1.00%,现报3102.30美元/盎司。\n",
|
||
"last_matched_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"current_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"\n",
|
||
"共找到 2 条匹配'默认火热沸爆'的信息\n",
|
||
"火:现货黄金日内走低1.00%,现报3102.30美元/盎司。\n",
|
||
"last_matched_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"current_hash 968c113779225800e7593a2e594ad0e0\n",
|
||
"\n",
|
||
"共找到 2 条匹配'默认火热沸爆'的信息\n",
|
||
"执行出错: Message: \n",
|
||
"Stacktrace:\n",
|
||
"\tGetHandleVerifier [0x00007FF6B5614C25+3179557]\n",
|
||
"\t(No symbol) [0x00007FF6B52788A0]\n",
|
||
"\t(No symbol) [0x00007FF6B51091CA]\n",
|
||
"\t(No symbol) [0x00007FF6B515FA67]\n",
|
||
"\t(No symbol) [0x00007FF6B515FC9C]\n",
|
||
"\t(No symbol) [0x00007FF6B51B3627]\n",
|
||
"\t(No symbol) [0x00007FF6B5187C6F]\n",
|
||
"\t(No symbol) [0x00007FF6B51B02F3]\n",
|
||
"\t(No symbol) [0x00007FF6B5187A03]\n",
|
||
"\t(No symbol) [0x00007FF6B51506D0]\n",
|
||
"\t(No symbol) [0x00007FF6B5151983]\n",
|
||
"\tGetHandleVerifier [0x00007FF6B56767CD+3579853]\n",
|
||
"\tGetHandleVerifier [0x00007FF6B568D1D2+3672530]\n",
|
||
"\tGetHandleVerifier [0x00007FF6B5682153+3627347]\n",
|
||
"\tGetHandleVerifier [0x00007FF6B53E092A+868650]\n",
|
||
"\t(No symbol) [0x00007FF6B5282FFF]\n",
|
||
"\t(No symbol) [0x00007FF6B527F4A4]\n",
|
||
"\t(No symbol) [0x00007FF6B527F646]\n",
|
||
"\t(No symbol) [0x00007FF6B526EAA9]\n",
|
||
"\tBaseThreadInitThunk [0x00007FFF903C259D+29]\n",
|
||
"\tRtlUserThreadStart [0x00007FFF90F4AF38+40]\n",
|
||
"\n",
|
||
"热:纽约期银暴跌6%金十数据4月3日讯,纽约期银日内暴跌6.00%,现报32.57美元/盎司。现货白银跌4.43%,报32.40美元/盎司。\n",
|
||
"邮件发送成功\n",
|
||
"热:纽约期银暴跌6%金十数据4月3日讯,纽约期银日内暴跌6.00%,现报32.57美元/盎司。现货白银跌4.43%,报32.40美元/盎司。\n",
|
||
"last_matched_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"current_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"\n",
|
||
"共找到 1 条匹配'默认火热沸爆'的信息\n",
|
||
"热:纽约期银暴跌6%金十数据4月3日讯,纽约期银日内暴跌6.00%,现报32.57美元/盎司。现货白银跌4.43%,报32.40美元/盎司。\n",
|
||
"last_matched_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"current_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"\n",
|
||
"共找到 2 条匹配'默认火热沸爆'的信息\n",
|
||
"热:纽约期银暴跌6%金十数据4月3日讯,纽约期银日内暴跌6.00%,现报32.57美元/盎司。现货白银跌4.43%,报32.40美元/盎司。\n",
|
||
"last_matched_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"current_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"\n",
|
||
"共找到 2 条匹配'默认火热沸爆'的信息\n",
|
||
"热:纽约期银暴跌6%金十数据4月3日讯,纽约期银日内暴跌6.00%,现报32.57美元/盎司。现货白银跌4.43%,报32.40美元/盎司。\n",
|
||
"last_matched_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"current_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"\n",
|
||
"共找到 2 条匹配'默认火热沸爆'的信息\n",
|
||
"热:纽约期银暴跌6%金十数据4月3日讯,纽约期银日内暴跌6.00%,现报32.57美元/盎司。现货白银跌4.43%,报32.40美元/盎司。\n",
|
||
"last_matched_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"current_hash c2e035a6e60f47a267833722604e1a24\n",
|
||
"\n",
|
||
"共找到 2 条匹配'默认火热沸爆'的信息\n",
|
||
"火:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"邮件发送成功\n",
|
||
"火:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"last_matched_hash b8daf18e3735013ff4c9c47aeca33fa5\n",
|
||
"current_hash b8daf18e3735013ff4c9c47aeca33fa5\n",
|
||
"\n",
|
||
"共找到 1 条匹配'默认火热沸爆'的信息\n",
|
||
"热:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"邮件发送成功\n",
|
||
"热:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"last_matched_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"current_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"\n",
|
||
"共找到 1 条匹配'默认火热沸爆'的信息\n",
|
||
"热:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"last_matched_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"current_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"\n",
|
||
"共找到 1 条匹配'默认火热沸爆'的信息\n",
|
||
"热:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"last_matched_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"current_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"\n",
|
||
"共找到 1 条匹配'默认火热沸爆'的信息\n",
|
||
"热:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"last_matched_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"current_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"\n",
|
||
"共找到 1 条匹配'默认火热沸爆'的信息\n",
|
||
"热:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"last_matched_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"current_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"\n",
|
||
"共找到 1 条匹配'默认火热沸爆'的信息\n",
|
||
"热:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"last_matched_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"current_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"\n",
|
||
"共找到 1 条匹配'默认火热沸爆'的信息\n",
|
||
"热:黄金快速下破3090美元金十数据4月3日讯,现货黄金15分钟内快速下破两道关口,最低至3089.57美元/盎司,日内跌1.41%。\n",
|
||
"last_matched_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"current_hash e00714f0ae93195b10a76db627c95bbe\n",
|
||
"\n",
|
||
"共找到 1 条匹配'默认火热沸爆'的信息\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from selenium import webdriver\n",
|
||
"from selenium.webdriver.common.by import By\n",
|
||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import time\n",
|
||
"import smtplib\n",
|
||
"from email.mime.text import MIMEText\n",
|
||
"from email.mime.multipart import MIMEMultipart\n",
|
||
"import hashlib\n",
|
||
"import schedule\n",
|
||
"\n",
|
import os

# Mail settings.
# The SMTP authorization code is read from the environment so that it is never
# stored in the notebook. The code that used to be hard-coded here has been
# published together with the file and should be revoked at the provider.
#   JIN10_FROM_EMAIL     sender account (defaults to the original address)
#   JIN10_SMTP_PASSWORD  SMTP authorization code for the sender account
#   JIN10_TO_EMAIL       recipient (defaults to the sender)
from_email = os.environ.get("JIN10_FROM_EMAIL", "240884432@qq.com")
from_password = os.environ.get("JIN10_SMTP_PASSWORD", "")
to_email = os.environ.get("JIN10_TO_EMAIL", from_email)

if not from_password:
    # Warn early instead of raising: scraping still works without mail.
    print("未设置环境变量 JIN10_SMTP_PASSWORD,邮件通知将无法登录发送")
|
||
"\n",
|
# E-mail notification helper
def send_email(content):
    """Send ``content`` as an HTML e-mail through QQ Mail's SMTP-over-SSL endpoint.

    The sender, password and recipient come from the module-level
    ``from_email``, ``from_password`` and ``to_email`` settings.

    Args:
        content: Body of the message; it is attached with the ``html`` subtype.

    Returns:
        None. Delivery is best-effort: any failure is printed, never raised,
        so a mail problem cannot stop the caller.
    """
    msg = MIMEMultipart('alternative')
    msg['Subject'] = '金十数据更新通知'
    msg['From'] = from_email
    msg['To'] = to_email
    msg.attach(MIMEText(content, 'html'))

    try:
        # The context manager issues QUIT and closes the socket even when
        # login() or sendmail() raises; the timeout keeps an unresponsive
        # server from blocking the caller indefinitely.
        with smtplib.SMTP_SSL('smtp.qq.com', 465, timeout=30) as server:
            server.login(from_email, from_password)
            server.sendmail(from_email, to_email, msg.as_string())
        print("邮件发送成功")
    except Exception as e:
        print(f"邮件发送失败: {e}")
|
||
"\n",
|
# De-duplication state shared between scheduled runs.
# ``last_matched_hash`` is the MD5 of the alert handled most recently.
# ``_seen_hashes`` holds the MD5 of every alert handled so far and stays None
# until the first scrape completes, which is how the baseline run is detected.
last_matched_hash = None
_seen_hashes = None

# Items whose text contains this marker are treated as alerts.
# NOTE(review): it looks like the concatenated labels of the item's importance
# widget, with the active level as the next character - confirm on the page.
_ALERT_MARKER = '默认火热沸爆'

_PAGE_URL = "https://www.jin10.com/"
# Absolute XPaths copied from the live page; they break whenever the layout changes.
_FILTER_XPATH = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[2]/div[2]/div[2]/div[4]/span[2]/div[1]/div[3]/div/div[2]/div[3]'
_FEED_XPATH = '/html/body/div[1]/div[2]/div[2]/div/main/div[2]/div[2]/div[3]'
_WAIT_SECONDS = 20    # upper bound for each explicit wait
_RENDER_SECONDS = 5   # fixed pause after scrolling, before the DOM is read
_MAX_ITEMS = 40       # only the first items of the feed are inspected


def _load_item_texts(driver):
    """Open the page in ``driver`` and return the text of the first feed items."""
    driver.get(_PAGE_URL)

    filter_element = WebDriverWait(driver, _WAIT_SECONDS).until(
        EC.presence_of_element_located((By.XPATH, _FILTER_XPATH))
    )
    # Mark the filter entry as active by rewriting its class attribute.
    # NOTE(review): no click is dispatched - confirm the page reacts to this.
    driver.execute_script(
        'arguments[0].className = "hot-filter_item is-active";', filter_element
    )

    feed_element = WebDriverWait(driver, _WAIT_SECONDS).until(
        EC.presence_of_element_located((By.XPATH, _FEED_XPATH))
    )
    driver.execute_script('arguments[0].scrollIntoView(true);', feed_element)
    time.sleep(_RENDER_SECONDS)

    soup = BeautifulSoup(driver.page_source, 'lxml')
    items = soup.find_all('div', class_='jin-flash-item-container is-normal')
    return [item.get_text(strip=True) for item in items[:_MAX_ITEMS]]


def _format_alerts(texts):
    """Return ``(matched_count, alerts)`` for the given item texts.

    ``matched_count`` counts the texts containing the marker. ``alerts`` holds
    one string per text that has something after the marker, formatted as the
    first following character, a full-width colon, then the remainder.
    """
    matched_count = 0
    alerts = []
    for text in texts:
        _, marker, tail = text.partition(_ALERT_MARKER)
        if not marker:
            continue
        matched_count += 1
        tail = tail.strip()
        if tail:
            alerts.append(tail[0] + ":" + tail[1:])
    return matched_count, alerts


# Main scraping job
def fetch_news():
    """Scrape the feed once and e-mail every alert not handled before.

    The first scrape that completes only records what is already on the page,
    so starting the monitor does not mail stale items. On later runs each
    alert whose hash is new is passed to ``send_email``. An alert is recorded
    as handled even if the mail then fails, so it is not retried.

    Any error, including a failure to start the browser, is printed instead of
    raised so one bad run does not end the polling loop. The browser is always
    closed.
    """
    global last_matched_hash, _seen_hashes

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')

    driver = None
    try:
        # Created inside the try block so a start-up failure is reported
        # like any other error.
        driver = webdriver.Chrome(options=options)
        matched_count, alerts = _format_alerts(_load_item_texts(driver))

        baseline_run = _seen_hashes is None
        if baseline_run:
            _seen_hashes = set()

        for alert in alerts:
            print(alert)
            current_hash = hashlib.md5(alert.encode()).hexdigest()
            if current_hash in _seen_hashes:
                continue
            _seen_hashes.add(current_hash)
            last_matched_hash = current_hash
            if not baseline_run:
                send_email(alert)

        print(f"\n共找到 {matched_count} 条匹配'默认火热沸爆'的信息")

    except Exception as e:
        print(f"执行出错: {e}")
    finally:
        if driver is not None:
            driver.quit()
|
||
"\n",
|
# Scheduling: poll once a minute until the kernel is interrupted.
_JOB_TAG = 'jin10-monitor'

# Jobs are stored on schedule's module-level default scheduler, so re-running
# this cell in the same kernel would register fetch_news a second time.
# Drop any job left over from an earlier run before adding it again.
schedule.clear(_JOB_TAG)
schedule.every(1).minutes.do(fetch_news).tag(_JOB_TAG)

print("开始定时监控...")
try:
    while True:
        try:
            schedule.run_pending()
        except Exception as e:
            # schedule does not catch job exceptions; report the failure and
            # keep polling instead of ending the monitor.
            print(f"执行出错: {e}")
        time.sleep(1)
except KeyboardInterrupt:
    # Raised by "Interrupt kernel"; stop quietly instead of with a traceback.
    print("监控已停止")
finally:
    schedule.clear(_JOB_TAG)
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.9"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|