299 lines
8.0 KiB
Plaintext
299 lines
8.0 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T08:51:45.729834Z",
|
||
"start_time": "2025-08-21T08:51:45.724789Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"import time\n",
|
||
"\n",
|
||
"from selenium import webdriver\n",
|
||
"from selenium.webdriver.edge.service import Service\n",
|
||
"from selenium.webdriver.common.by import By\n",
|
||
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
||
"from selenium.webdriver.support import expected_conditions as EC\n",
|
||
"from selenium.webdriver.edge.options import Options"
|
||
],
|
||
"outputs": [],
|
||
"execution_count": 10
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T08:51:45.747072Z",
|
||
"start_time": "2025-08-21T08:51:45.743237Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import sqlite3"
|
||
],
|
||
"id": "f184b255d5098302",
|
||
"outputs": [],
|
||
"execution_count": 11
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T08:51:45.773737Z",
|
||
"start_time": "2025-08-21T08:51:45.769129Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
"# SQLite database file, given as a path relative to the current working directory.\n",
"db_path = '../data.db'\n",
"conn = sqlite3.connect(database=db_path)"
|
||
],
|
||
"id": "4813fcf4dea28b8d",
|
||
"outputs": [],
|
||
"execution_count": 12
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T08:51:45.797561Z",
|
||
"start_time": "2025-08-21T08:51:45.790647Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
"# Collect every Edge launch switch on a single Options object.\n",
"edge_options = Options()\n",
"# Optional: add \"--headless\" to the tuple below to run without a window (left disabled here).\n",
"for switch in (\n",
"    \"--disable-gpu\",\n",
"    \"--no-sandbox\",\n",
"    \"--disable-extensions\",\n",
"    \"--disable-plugins\",\n",
"    \"--disable-popup-blocking\",\n",
"    \"--disable-infobars\",\n",
"    \"--disable-notifications\",\n",
"    \"--no-first-run\",\n",
"    \"--no-default-browser-check\",\n",
"):\n",
"    edge_options.add_argument(switch)\n",
"\n",
"# Tell Edge which user-data directory to use.\n",
"user_data_dir = r\"D:\\code\\edge\"\n",
"edge_options.add_argument(f\"--user-data-dir={user_data_dir}\")\n",
"# Profile inside that directory (optional; the default is \"Default\").\n",
"edge_options.add_argument(\"--profile-directory=Default\")"
|
||
],
|
||
"id": "e5632e44a52d5dc4",
|
||
"outputs": [],
|
||
"execution_count": 13
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T08:51:47.136948Z",
|
||
"start_time": "2025-08-21T08:51:45.814223Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
"# Location of the EdgeDriver binary (optional; can be omitted when the driver is already on PATH).\n",
"driver_path = r\"D:\\app\\edgeDriver\\msedgedriver.exe\"\n",
"service = Service(executable_path=driver_path)\n",
"# Start the Edge browser session with the options built above.\n",
"driver = webdriver.Edge(options=edge_options, service=service)"
|
||
],
|
||
"id": "28b1479c3decc6b1",
|
||
"outputs": [],
|
||
"execution_count": 14
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T08:51:53.084554Z",
|
||
"start_time": "2025-08-21T08:51:47.152588Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
"page_url = \"https://www.tianyiwangxiao.com/new/question-bank/learn-center-analyze/4d60c96ef05c452b812654e78af7701a/1957604601548296194?from=ht2\"\n",
"driver.get(page_url)\n",
"\n",
"# Wait for the page to render (here: until the <body> element is present).\n",
"# `wait` is reused by later cells, so it stays a notebook-level name.\n",
"wait = WebDriverWait(driver, 720)\n",
"body_locator = (By.TAG_NAME, \"body\")\n",
"wait.until(EC.presence_of_element_located(body_locator))\n",
"# Fixed extra pause after <body> appears; presumably gives client-side rendering time to finish.\n",
"time.sleep(3)\n",
"\n"
|
||
],
|
||
"id": "779f88e1c3670c02",
|
||
"outputs": [],
|
||
"execution_count": 15
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T08:51:59.934014Z",
|
||
"start_time": "2025-08-21T08:51:59.829632Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
"# Enter question-memorization (recite) mode by clicking the \"next\" arrow once it is visible.\n",
"next_arrow_locator = (By.CSS_SELECTOR, \".el-icon-right.next\")\n",
"clickable_element = wait.until(EC.visibility_of_element_located(next_arrow_locator))\n",
"clickable_element.click()\n",
"\n"
|
||
],
|
||
"id": "721f5a8a872bfdce",
|
||
"outputs": [],
|
||
"execution_count": 17
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T08:52:07.855382Z",
|
||
"start_time": "2025-08-21T08:52:07.834085Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
"# Grab the HTML as rendered by the browser\n",
|
||
"rendered_html = driver.page_source"
|
||
],
|
||
"id": "aa728e660ee9bbe5",
|
||
"outputs": [],
|
||
"execution_count": 18
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T09:03:09.025560Z",
|
||
"start_time": "2025-08-21T09:03:08.973835Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
"# Parse the rendered page once, then collect the three parts of the question.\n",
"soup = BeautifulSoup(rendered_html, 'html.parser')\n",
"\n",
"# Question text: <p class=\"title\">\n",
"title = soup.find_all(name='p', class_='title')\n",
"\n",
"# Container(s) holding the answer options: <div class=\"options-box\">\n",
"out_options_box = soup.find_all(name='div', class_='options-box')\n",
"\n",
"# Explanation of the answer: <div class=\"analyze\">\n",
"analyze = soup.find_all(name='div', class_='analyze')"
|
||
],
|
||
"id": "5db0bbd564c0b53f",
|
||
"outputs": [],
|
||
"execution_count": 33
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T09:03:10.108380Z",
|
||
"start_time": "2025-08-21T09:03:10.102301Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "print(title)",
|
||
"id": "9ae9f13772cfed6a",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[<p class=\"title\" data-v-127dd018=\"\">根据《期货经营机构投资者适当性管理实施指引(试行)》,经营机构评估,划分所销售产品或者所提供服务的风险等级时,涉及投资组合的产品或服务的,下列表述中正确的是( )。</p>]\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 34
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T09:11:05.144092Z",
|
||
"start_time": "2025-08-21T09:11:05.136517Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
"out_options_box = soup.find_all('div', class_='options-box')\n",
"\n",
"# The code below assumes exactly one options box on the page.\n",
"# Raise a real exception here: `raise <list>` itself fails with\n",
"# \"TypeError: exceptions must derive from BaseException\" and hides the actual problem.\n",
"if len(out_options_box) != 1:\n",
"    raise ValueError(f\"expected exactly one 'options-box' div, found {len(out_options_box)}\")\n",
"out_options = out_options_box[0].find_all(\"div\", class_=\"options-item\")\n",
"\n",
"for out_option in out_options:\n",
"    # <p class=\"label\"> holds the option letter; an option whose label also has the\n",
"    # \"success-active\" class is reported as True (treated here as the correct answer).\n",
"    label_tag = out_option.find(\"p\", \"label\")\n",
"    trueFalse = \"success-active\" in label_tag.get(\"class\")\n",
"    abcd = label_tag.text.strip()\n",
"\n",
"    # <p class=\"text\"> holds the option's wording.\n",
"    answer = out_option.find(\"p\", \"text\").text.strip()\n",
"\n",
"    print(abcd, answer, trueFalse)"
|
||
],
|
||
"id": "11d9051ab089122d",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"A 可以按照产品或服务对应的任何一个风险等级进行评估 False\n",
|
||
"B 应当按照产品或服务最低风险等级进行评估 False\n",
|
||
"C 应当按照产品或服务最高风险等级进行评估 False\n",
|
||
"D 应当按照产品或服务整体风险等级进行评估 True\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 39
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-08-21T08:55:04.344999Z",
|
||
"start_time": "2025-08-21T08:55:04.339191Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "print(analyze)",
|
||
"id": "b7f43a482ce3c619",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"[<div class=\"analyze\" data-v-24612254=\"\">涉及投资组合的产品或服务,应当按照产品或服务整体风险等级进行评估。 <br/><br/> </div>]\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 22
|
||
},
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "code",
|
||
"outputs": [],
|
||
"execution_count": null,
|
||
"source": "",
|
||
"id": "ad769b774bac8989"
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|