更新天一

This commit is contained in:
2025-08-22 10:04:00 +08:00
parent d8a03970f6
commit 78aa8e1b75
2 changed files with 192 additions and 106 deletions

View File

@@ -6,8 +6,8 @@
"metadata": { "metadata": {
"collapsed": true, "collapsed": true,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-08-21T08:51:45.729834Z", "end_time": "2025-08-22T00:51:15.383830Z",
"start_time": "2025-08-21T08:51:45.724789Z" "start_time": "2025-08-22T00:51:15.162928Z"
} }
}, },
"source": [ "source": [
@@ -21,13 +21,13 @@
"from selenium.webdriver.edge.options import Options" "from selenium.webdriver.edge.options import Options"
], ],
"outputs": [], "outputs": [],
"execution_count": 10 "execution_count": 1
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-08-21T08:51:45.747072Z", "end_time": "2025-08-22T00:51:16.137665Z",
"start_time": "2025-08-21T08:51:45.743237Z" "start_time": "2025-08-22T00:51:16.016527Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -37,13 +37,13 @@
], ],
"id": "f184b255d5098302", "id": "f184b255d5098302",
"outputs": [], "outputs": [],
"execution_count": 11 "execution_count": 2
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-08-21T08:51:45.773737Z", "end_time": "2025-08-22T01:24:38.259284Z",
"start_time": "2025-08-21T08:51:45.769129Z" "start_time": "2025-08-22T01:24:38.253051Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -53,13 +53,26 @@
], ],
"id": "4813fcf4dea28b8d", "id": "4813fcf4dea28b8d",
"outputs": [], "outputs": [],
"execution_count": 12 "execution_count": 54
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-08-21T08:51:45.797561Z", "end_time": "2025-08-22T01:24:21.532983Z",
"start_time": "2025-08-21T08:51:45.790647Z" "start_time": "2025-08-22T01:24:21.528098Z"
}
},
"cell_type": "code",
"source": "# conn.close()",
"id": "8ea63e4cb82fe0c",
"outputs": [],
"execution_count": 53
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-22T00:51:16.185320Z",
"start_time": "2025-08-22T00:51:16.176280Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -83,13 +96,13 @@
], ],
"id": "e5632e44a52d5dc4", "id": "e5632e44a52d5dc4",
"outputs": [], "outputs": [],
"execution_count": 13 "execution_count": 4
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-08-21T08:51:47.136948Z", "end_time": "2025-08-22T00:51:18.395Z",
"start_time": "2025-08-21T08:51:45.814223Z" "start_time": "2025-08-22T00:51:16.198246Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
@@ -101,19 +114,21 @@
], ],
"id": "28b1479c3decc6b1", "id": "28b1479c3decc6b1",
"outputs": [], "outputs": [],
"execution_count": 14 "execution_count": 5
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-08-21T08:51:53.084554Z", "end_time": "2025-08-22T00:51:27.692312Z",
"start_time": "2025-08-21T08:51:47.152588Z" "start_time": "2025-08-22T00:51:18.413131Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
"source": [ "source": [
"driver.get(\"https://www.tianyiwangxiao.com/new/question-bank/learn-center-analyze/4d60c96ef05c452b812654e78af7701a/1957604601548296194?from=ht2\")\n", "driver.get(\"https://www.tianyiwangxiao.com/new/question-bank/learn-center-analyze/4d60c96ef05c452b812654e78af7701a/1957604601548296194?from=ht2\")\n",
"\n", "\n",
"\"https://www.tianyiwangxiao.com/new/question-bank/learn-center-analyze/94cfba022e2f4c7ebbeaa400576b3a9a/1958703246433423361?from=ht2\"\n",
"\n",
"# 等待页面渲染完成(例如等待 body 加载)\n", "# 等待页面渲染完成(例如等待 body 加载)\n",
"wait = WebDriverWait(driver, 720)\n", "wait = WebDriverWait(driver, 720)\n",
"wait.until(EC.presence_of_element_located((By.TAG_NAME, \"body\")))\n", "wait.until(EC.presence_of_element_located((By.TAG_NAME, \"body\")))\n",
@@ -122,148 +137,219 @@
], ],
"id": "779f88e1c3670c02", "id": "779f88e1c3670c02",
"outputs": [], "outputs": [],
"execution_count": 15 "execution_count": 6
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-08-21T08:51:59.934014Z", "end_time": "2025-08-22T00:51:27.716520Z",
"start_time": "2025-08-21T08:51:59.829632Z" "start_time": "2025-08-22T00:51:27.708786Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
"source": [ "source": [
"#进入背题模式\n", "def next_page():\n",
" clickable_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, \".el-icon-right.next\")))\n", " clickable_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, \".el-icon-right.next\")))\n",
" clickable_element.click()\n", " clickable_element.click()\n",
"\n" "\n",
"def get_html():\n",
" rendered_html = driver.page_source\n",
" return rendered_html"
], ],
"id": "721f5a8a872bfdce", "id": "721f5a8a872bfdce",
"outputs": [], "outputs": [],
"execution_count": 17 "execution_count": 7
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-08-21T08:52:07.855382Z", "end_time": "2025-08-22T01:24:45.201186Z",
"start_time": "2025-08-21T08:52:07.834085Z" "start_time": "2025-08-22T01:24:45.184772Z"
}
},
"cell_type": "code",
"source": [
"# 获取渲染后的 HTML\n",
"rendered_html = driver.page_source"
],
"id": "aa728e660ee9bbe5",
"outputs": [],
"execution_count": 18
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-21T09:03:09.025560Z",
"start_time": "2025-08-21T09:03:08.973835Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
"source": [ "source": [
"def html_parser(rendered_html):\n",
" soup = BeautifulSoup(rendered_html, 'html.parser')\n", " soup = BeautifulSoup(rendered_html, 'html.parser')\n",
"\n", "\n",
" title = soup.find_all('p', class_='title')\n", " title = soup.find_all('p', class_='title')\n",
"\n", "\n",
" out_options_box = soup.find_all('div', class_='options-box')\n", " out_options_box = soup.find_all('div', class_='options-box')\n",
"\n", "\n",
"analyze = soup.find_all('div', class_='analyze')" " analyze = soup.find_all('div', class_='analyze')\n",
],
"id": "5db0bbd564c0b53f",
"outputs": [],
"execution_count": 33
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-21T09:03:10.108380Z",
"start_time": "2025-08-21T09:03:10.102301Z"
}
},
"cell_type": "code",
"source": "print(title)",
"id": "9ae9f13772cfed6a",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[<p class=\"title\" data-v-127dd018=\"\">根据《期货经营机构投资者适当性管理实施指引(试行)》,经营机构评估,划分所销售产品或者所提供服务的风险等级时,涉及投资组合的产品或服务的,下列表述中正确的是()。</p>]\n"
]
}
],
"execution_count": 34
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-21T09:11:05.144092Z",
"start_time": "2025-08-21T09:11:05.136517Z"
}
},
"cell_type": "code",
"source": [
"out_options_box = soup.find_all('div', class_='options-box')\n",
"\n", "\n",
"\n", "\n",
" result={\"title\":title[0].text.strip(), \"analyze\":analyze[0].text.strip()}\n",
"\n",
" if (len(out_options_box)!=1):\n", " if (len(out_options_box)!=1):\n",
" raise out_options_box\n", " raise out_options_box\n",
" out_options = out_options_box[0].find_all(\"div\",class_=\"options-item\")\n", " out_options = out_options_box[0].find_all(\"div\",class_=\"options-item\")\n",
"\n", "\n",
" if len(out_options)==4:\n",
" # 多选\n",
" for out_option in out_options:\n", " for out_option in out_options:\n",
"\n",
" abcd = out_option.find(\"p\",\"label\")\n", " abcd = out_option.find(\"p\",\"label\")\n",
"\n", "\n",
" trueFalse = False\n", " trueFalse = False\n",
" if \"success-active\" in abcd.get(\"class\"):\n", " if \"success-active\" in abcd.get(\"class\"):\n",
" trueFalse = True\n", " trueFalse = True\n",
"\n", "\n",
" abcd = abcd.text.strip()\n", " abcd = abcd.text.strip().lower()\n",
"\n", "\n",
" answer = out_option.find(\"p\",\"text\").text.strip()\n", " answer = out_option.find(\"p\",\"text\").text.strip()\n",
"\n", "\n",
" print(abcd, answer, trueFalse)" " result[abcd] = [answer, trueFalse]\n",
" else:\n",
" # 单选\n",
" def get_tf():\n",
" out_options_box = soup.find_all('div', class_='answer-box')\n",
" for i in out_options_box:\n",
" for ii in i.find_all('div', class_='CORRECT'):\n",
" if ii.text == \"正确\":\n",
" return True\n",
" elif ii.text == \"错误\":\n",
" return False\n",
" print(out_options_box)\n",
" return 0\n",
" result[\"tf\"] = get_tf()\n",
"\n",
" return result\n",
"\n"
], ],
"id": "11d9051ab089122d", "id": "5db0bbd564c0b53f",
"outputs": [ "outputs": [],
{ "execution_count": 55
"name": "stdout",
"output_type": "stream",
"text": [
"A 可以按照产品或服务对应的任何一个风险等级进行评估 False\n",
"B 应当按照产品或服务最低风险等级进行评估 False\n",
"C 应当按照产品或服务最高风险等级进行评估 False\n",
"D 应当按照产品或服务整体风险等级进行评估 True\n"
]
}
],
"execution_count": 39
}, },
{ {
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-08-21T08:55:04.344999Z", "end_time": "2025-08-22T01:41:12.394198Z",
"start_time": "2025-08-21T08:55:04.339191Z" "start_time": "2025-08-22T01:41:12.386634Z"
} }
}, },
"cell_type": "code", "cell_type": "code",
"source": "print(analyze)", "source": [
"id": "b7f43a482ce3c619", "def write2db(index, result):\n",
"\n",
" if \"tf\" not in result:\n",
"\n",
" conn.execute(\n",
" \"INSERT INTO questions (title, chapter, q_num, q_type, question, a, b, c, d, a_result, b_result, c_result, d_result, explanation) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\",\n",
" (\n",
" info[0],\n",
" info[1],\n",
" index,\n",
" \"多选题\",\n",
" result.get(\"title\"),\n",
" result.get(\"a\")[0],\n",
" result.get(\"b\")[0],\n",
" result.get(\"c\")[0],\n",
" result.get(\"d\")[0],\n",
" result.get(\"a\")[1],\n",
" result.get(\"b\")[1],\n",
" result.get(\"c\")[1],\n",
" result.get(\"d\")[1],\n",
" result.get(\"analyze\"),\n",
" )\n",
" )\n",
"\n",
" else:\n",
" if result[\"tf\"] == 0:\n",
" return\n",
"\n",
" conn.execute(\n",
" \"INSERT INTO questions (title, chapter, q_num, q_type, question, a, b, c, d, a_result, b_result, c_result, d_result, explanation) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\",\n",
" (\n",
" info[0],\n",
" info[1],\n",
" index,\n",
" \"判断题\",\n",
" result.get(\"title\"),\n",
" \"\",\n",
" \"\",\n",
" \"\",\n",
" \"\",\n",
" 1 if result[\"tf\"] else 0,\n",
" 0 if result[\"tf\"] else 1,\n",
" \"\",\n",
" \"\",\n",
" result.get(\"analyze\"),\n",
" )\n",
" )\n",
"\n",
" conn.commit()"
],
"id": "853f278c1123cae1",
"outputs": [],
"execution_count": 69
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-22T01:46:12.973092Z",
"start_time": "2025-08-22T01:46:12.968961Z"
}
},
"cell_type": "code",
"source": "info = [\"天一\",0,130]",
"id": "71ef002122c67647",
"outputs": [],
"execution_count": 81
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-22T01:50:44.170181Z",
"start_time": "2025-08-22T01:50:27.773842Z"
}
},
"cell_type": "code",
"source": [
"for i in range(info[2]):\n",
" p = get_html()\n",
" result = html_parser(p)\n",
" write2db(i, result)\n",
" next_page()\n",
"\n"
],
"id": "11d9051ab089122d",
"outputs": [],
"execution_count": 89
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-22T01:10:27.088143Z",
"start_time": "2025-08-22T01:10:27.076521Z"
}
},
"cell_type": "code",
"source": "",
"id": "ad769b774bac8989",
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"[<div class=\"analyze\" data-v-24612254=\"\">涉及投资组合的产品或服务,应当按照产品或服务整体风险等级进行评估。 <br/><br/> </div>]\n" "<div class=\"answer CORRECT\" data-v-ee229d58=\"\">正确</div>\n"
] ]
} }
], ],
"execution_count": 22 "execution_count": 40
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-22T01:06:54.877198Z",
"start_time": "2025-08-22T01:06:54.867849Z"
}
},
"cell_type": "code",
"source": "",
"id": "e1474fd283674850",
"outputs": [],
"execution_count": 35
}, },
{ {
"metadata": {}, "metadata": {},
@@ -271,7 +357,7 @@
"outputs": [], "outputs": [],
"execution_count": null, "execution_count": null,
"source": "", "source": "",
"id": "ad769b774bac8989" "id": "54ad268f864e1f6c"
} }
], ],
"metadata": { "metadata": {

Binary file not shown.