更新天一
This commit is contained in:
@@ -6,8 +6,8 @@
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T08:51:45.729834Z",
|
||||
"start_time": "2025-08-21T08:51:45.724789Z"
|
||||
"end_time": "2025-08-22T00:51:15.383830Z",
|
||||
"start_time": "2025-08-22T00:51:15.162928Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
@@ -21,13 +21,13 @@
|
||||
"from selenium.webdriver.edge.options import Options"
|
||||
],
|
||||
"outputs": [],
|
||||
"execution_count": 10
|
||||
"execution_count": 1
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T08:51:45.747072Z",
|
||||
"start_time": "2025-08-21T08:51:45.743237Z"
|
||||
"end_time": "2025-08-22T00:51:16.137665Z",
|
||||
"start_time": "2025-08-22T00:51:16.016527Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@@ -37,13 +37,13 @@
|
||||
],
|
||||
"id": "f184b255d5098302",
|
||||
"outputs": [],
|
||||
"execution_count": 11
|
||||
"execution_count": 2
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T08:51:45.773737Z",
|
||||
"start_time": "2025-08-21T08:51:45.769129Z"
|
||||
"end_time": "2025-08-22T01:24:38.259284Z",
|
||||
"start_time": "2025-08-22T01:24:38.253051Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@@ -53,13 +53,26 @@
|
||||
],
|
||||
"id": "4813fcf4dea28b8d",
|
||||
"outputs": [],
|
||||
"execution_count": 12
|
||||
"execution_count": 54
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T08:51:45.797561Z",
|
||||
"start_time": "2025-08-21T08:51:45.790647Z"
|
||||
"end_time": "2025-08-22T01:24:21.532983Z",
|
||||
"start_time": "2025-08-22T01:24:21.528098Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "# conn.close()",
|
||||
"id": "8ea63e4cb82fe0c",
|
||||
"outputs": [],
|
||||
"execution_count": 53
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-22T00:51:16.185320Z",
|
||||
"start_time": "2025-08-22T00:51:16.176280Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@@ -83,13 +96,13 @@
|
||||
],
|
||||
"id": "e5632e44a52d5dc4",
|
||||
"outputs": [],
|
||||
"execution_count": 13
|
||||
"execution_count": 4
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T08:51:47.136948Z",
|
||||
"start_time": "2025-08-21T08:51:45.814223Z"
|
||||
"end_time": "2025-08-22T00:51:18.395Z",
|
||||
"start_time": "2025-08-22T00:51:16.198246Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
@@ -101,19 +114,21 @@
|
||||
],
|
||||
"id": "28b1479c3decc6b1",
|
||||
"outputs": [],
|
||||
"execution_count": 14
|
||||
"execution_count": 5
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T08:51:53.084554Z",
|
||||
"start_time": "2025-08-21T08:51:47.152588Z"
|
||||
"end_time": "2025-08-22T00:51:27.692312Z",
|
||||
"start_time": "2025-08-22T00:51:18.413131Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"driver.get(\"https://www.tianyiwangxiao.com/new/question-bank/learn-center-analyze/4d60c96ef05c452b812654e78af7701a/1957604601548296194?from=ht2\")\n",
|
||||
"\n",
|
||||
"\"https://www.tianyiwangxiao.com/new/question-bank/learn-center-analyze/94cfba022e2f4c7ebbeaa400576b3a9a/1958703246433423361?from=ht2\"\n",
|
||||
"\n",
|
||||
"# 等待页面渲染完成(例如等待 body 加载)\n",
|
||||
"wait = WebDriverWait(driver, 720)\n",
|
||||
"wait.until(EC.presence_of_element_located((By.TAG_NAME, \"body\")))\n",
|
||||
@@ -122,148 +137,219 @@
|
||||
],
|
||||
"id": "779f88e1c3670c02",
|
||||
"outputs": [],
|
||||
"execution_count": 15
|
||||
"execution_count": 6
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T08:51:59.934014Z",
|
||||
"start_time": "2025-08-21T08:51:59.829632Z"
|
||||
"end_time": "2025-08-22T00:51:27.716520Z",
|
||||
"start_time": "2025-08-22T00:51:27.708786Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"#进入背题模式\n",
|
||||
"clickable_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, \".el-icon-right.next\")))\n",
|
||||
"clickable_element.click()\n",
|
||||
"\n"
|
||||
"def next_page():\n",
|
||||
" clickable_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, \".el-icon-right.next\")))\n",
|
||||
" clickable_element.click()\n",
|
||||
"\n",
|
||||
"def get_html():\n",
|
||||
" rendered_html = driver.page_source\n",
|
||||
" return rendered_html"
|
||||
],
|
||||
"id": "721f5a8a872bfdce",
|
||||
"outputs": [],
|
||||
"execution_count": 17
|
||||
"execution_count": 7
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T08:52:07.855382Z",
|
||||
"start_time": "2025-08-21T08:52:07.834085Z"
|
||||
"end_time": "2025-08-22T01:24:45.201186Z",
|
||||
"start_time": "2025-08-22T01:24:45.184772Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"# 获取渲染后的 HTML\n",
|
||||
"rendered_html = driver.page_source"
|
||||
],
|
||||
"id": "aa728e660ee9bbe5",
|
||||
"outputs": [],
|
||||
"execution_count": 18
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T09:03:09.025560Z",
|
||||
"start_time": "2025-08-21T09:03:08.973835Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"soup = BeautifulSoup(rendered_html, 'html.parser')\n",
|
||||
"def html_parser(rendered_html):\n",
|
||||
" soup = BeautifulSoup(rendered_html, 'html.parser')\n",
|
||||
"\n",
|
||||
"title = soup.find_all('p', class_='title')\n",
|
||||
" title = soup.find_all('p', class_='title')\n",
|
||||
"\n",
|
||||
"out_options_box = soup.find_all('div', class_='options-box')\n",
|
||||
" out_options_box = soup.find_all('div', class_='options-box')\n",
|
||||
"\n",
|
||||
"analyze = soup.find_all('div', class_='analyze')"
|
||||
" analyze = soup.find_all('div', class_='analyze')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" result={\"title\":title[0].text.strip(), \"analyze\":analyze[0].text.strip()}\n",
|
||||
"\n",
|
||||
" if (len(out_options_box)!=1):\n",
|
||||
" raise out_options_box\n",
|
||||
" out_options = out_options_box[0].find_all(\"div\",class_=\"options-item\")\n",
|
||||
"\n",
|
||||
" if len(out_options)==4:\n",
|
||||
" # 多选\n",
|
||||
" for out_option in out_options:\n",
|
||||
"\n",
|
||||
" abcd = out_option.find(\"p\",\"label\")\n",
|
||||
"\n",
|
||||
" trueFalse = False\n",
|
||||
" if \"success-active\" in abcd.get(\"class\"):\n",
|
||||
" trueFalse = True\n",
|
||||
"\n",
|
||||
" abcd = abcd.text.strip().lower()\n",
|
||||
"\n",
|
||||
" answer = out_option.find(\"p\",\"text\").text.strip()\n",
|
||||
"\n",
|
||||
" result[abcd] = [answer, trueFalse]\n",
|
||||
" else:\n",
|
||||
" # 单选\n",
|
||||
" def get_tf():\n",
|
||||
" out_options_box = soup.find_all('div', class_='answer-box')\n",
|
||||
" for i in out_options_box:\n",
|
||||
" for ii in i.find_all('div', class_='CORRECT'):\n",
|
||||
" if ii.text == \"正确\":\n",
|
||||
" return True\n",
|
||||
" elif ii.text == \"错误\":\n",
|
||||
" return False\n",
|
||||
" print(out_options_box)\n",
|
||||
" return 0\n",
|
||||
" result[\"tf\"] = get_tf()\n",
|
||||
"\n",
|
||||
" return result\n",
|
||||
"\n"
|
||||
],
|
||||
"id": "5db0bbd564c0b53f",
|
||||
"outputs": [],
|
||||
"execution_count": 33
|
||||
"execution_count": 55
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T09:03:10.108380Z",
|
||||
"start_time": "2025-08-21T09:03:10.102301Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "print(title)",
|
||||
"id": "9ae9f13772cfed6a",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[<p class=\"title\" data-v-127dd018=\"\">根据《期货经营机构投资者适当性管理实施指引(试行)》,经营机构评估,划分所销售产品或者所提供服务的风险等级时,涉及投资组合的产品或服务的,下列表述中正确的是( )。</p>]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 34
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T09:11:05.144092Z",
|
||||
"start_time": "2025-08-21T09:11:05.136517Z"
|
||||
"end_time": "2025-08-22T01:41:12.394198Z",
|
||||
"start_time": "2025-08-22T01:41:12.386634Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"out_options_box = soup.find_all('div', class_='options-box')\n",
|
||||
"def write2db(index, result):\n",
|
||||
"\n",
|
||||
" if \"tf\" not in result:\n",
|
||||
"\n",
|
||||
"if (len(out_options_box)!=1):\n",
|
||||
" raise out_options_box\n",
|
||||
"out_options = out_options_box[0].find_all(\"div\",class_=\"options-item\")\n",
|
||||
" conn.execute(\n",
|
||||
" \"INSERT INTO questions (title, chapter, q_num, q_type, question, a, b, c, d, a_result, b_result, c_result, d_result, explanation) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\",\n",
|
||||
" (\n",
|
||||
" info[0],\n",
|
||||
" info[1],\n",
|
||||
" index,\n",
|
||||
" \"多选题\",\n",
|
||||
" result.get(\"title\"),\n",
|
||||
" result.get(\"a\")[0],\n",
|
||||
" result.get(\"b\")[0],\n",
|
||||
" result.get(\"c\")[0],\n",
|
||||
" result.get(\"d\")[0],\n",
|
||||
" result.get(\"a\")[1],\n",
|
||||
" result.get(\"b\")[1],\n",
|
||||
" result.get(\"c\")[1],\n",
|
||||
" result.get(\"d\")[1],\n",
|
||||
" result.get(\"analyze\"),\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"for out_option in out_options:\n",
|
||||
" abcd = out_option.find(\"p\",\"label\")\n",
|
||||
" else:\n",
|
||||
" if result[\"tf\"] == 0:\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" trueFalse = False\n",
|
||||
" if \"success-active\" in abcd.get(\"class\"):\n",
|
||||
" trueFalse = True\n",
|
||||
" conn.execute(\n",
|
||||
" \"INSERT INTO questions (title, chapter, q_num, q_type, question, a, b, c, d, a_result, b_result, c_result, d_result, explanation) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\",\n",
|
||||
" (\n",
|
||||
" info[0],\n",
|
||||
" info[1],\n",
|
||||
" index,\n",
|
||||
" \"判断题\",\n",
|
||||
" result.get(\"title\"),\n",
|
||||
" \"\",\n",
|
||||
" \"\",\n",
|
||||
" \"\",\n",
|
||||
" \"\",\n",
|
||||
" 1 if result[\"tf\"] else 0,\n",
|
||||
" 0 if result[\"tf\"] else 1,\n",
|
||||
" \"\",\n",
|
||||
" \"\",\n",
|
||||
" result.get(\"analyze\"),\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" abcd = abcd.text.strip()\n",
|
||||
"\n",
|
||||
" answer = out_option.find(\"p\",\"text\").text.strip()\n",
|
||||
"\n",
|
||||
" print(abcd, answer, trueFalse)"
|
||||
" conn.commit()"
|
||||
],
|
||||
"id": "11d9051ab089122d",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"A 可以按照产品或服务对应的任何一个风险等级进行评估 False\n",
|
||||
"B 应当按照产品或服务最低风险等级进行评估 False\n",
|
||||
"C 应当按照产品或服务最高风险等级进行评估 False\n",
|
||||
"D 应当按照产品或服务整体风险等级进行评估 True\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 39
|
||||
"id": "853f278c1123cae1",
|
||||
"outputs": [],
|
||||
"execution_count": 69
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-21T08:55:04.344999Z",
|
||||
"start_time": "2025-08-21T08:55:04.339191Z"
|
||||
"end_time": "2025-08-22T01:46:12.973092Z",
|
||||
"start_time": "2025-08-22T01:46:12.968961Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "print(analyze)",
|
||||
"id": "b7f43a482ce3c619",
|
||||
"source": "info = [\"天一\",0,130]",
|
||||
"id": "71ef002122c67647",
|
||||
"outputs": [],
|
||||
"execution_count": 81
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-22T01:50:44.170181Z",
|
||||
"start_time": "2025-08-22T01:50:27.773842Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"for i in range(info[2]):\n",
|
||||
" p = get_html()\n",
|
||||
" result = html_parser(p)\n",
|
||||
" write2db(i, result)\n",
|
||||
" next_page()\n",
|
||||
"\n"
|
||||
],
|
||||
"id": "11d9051ab089122d",
|
||||
"outputs": [],
|
||||
"execution_count": 89
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-22T01:10:27.088143Z",
|
||||
"start_time": "2025-08-22T01:10:27.076521Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "",
|
||||
"id": "ad769b774bac8989",
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[<div class=\"analyze\" data-v-24612254=\"\">涉及投资组合的产品或服务,应当按照产品或服务整体风险等级进行评估。 <br/><br/> </div>]\n"
|
||||
"<div class=\"answer CORRECT\" data-v-ee229d58=\"\">正确</div>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 22
|
||||
"execution_count": 40
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-08-22T01:06:54.877198Z",
|
||||
"start_time": "2025-08-22T01:06:54.867849Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": "",
|
||||
"id": "e1474fd283674850",
|
||||
"outputs": [],
|
||||
"execution_count": 35
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
@@ -271,7 +357,7 @@
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": "",
|
||||
"id": "ad769b774bac8989"
|
||||
"id": "54ad268f864e1f6c"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
BIN
data.db.zip
BIN
data.db.zip
Binary file not shown.
Reference in New Issue
Block a user