{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2025-08-21T08:51:45.729834Z", "start_time": "2025-08-21T08:51:45.724789Z" } },
   "source": [
    "import time\n",
    "\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.edge.service import Service\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait\n",
    "from selenium.webdriver.support import expected_conditions as EC\n",
    "from selenium.webdriver.edge.options import Options"
   ],
   "outputs": [],
   "execution_count": 10
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T08:51:45.747072Z", "start_time": "2025-08-21T08:51:45.743237Z" } },
   "cell_type": "code",
   "source": [
    "from bs4 import BeautifulSoup\n",
    "import sqlite3"
   ],
   "id": "f184b255d5098302",
   "outputs": [],
   "execution_count": 11
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T08:51:45.773737Z", "start_time": "2025-08-21T08:51:45.769129Z" } },
   "cell_type": "code",
   "source": [
    "# Open (or create) the SQLite database file; the path is relative to the working directory.\n",
    "db_path = '../data.db'\n",
    "conn = sqlite3.connect(db_path)"
   ],
   "id": "4813fcf4dea28b8d",
   "outputs": [],
   "execution_count": 12
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T08:51:45.797561Z", "start_time": "2025-08-21T08:51:45.790647Z" } },
   "cell_type": "code",
   "source": [
    "# Edge user-data directory passed to the browser (machine-specific absolute path).\n",
    "user_data_dir = r\"D:\\code\\edge\"\n",
    "\n",
    "edge_options = Options()\n",
    "# edge_options.add_argument(\"--headless\")  # optional: run without a browser window\n",
    "# Flags are applied in the order listed.\n",
    "for browser_flag in (\n",
    "    \"--disable-gpu\",\n",
    "    \"--no-sandbox\",\n",
    "    \"--disable-extensions\",\n",
    "    \"--disable-plugins\",\n",
    "    \"--disable-popup-blocking\",\n",
    "    \"--disable-infobars\",\n",
    "    \"--disable-notifications\",\n",
    "    \"--no-first-run\",\n",
    "    \"--no-default-browser-check\",\n",
    "    f\"--user-data-dir={user_data_dir}\",\n",
    "    # Profile inside the user-data directory (optional; \"Default\" is the default profile)\n",
    "    \"--profile-directory=Default\",\n",
    "):\n",
    "    edge_options.add_argument(browser_flag)"
   ],
   "id": "e5632e44a52d5dc4",
   "outputs": [],
   "execution_count": 13
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T08:51:47.136948Z", "start_time": "2025-08-21T08:51:45.814223Z" } },
   "cell_type": "code",
   "source": [
    "# Explicit EdgeDriver path (optional; may be omitted when the driver is already configured in the environment)\n",
    "service = Service(executable_path=r\"D:\\app\\edgeDriver\\msedgedriver.exe\")\n",
    "# Create the Edge browser instance\n",
    "driver = webdriver.Edge(service=service, options=edge_options)"
   ],
   "id": "28b1479c3decc6b1",
   "outputs": [],
   "execution_count": 14
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T08:51:53.084554Z", "start_time": "2025-08-21T08:51:47.152588Z" } },
   "cell_type": "code",
   "source": [
    "PAGE_URL = \"https://www.tianyiwangxiao.com/new/question-bank/learn-center-analyze/4d60c96ef05c452b812654e78af7701a/1957604601548296194?from=ht2\"\n",
    "WAIT_TIMEOUT_S = 720  # timeout (seconds) shared by every wait.until(...) call in this notebook\n",
    "SETTLE_DELAY_S = 3    # fixed pause (seconds) after <body> is present\n",
    "\n",
    "driver.get(PAGE_URL)\n",
    "\n",
    "# Wait for the page to render (here: until the <body> element is present)\n",
    "wait = WebDriverWait(driver, WAIT_TIMEOUT_S)\n",
    "wait.until(EC.presence_of_element_located((By.TAG_NAME, \"body\")))\n",
    "time.sleep(SETTLE_DELAY_S)"
   ],
   "id": "779f88e1c3670c02",
   "outputs": [],
   "execution_count": 15
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T08:51:59.934014Z", "start_time": "2025-08-21T08:51:59.829632Z" } },
   "cell_type": "code",
   "source": [
    "# Enter memorization mode by clicking the \"next\" arrow icon.\n",
    "# Wait until the icon is clickable (visible AND enabled), not merely visible, before clicking it.\n",
    "clickable_element = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, \".el-icon-right.next\")))\n",
    "clickable_element.click()"
   ],
   "id": "721f5a8a872bfdce",
   "outputs": [],
   "execution_count": 17
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T08:52:07.855382Z", "start_time": "2025-08-21T08:52:07.834085Z" } },
   "cell_type": "code",
   "source": [
    "# Grab the HTML of the page as currently rendered by the browser\n",
    "rendered_html = driver.page_source"
   ],
   "id": "aa728e660ee9bbe5",
   "outputs": [],
   "execution_count": 18
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T09:03:09.025560Z", "start_time": "2025-08-21T09:03:08.973835Z" } },
   "cell_type": "code",
   "source": [
    "soup = BeautifulSoup(rendered_html, 'html.parser')\n",
    "\n",
    "# Each find_all returns every match, so all three names below are result lists.\n",
    "title = soup.find_all('p', class_='title')\n",
    "\n",
    "out_options_box = soup.find_all('div', class_='options-box')\n",
    "\n",
    "analyze = soup.find_all('div', class_='analyze')"
   ],
   "id": "5db0bbd564c0b53f",
   "outputs": [],
   "execution_count": 33
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T09:03:10.108380Z", "start_time": "2025-08-21T09:03:10.102301Z" } },
   "cell_type": "code",
   "source": "print(title)",
   "id": "9ae9f13772cfed6a",
   "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[

根据《期货经营机构投资者适当性管理实施指引(试行)》,经营机构评估,划分所销售产品或者所提供服务的风险等级时,涉及投资组合的产品或服务的,下列表述中正确的是(  )。

]\n" ] } ], "execution_count": 34 },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T09:11:05.144092Z", "start_time": "2025-08-21T09:11:05.136517Z" } },
   "cell_type": "code",
   "source": [
    "out_options_box = soup.find_all('div', class_='options-box')\n",
    "\n",
    "# Exactly one options container is expected. Raise a real exception otherwise:\n",
    "# raising the result list itself would only produce an unrelated TypeError.\n",
    "if len(out_options_box) != 1:\n",
    "    raise ValueError(f\"expected exactly one 'options-box' div, found {len(out_options_box)}\")\n",
    "out_options = out_options_box[0].find_all(\"div\", class_=\"options-item\")\n",
    "\n",
    "for out_option in out_options:\n",
    "    label_tag = out_option.find(\"p\", \"label\")\n",
    "    text_tag = out_option.find(\"p\", \"text\")\n",
    "    if label_tag is None or text_tag is None:\n",
    "        raise ValueError(\"an 'options-item' div is missing its 'label' or 'text' paragraph\")\n",
    "\n",
    "    # True when the label carries the 'success-active' class\n",
    "    # (presumably the site's marker for the correct answer - confirm against the page).\n",
    "    trueFalse = \"success-active\" in label_tag.get(\"class\", [])\n",
    "\n",
    "    abcd = label_tag.text.strip()\n",
    "    answer = text_tag.text.strip()\n",
    "\n",
    "    print(abcd, answer, trueFalse)"
   ],
   "id": "11d9051ab089122d",
   "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
    "A 可以按照产品或服务对应的任何一个风险等级进行评估 False\n",
    "B 应当按照产品或服务最低风险等级进行评估 False\n",
    "C 应当按照产品或服务最高风险等级进行评估 False\n",
    "D 应当按照产品或服务整体风险等级进行评估 True\n"
   ] } ],
   "execution_count": 39
  },
  {
   "metadata": { "ExecuteTime": { "end_time": "2025-08-21T08:55:04.344999Z", "start_time": "2025-08-21T08:55:04.339191Z" } },
   "cell_type": "code",
   "source": "print(analyze)",
   "id": "b7f43a482ce3c619",
   "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[
涉及投资组合的产品或服务,应当按照产品或服务整体风险等级进行评估。 

 
]\n" ] } ], "execution_count": 22 }, { "metadata": {}, "cell_type": "code", "outputs": [], "execution_count": null, "source": "", "id": "ad769b774bac8989" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }