# Scrapy DrissionPage 集成工具
## 📌 项目简介
Scrapy DrissionPage 是一个将 DrissionPage 与 Scrapy 框架无缝集成的扩展工具,让您可以在 Scrapy 爬虫中使用 DrissionPage 的全部功能。本扩展工具支持浏览器自动化和数据包收发两种模式,并可以自由切换,大幅提高爬虫开发效率与稳定性。
### 主要特性:
- 🌐 **模式自由切换**:支持在浏览器模式(chromium)和会话模式(session)间动态切换
- 🚀 **性能优化**:提供数据读取加速功能,支持静态解析,提高数据提取速度
- 📦 **数据包监听**:可监听网络请求,轻松获取AJAX加载的数据
- 📥 **文件下载**:集成下载功能,支持自定义保存路径和文件名
- 🔍 **简洁语法**:兼容DrissionPage的简洁元素定位语法,大大减少代码量
## 📥 安装方法
### 依赖项
- Python 3.9+
- Scrapy 2.12.0+
- DrissionPage 4.1.0.18+
### 安装命令
```bash
# 安装DrissionPage(版本约束需加引号,避免 shell 将 ">" 解释为重定向)
pip install "DrissionPage>=4.1.0.18"
# 安装扩展工具
pip install scrapy-drissionpage
```
## ⚙️ 基本配置
在 Scrapy 项目的 `settings.py` 中添加以下配置:
```python
# 启用中间件
DOWNLOADER_MIDDLEWARES = {
'scrapy_drissionpage.middleware.DrissionPageMiddleware': 543,
}
# DrissionPage配置
DRISSIONPAGE_HEADLESS = True # 是否无头模式
DRISSIONPAGE_LOAD_MODE = 'normal' # 页面加载模式:normal, eager, none
DRISSIONPAGE_DOWNLOAD_PATH = 'downloads' # 下载路径
DRISSIONPAGE_TIMEOUT = 30 # 请求超时时间
DRISSIONPAGE_RETRY_TIMES = 3 # 重试次数
DRISSIONPAGE_RETRY_INTERVAL = 2 # 重试间隔(秒)
# 浏览器设置
DRISSIONPAGE_BROWSER_PATH = None # 浏览器路径,None使用默认浏览器
DRISSIONPAGE_INCOGNITO = True # 是否使用无痕模式
DRISSIONPAGE_CHROME_OPTIONS = ['--disable-gpu'] # Chrome启动选项
```
## 🧰 使用方法
### 1. 创建爬虫
继承 `DrissionSpider` 类创建爬虫:
```python
from scrapy_drissionpage.spider import DrissionSpider
class MySpider(DrissionSpider):
name = 'myspider'
def start_requests(self):
# 创建浏览器模式请求
yield self.drission_request(
'https://example.com',
page_type='chromium', # 使用浏览器模式
callback=self.parse
)
# 创建会话模式请求
yield self.drission_request(
'https://example.com/api',
page_type='session', # 使用会话模式
callback=self.parse_api
)
def parse(self, response):
# 使用DrissionPage的语法查找元素
title = response.ele('tag:h1').text
yield {'title': title}
```
### 2. 模式切换
您可以在不同的请求间动态切换模式,DrissionPage 4.1.0.18版本已支持更便捷的模式切换:
```python
def parse_login(self, response):
# 先使用浏览器模式登录
response.page.ele('#username').input('user123')
response.page.ele('#password').input('password123')
response.page.ele('#login-btn').click()
# 获取当前会话的session对象并使用它发送请求
session_page = self.get_utils().ModeSwitcher.to_session(response.page)
# 或者直接使用DrissionRequest发送会话模式请求
yield self.drission_request(
'https://example.com/api/data',
page_type='session', # 切换到会话模式
callback=self.parse_data
)
```
### 3. 数据提取加速
使用 `s_ele` 和 `s_eles` 方法进行静态解析,提高数据提取速度:
```python
def parse(self, response):
# 常规方式
# links = response.eles('t:a') # 速度较慢
# 加速方式
links = response.s_eles('t:a') # 速度提升约10倍
for link in links:
yield {
'text': link.text,
'url': link.attr('href')
}
```
### 4. 数据包监听
监听和拦截页面上的网络请求:
```python
def parse_with_monitor(self, response):
# 开始监听API请求
response.page.listen.xhr(callback=self.handle_xhr)
# 点击按钮触发AJAX请求
response.page.ele('#load-more').click()
# 等待一段时间让请求完成
response.wait(3)
# 获取完数据后停止加载(适用于none加载模式)
response.page.stop_loading()
def handle_xhr(self, flow):
# 处理XHR请求
if 'api/data' in flow.request.url:
data = flow.response.json
# 处理数据...
```
### 5. 文件下载
使用内置的下载功能:
```python
def parse_download(self, response):
# 设置下载配置
response.page.download.set_path('files')
# 点击下载按钮并指定文件名
response.page.ele('#download-btn').click()
# 等待下载完成
mission = response.page.wait.download_begin()
mission.wait()
yield {'file_path': mission.path}
```
## 📚 高级功能
### 1. 多标签页操作
```python
def parse_multi_tabs(self, response):
# 创建新标签页
tab2 = response.page.new_tab('https://example.com/page2')
# 从第一个标签页获取数据
title1 = response.page.title
# 从第二个标签页获取数据
title2 = tab2.title
# 切换回原标签页
response.page.activate_tab()
yield {
'title1': title1,
'title2': title2
}
```
### 2. iframe 操作
```python
def parse_iframe(self, response):
# 获取iframe对象
iframe = response.page.get_frame('#my-iframe')
# 在iframe中查找元素
data = iframe.ele('#data').text
yield {'iframe_data': data}
```
### 3. 执行JavaScript
```python
def parse_with_js(self, response):
# 执行JavaScript代码
result = response.page.run_js('return document.title')
# 修改页面元素
response.page.run_js('document.getElementById("demo").innerHTML = "Hello JavaScript"')
yield {'js_result': result}
```
## 🌰 完整示例
### 例1:爬取 Gitee Explore 页面项目列表
```python
import scrapy
from scrapy_drissionpage.spider import DrissionSpider
class GiteeSpider(DrissionSpider):
name = 'gitee_spider'
def start_requests(self):
yield self.drission_request(
'https://gitee.com/explore',
page_type='session', # 使用会话模式即可,不需要JavaScript
callback=self.parse
)
def parse(self, response):
# 使用静态解析加速
ul_ele = response.s_ele('tag:ul@text():全部推荐项目')
projects = ul_ele.s_eles('tag:a')
for project in projects:
# 只处理有href属性的链接
if project.attr('href') and '/explore/' not in project.attr('href'):
yield {
'name': project.text,
'url': response.urljoin(project.attr('href'))
}
```
### 例2:处理需要登录的网站
```python
import scrapy
from scrapy_drissionpage.spider import DrissionSpider
class LoginSpider(DrissionSpider):
name = 'login_spider'
def start_requests(self):
yield self.drission_request(
'https://example.com/login',
page_type='chromium', # 使用浏览器模式处理登录
callback=self.login
)
def login(self, response):
# 填写登录表单
response.page.ele('#username').input('your_username')
response.page.ele('#password').input('your_password')
response.page.ele('#login-btn').click()
# 等待登录成功,页面跳转
response.page.wait.url_change()
# 登录成功后访问用户中心
yield self.drission_request(
'https://example.com/user/dashboard',
page_type='session', # 登录后切换到会话模式提高效率
callback=self.parse_dashboard
)
def parse_dashboard(self, response):
# 提取用户信息
username = response.ele('.user-name').text
points = response.ele('.user-points').text
yield {
'username': username,
'points': points
}
```
## 📋 版本说明
### v1.0.2 (最新版)
- 🚀 支持DrissionPage 4.1.0.18版本
- 🛠 更新了API使用方式,适配最新的DrissionPage接口
- 🔄 提供向后兼容性支持,确保与旧版本兼容
- 🐛 修复了cookies设置问题
### v1.0.1
- 🐛 修复了包发布配置问题
- 🔧 优化了依赖管理
### v1.0.0
- 🎉 首次发布
- 🔌 提供了Scrapy与DrissionPage的基本集成功能
## 📄 许可证
本项目采用 MIT 许可证,允许个人和商业使用。
- **个人使用**:任何个人可以自由使用、修改和分发本软件。
- **商业用途**:允许将本软件用于商业产品和服务,无需支付额外费用。
查看完整的 [LICENSE](LICENSE) 文件获取更多信息。
---
通过以上配置和示例,您可以开始使用Scrapy DrissionPage扩展工具进行高效的网页抓取。该工具结合了Scrapy的分布式抓取能力和DrissionPage的强大自动化功能,为您提供了一个强大而灵活的网络抓取解决方案。
Raw data
{
"_id": null,
"home_page": "https://github.com/kingking888/scrapy-drissionpage",
"name": "scrapy-drissionpage",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.9",
"maintainer_email": null,
"keywords": "scrapy, drissionpage, crawler, spider, web scraping, automation, commercial-use, personal-use",
"author": "KingKing888",
"author_email": "KingKing888 <184108270@qq.com>",
"download_url": "https://files.pythonhosted.org/packages/a6/01/a5b212eba445bbf2ea9c50c6dc5d24513feaac63d345a591d4c7e05f6d3d/scrapy_drissionpage-1.0.3.tar.gz",
"platform": null,
"description": "# Scrapy DrissionPage \u96c6\u6210\u5de5\u5177\n\n## \ud83d\udccc \u9879\u76ee\u7b80\u4ecb\n\nScrapy DrissionPage \u662f\u4e00\u4e2a\u5c06 DrissionPage \u4e0e Scrapy \u6846\u67b6\u65e0\u7f1d\u96c6\u6210\u7684\u6269\u5c55\u5de5\u5177\uff0c\u8ba9\u60a8\u53ef\u4ee5\u5728 Scrapy \u722c\u866b\u4e2d\u4f7f\u7528 DrissionPage \u7684\u5168\u90e8\u529f\u80fd\u3002\u672c\u6269\u5c55\u5de5\u5177\u652f\u6301\u6d4f\u89c8\u5668\u81ea\u52a8\u5316\u548c\u6570\u636e\u5305\u6536\u53d1\u4e24\u79cd\u6a21\u5f0f\uff0c\u5e76\u53ef\u4ee5\u81ea\u7531\u5207\u6362\uff0c\u5927\u5e45\u63d0\u9ad8\u722c\u866b\u5f00\u53d1\u6548\u7387\u4e0e\u7a33\u5b9a\u6027\u3002\n\n### \u4e3b\u8981\u7279\u6027\uff1a\n\n- \ud83c\udf10 **\u6a21\u5f0f\u81ea\u7531\u5207\u6362**\uff1a\u652f\u6301\u5728\u6d4f\u89c8\u5668\u6a21\u5f0f\uff08chromium\uff09\u548c\u4f1a\u8bdd\u6a21\u5f0f\uff08session\uff09\u95f4\u52a8\u6001\u5207\u6362\n- \ud83d\ude80 **\u6027\u80fd\u4f18\u5316**\uff1a\u63d0\u4f9b\u6570\u636e\u8bfb\u53d6\u52a0\u901f\u529f\u80fd\uff0c\u652f\u6301\u9759\u6001\u89e3\u6790\uff0c\u63d0\u9ad8\u6570\u636e\u63d0\u53d6\u901f\u5ea6\n- \ud83d\udce6 **\u6570\u636e\u5305\u76d1\u542c**\uff1a\u53ef\u76d1\u542c\u7f51\u7edc\u8bf7\u6c42\uff0c\u8f7b\u677e\u83b7\u53d6AJAX\u52a0\u8f7d\u7684\u6570\u636e\n- \ud83d\udce5 **\u6587\u4ef6\u4e0b\u8f7d**\uff1a\u96c6\u6210\u4e0b\u8f7d\u529f\u80fd\uff0c\u652f\u6301\u81ea\u5b9a\u4e49\u4fdd\u5b58\u8def\u5f84\u548c\u6587\u4ef6\u540d\n- \ud83d\udd0d **\u7b80\u6d01\u8bed\u6cd5**\uff1a\u517c\u5bb9DrissionPage\u7684\u7b80\u6d01\u5143\u7d20\u5b9a\u4f4d\u8bed\u6cd5\uff0c\u5927\u5927\u51cf\u5c11\u4ee3\u7801\u91cf\n\n## \ud83d\udce5 \u5b89\u88c5\u65b9\u6cd5\n\n### \u4f9d\u8d56\u9879\n\n- Python 3.9+\n- Scrapy 2.12.0+\n- DrissionPage 4.1.0.18+\n\n### \u5b89\u88c5\u547d\u4ee4\n\n```bash\n# \u5b89\u88c5DrissionPage\npip install DrissionPage>=4.1.0.18\n\n# \u5b89\u88c5\u6269\u5c55\u5de5\u5177\npip install scrapy-drissionpage\n```\n\n## \u2699\ufe0f \u57fa\u672c\u914d\u7f6e\n\n\u5728 
Scrapy \u9879\u76ee\u7684 `settings.py` \u4e2d\u6dfb\u52a0\u4ee5\u4e0b\u914d\u7f6e\uff1a\n\n```python\n# \u542f\u7528\u4e2d\u95f4\u4ef6\nDOWNLOADER_MIDDLEWARES = {\n 'scrapy_drissionpage.middleware.DrissionPageMiddleware': 543,\n}\n\n# DrissionPage\u914d\u7f6e\nDRISSIONPAGE_HEADLESS = True # \u662f\u5426\u65e0\u5934\u6a21\u5f0f\nDRISSIONPAGE_LOAD_MODE = 'normal' # \u9875\u9762\u52a0\u8f7d\u6a21\u5f0f\uff1anormal, eager, none\nDRISSIONPAGE_DOWNLOAD_PATH = 'downloads' # \u4e0b\u8f7d\u8def\u5f84\nDRISSIONPAGE_TIMEOUT = 30 # \u8bf7\u6c42\u8d85\u65f6\u65f6\u95f4\nDRISSIONPAGE_RETRY_TIMES = 3 # \u91cd\u8bd5\u6b21\u6570\nDRISSIONPAGE_RETRY_INTERVAL = 2 # \u91cd\u8bd5\u95f4\u9694\uff08\u79d2\uff09\n\n# \u6d4f\u89c8\u5668\u8bbe\u7f6e\nDRISSIONPAGE_BROWSER_PATH = None # \u6d4f\u89c8\u5668\u8def\u5f84\uff0cNone\u4f7f\u7528\u9ed8\u8ba4\u6d4f\u89c8\u5668\nDRISSIONPAGE_INCOGNITO = True # \u662f\u5426\u4f7f\u7528\u65e0\u75d5\u6a21\u5f0f\nDRISSIONPAGE_CHROME_OPTIONS = ['--disable-gpu'] # Chrome\u542f\u52a8\u9009\u9879\n```\n\n## \ud83e\uddf0 \u4f7f\u7528\u65b9\u6cd5\n\n### 1. \u521b\u5efa\u722c\u866b\n\n\u7ee7\u627f `DrissionSpider` \u7c7b\u521b\u5efa\u722c\u866b\uff1a\n\n```python\nfrom scrapy_drissionpage.spider import DrissionSpider\n\nclass MySpider(DrissionSpider):\n name = 'myspider'\n \n def start_requests(self):\n # \u521b\u5efa\u6d4f\u89c8\u5668\u6a21\u5f0f\u8bf7\u6c42\n yield self.drission_request(\n 'https://example.com',\n page_type='chromium', # \u4f7f\u7528\u6d4f\u89c8\u5668\u6a21\u5f0f\n callback=self.parse\n )\n \n # \u521b\u5efa\u4f1a\u8bdd\u6a21\u5f0f\u8bf7\u6c42\n yield self.drission_request(\n 'https://example.com/api',\n page_type='session', # \u4f7f\u7528\u4f1a\u8bdd\u6a21\u5f0f\n callback=self.parse_api\n )\n \n def parse(self, response):\n # \u4f7f\u7528DrissionPage\u7684\u8bed\u6cd5\u67e5\u627e\u5143\u7d20\n title = response.ele('tag:h1').text\n yield {'title': title}\n```\n\n### 2. 
\u6a21\u5f0f\u5207\u6362\n\n\u60a8\u53ef\u4ee5\u5728\u4e0d\u540c\u7684\u8bf7\u6c42\u95f4\u52a8\u6001\u5207\u6362\u6a21\u5f0f\uff0cDrissionPage 4.1.0.18\u7248\u672c\u5df2\u652f\u6301\u66f4\u4fbf\u6377\u7684\u6a21\u5f0f\u5207\u6362\uff1a\n\n```python\ndef parse_login(self, response):\n # \u5148\u4f7f\u7528\u6d4f\u89c8\u5668\u6a21\u5f0f\u767b\u5f55\n response.page.ele('#username').input('user123')\n response.page.ele('#password').input('password123')\n response.page.ele('#login-btn').click()\n \n # \u83b7\u53d6\u5f53\u524d\u4f1a\u8bdd\u7684session\u5bf9\u8c61\u5e76\u4f7f\u7528\u5b83\u53d1\u9001\u8bf7\u6c42\n session_page = self.get_utils().ModeSwitcher.to_session(response.page)\n \n # \u6216\u8005\u76f4\u63a5\u4f7f\u7528DrissionRequest\u53d1\u9001\u4f1a\u8bdd\u6a21\u5f0f\u8bf7\u6c42\n yield self.drission_request(\n 'https://example.com/api/data',\n page_type='session', # \u5207\u6362\u5230\u4f1a\u8bdd\u6a21\u5f0f\n callback=self.parse_data\n )\n```\n\n### 3. \u6570\u636e\u63d0\u53d6\u52a0\u901f\n\n\u4f7f\u7528 `s_ele` \u548c `s_eles` \u65b9\u6cd5\u8fdb\u884c\u9759\u6001\u89e3\u6790\uff0c\u63d0\u9ad8\u6570\u636e\u63d0\u53d6\u901f\u5ea6\uff1a\n\n```python\ndef parse(self, response):\n # \u5e38\u89c4\u65b9\u5f0f\n # links = response.eles('t:a') # \u901f\u5ea6\u8f83\u6162\n \n # \u52a0\u901f\u65b9\u5f0f\n links = response.s_eles('t:a') # \u901f\u5ea6\u63d0\u5347\u7ea610\u500d\n \n for link in links:\n yield {\n 'text': link.text,\n 'url': link.attr('href')\n }\n```\n\n### 4. 
\u6570\u636e\u5305\u76d1\u542c\n\n\u76d1\u542c\u548c\u62e6\u622a\u9875\u9762\u4e0a\u7684\u7f51\u7edc\u8bf7\u6c42\uff1a\n\n```python\ndef parse_with_monitor(self, response):\n # \u5f00\u59cb\u76d1\u542cAPI\u8bf7\u6c42\n response.page.listen.xhr(callback=self.handle_xhr)\n \n # \u70b9\u51fb\u6309\u94ae\u89e6\u53d1AJAX\u8bf7\u6c42\n response.page.ele('#load-more').click()\n \n # \u7b49\u5f85\u4e00\u6bb5\u65f6\u95f4\u8ba9\u8bf7\u6c42\u5b8c\u6210\n response.wait(3)\n \n # \u83b7\u53d6\u5b8c\u6570\u636e\u540e\u505c\u6b62\u52a0\u8f7d\uff08\u9002\u7528\u4e8enone\u52a0\u8f7d\u6a21\u5f0f\uff09\n response.page.stop_loading()\n\ndef handle_xhr(self, flow):\n # \u5904\u7406XHR\u8bf7\u6c42\n if 'api/data' in flow.request.url:\n data = flow.response.json\n # \u5904\u7406\u6570\u636e...\n```\n\n### 5. \u6587\u4ef6\u4e0b\u8f7d\n\n\u4f7f\u7528\u5185\u7f6e\u7684\u4e0b\u8f7d\u529f\u80fd\uff1a\n\n```python\ndef parse_download(self, response):\n # \u8bbe\u7f6e\u4e0b\u8f7d\u914d\u7f6e\n response.page.download.set_path('files')\n \n # \u70b9\u51fb\u4e0b\u8f7d\u6309\u94ae\u5e76\u6307\u5b9a\u6587\u4ef6\u540d\n response.page.ele('#download-btn').click()\n \n # \u7b49\u5f85\u4e0b\u8f7d\u5b8c\u6210\n mission = response.page.wait.download_begin()\n mission.wait()\n \n yield {'file_path': mission.path}\n```\n\n## \ud83d\udcda \u9ad8\u7ea7\u529f\u80fd\n\n### 1. \u591a\u6807\u7b7e\u9875\u64cd\u4f5c\n\n```python\ndef parse_multi_tabs(self, response):\n # \u521b\u5efa\u65b0\u6807\u7b7e\u9875\n tab2 = response.page.new_tab('https://example.com/page2')\n \n # \u4ece\u7b2c\u4e00\u4e2a\u6807\u7b7e\u9875\u83b7\u53d6\u6570\u636e\n title1 = response.page.title\n \n # \u4ece\u7b2c\u4e8c\u4e2a\u6807\u7b7e\u9875\u83b7\u53d6\u6570\u636e\n title2 = tab2.title\n \n # \u5207\u6362\u56de\u539f\u6807\u7b7e\u9875\n response.page.activate_tab()\n \n yield {\n 'title1': title1,\n 'title2': title2\n }\n```\n\n### 2. 
iframe \u64cd\u4f5c\n\n```python\ndef parse_iframe(self, response):\n # \u83b7\u53d6iframe\u5bf9\u8c61\n iframe = response.page.get_frame('#my-iframe')\n \n # \u5728iframe\u4e2d\u67e5\u627e\u5143\u7d20\n data = iframe.ele('#data').text\n \n yield {'iframe_data': data}\n```\n\n### 3. \u6267\u884cJavaScript\n\n```python\ndef parse_with_js(self, response):\n # \u6267\u884cJavaScript\u4ee3\u7801\n result = response.page.run_js('return document.title')\n \n # \u4fee\u6539\u9875\u9762\u5143\u7d20\n response.page.run_js('document.getElementById(\"demo\").innerHTML = \"Hello JavaScript\"')\n \n yield {'js_result': result}\n```\n\n## \ud83c\udf30 \u5b8c\u6574\u793a\u4f8b\n\n### \u4f8b1\uff1a\u722c\u53d6GiteeExplore\u9875\u9762\u9879\u76ee\u5217\u8868\n\n```python\nimport scrapy\nfrom scrapy_drissionpage.spider import DrissionSpider\n\nclass GiteeSpider(DrissionSpider):\n name = 'gitee_spider'\n \n def start_requests(self):\n yield self.drission_request(\n 'https://gitee.com/explore',\n page_type='session', # \u4f7f\u7528\u4f1a\u8bdd\u6a21\u5f0f\u5373\u53ef\uff0c\u4e0d\u9700\u8981JavaScript\n callback=self.parse\n )\n \n def parse(self, response):\n # \u4f7f\u7528\u9759\u6001\u89e3\u6790\u52a0\u901f\n ul_ele = response.s_ele('tag:ul@text():\u5168\u90e8\u63a8\u8350\u9879\u76ee')\n projects = ul_ele.s_eles('tag:a')\n \n for project in projects:\n # \u53ea\u5904\u7406\u6709href\u5c5e\u6027\u7684\u94fe\u63a5\n if project.attr('href') and '/explore/' not in project.attr('href'):\n yield {\n 'name': project.text,\n 'url': response.urljoin(project.attr('href'))\n }\n```\n\n### \u4f8b2\uff1a\u5904\u7406\u9700\u8981\u767b\u5f55\u7684\u7f51\u7ad9\n\n```python\nimport scrapy\nfrom scrapy_drissionpage.spider import DrissionSpider\n\nclass LoginSpider(DrissionSpider):\n name = 'login_spider'\n \n def start_requests(self):\n yield self.drission_request(\n 'https://example.com/login',\n page_type='chromium', # \u4f7f\u7528\u6d4f\u89c8\u5668\u6a21\u5f0f\u5904\u7406\u767b\u5f55\n 
callback=self.login\n )\n \n def login(self, response):\n # \u586b\u5199\u767b\u5f55\u8868\u5355\n response.page.ele('#username').input('your_username')\n response.page.ele('#password').input('your_password')\n response.page.ele('#login-btn').click()\n \n # \u7b49\u5f85\u767b\u5f55\u6210\u529f\uff0c\u9875\u9762\u8df3\u8f6c\n response.page.wait.url_change()\n \n # \u767b\u5f55\u6210\u529f\u540e\u8bbf\u95ee\u7528\u6237\u4e2d\u5fc3\n yield self.drission_request(\n 'https://example.com/user/dashboard',\n page_type='session', # \u767b\u5f55\u540e\u5207\u6362\u5230\u4f1a\u8bdd\u6a21\u5f0f\u63d0\u9ad8\u6548\u7387\n callback=self.parse_dashboard\n )\n \n def parse_dashboard(self, response):\n # \u63d0\u53d6\u7528\u6237\u4fe1\u606f\n username = response.ele('.user-name').text\n points = response.ele('.user-points').text\n \n yield {\n 'username': username,\n 'points': points\n }\n```\n\n## \ud83d\udccb \u7248\u672c\u8bf4\u660e\n\n### v1.0.2 (\u6700\u65b0\u7248)\n- \ud83d\ude80 \u652f\u6301DrissionPage 4.1.0.18\u7248\u672c\n- \ud83d\udee0 \u66f4\u65b0\u4e86API\u4f7f\u7528\u65b9\u5f0f\uff0c\u9002\u914d\u6700\u65b0\u7684DrissionPage\u63a5\u53e3\n- \ud83d\udd04 \u63d0\u4f9b\u5411\u540e\u517c\u5bb9\u6027\u652f\u6301\uff0c\u786e\u4fdd\u4e0e\u65e7\u7248\u672c\u517c\u5bb9\n- \ud83d\udc1b \u4fee\u590d\u4e86cookies\u8bbe\u7f6e\u95ee\u9898\n\n### v1.0.1\n- \ud83d\udc1b \u4fee\u590d\u4e86\u5305\u53d1\u5e03\u914d\u7f6e\u95ee\u9898\n- \ud83d\udd27 \u4f18\u5316\u4e86\u4f9d\u8d56\u7ba1\u7406\n\n### v1.0.0\n- \ud83c\udf89 \u9996\u6b21\u53d1\u5e03\n- \ud83d\udd0c \u63d0\u4f9b\u4e86Scrapy\u4e0eDrissionPage\u7684\u57fa\u672c\u96c6\u6210\u529f\u80fd\n\n## \ud83d\udcc4 \u8bb8\u53ef\u8bc1\n\n\u672c\u9879\u76ee\u91c7\u7528 MIT \u8bb8\u53ef\u8bc1\uff0c\u5141\u8bb8\u4e2a\u4eba\u548c\u5546\u4e1a\u4f7f\u7528\u3002\n\n- **\u4e2a\u4eba\u4f7f\u7528**\uff1a\u4efb\u4f55\u4e2a\u4eba\u53ef\u4ee5\u81ea\u7531\u4f7f\u7528\u3001\u4fee\u6539\u548c\u5206\u53d1\u672c\u8f6f\u4ef6\u3002\n- 
**\u5546\u4e1a\u7528\u9014**\uff1a\u5141\u8bb8\u5c06\u672c\u8f6f\u4ef6\u7528\u4e8e\u5546\u4e1a\u4ea7\u54c1\u548c\u670d\u52a1\uff0c\u65e0\u9700\u652f\u4ed8\u989d\u5916\u8d39\u7528\u3002\n\n\u67e5\u770b\u5b8c\u6574\u7684 [LICENSE](LICENSE) \u6587\u4ef6\u83b7\u53d6\u66f4\u591a\u4fe1\u606f\u3002\n\n---\n\n\u901a\u8fc7\u4ee5\u4e0a\u914d\u7f6e\u548c\u793a\u4f8b\uff0c\u60a8\u53ef\u4ee5\u5f00\u59cb\u4f7f\u7528Scrapy DrissionPage\u6269\u5c55\u5de5\u5177\u8fdb\u884c\u9ad8\u6548\u7684\u7f51\u9875\u6293\u53d6\u3002\u8be5\u5de5\u5177\u7ed3\u5408\u4e86Scrapy\u7684\u5206\u5e03\u5f0f\u6293\u53d6\u80fd\u529b\u548cDrissionPage\u7684\u5f3a\u5927\u81ea\u52a8\u5316\u529f\u80fd\uff0c\u4e3a\u60a8\u63d0\u4f9b\u4e86\u4e00\u4e2a\u5f3a\u5927\u800c\u7075\u6d3b\u7684\u7f51\u7edc\u6293\u53d6\u89e3\u51b3\u65b9\u6848\u3002\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "\u5c06Scrapy\u722c\u866b\u6846\u67b6\u4e0eDrissionPage\u7f51\u9875\u81ea\u52a8\u5316\u5de5\u5177\u8fdb\u884c\u65e0\u7f1d\u96c6\u6210",
"version": "1.0.3",
"project_urls": {
"Bug Tracker": "https://github.com/xyuns-cn/scrapy-drissionpage/issues",
"Homepage": "https://github.com/xyuns-cn/scrapy-drissionpage"
},
"split_keywords": [
"scrapy",
" drissionpage",
" crawler",
" spider",
" web scraping",
" automation",
" commercial-use",
" personal-use"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "496af1b90487dfa86c2d5ce17c7ac175819fab59a934cc4d75c8856123b98737",
"md5": "ebc2ff668e7a0da8e43d726ce956733e",
"sha256": "cd91e866204c1e22a19efb2588c8fdd6e35c972e103996e0242075af3d6f9703"
},
"downloads": -1,
"filename": "scrapy_drissionpage-1.0.3-py3-none-any.whl",
"has_sig": false,
"md5_digest": "ebc2ff668e7a0da8e43d726ce956733e",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.9",
"size": 21081,
"upload_time": "2025-04-09T03:07:41",
"upload_time_iso_8601": "2025-04-09T03:07:41.055659Z",
"url": "https://files.pythonhosted.org/packages/49/6a/f1b90487dfa86c2d5ce17c7ac175819fab59a934cc4d75c8856123b98737/scrapy_drissionpage-1.0.3-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "a601a5b212eba445bbf2ea9c50c6dc5d24513feaac63d345a591d4c7e05f6d3d",
"md5": "eab4cb54cae5cd7f4ea68138b957e982",
"sha256": "dd99937b1b93e51ccc3c1c0252aed33a47566bb5704fc72a0b274a8468763cce"
},
"downloads": -1,
"filename": "scrapy_drissionpage-1.0.3.tar.gz",
"has_sig": false,
"md5_digest": "eab4cb54cae5cd7f4ea68138b957e982",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.9",
"size": 23530,
"upload_time": "2025-04-09T03:07:42",
"upload_time_iso_8601": "2025-04-09T03:07:42.516760Z",
"url": "https://files.pythonhosted.org/packages/a6/01/a5b212eba445bbf2ea9c50c6dc5d24513feaac63d345a591d4c7e05f6d3d/scrapy_drissionpage-1.0.3.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-04-09 03:07:42",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "kingking888",
"github_project": "scrapy-drissionpage",
"github_not_found": true,
"lcname": "scrapy-drissionpage"
}