# captures and processes network requests (selenium-wire and undetected-chromedriver) and converts them to pandas DataFrames
## Tested against Windows 10 / Python 3.10 / Anaconda
## pip install wiredseleniumdf
```python
def get_driver(save_folder=None, stop_keys="ctrl+alt+e", scan_time=10, **kwargs):
"""
Initialize a Selenium WebDriver Instance (undetected-chromedriver) with Request Monitoring (selenium-wire).
This function initializes a Selenium WebDriver instance with request monitoring capabilities.
It allows you to interact with web pages while capturing and processing network requests
made during the interactions.
Parameters:
- save_folder (str, optional): The folder path where captured request data should be saved
as DataFrames in pickle format. If None, no data is saved. Defaults to None.
- stop_keys (str, optional): A key combination to stop the request monitoring process.
Defaults to "ctrl+alt+e".
- scan_time (int, optional): The interval in seconds for scanning and capturing requests.
Defaults to 10 seconds.
- **kwargs: Additional keyword arguments to configure the Selenium WebDriver instance.
Returns:
- driver (Selenium WebDriver): An initialized Selenium WebDriver instance with request
monitoring capabilities.
Usage:
1. Call this function to create a WebDriver instance.
2. The WebDriver instance can be used for web interactions and will automatically
capture network requests.
3. Optionally, provide a save folder to save captured data as pickle files.
Example:
>>> driver = get_driver(save_folder="request_data", stop_keys="ctrl+alt+e")
Note:
- This function combines Selenium WebDriver functionality with request monitoring
capabilities for advanced web testing and analysis.
- The request monitoring continues until the specified stop_keys combination is pressed
or the WebDriver session is closed.
- Use keyboard shortcuts (stop_keys) to control when to stop request monitoring.
"""
```
```python
# Download the selenium-wire root certificate from https://github.com/wkeeling/selenium-wire/raw/master/seleniumwire/ca.crt and install it into the "Trusted Root Certification Authorities" store.
from wiredseleniumdf import get_driver
import random
import requests
import bs4
# Options for the monitored browser session:
#   save_folder - folder where captured request data is saved as pickled DataFrames
#   stop_keys   - key combination that stops the request monitoring
#   scan_time   - interval, in seconds, between request scans
driver_options = {
    "save_folder": "c:\\requestsdfs",
    "stop_keys": "ctrl+alt+e",
    "scan_time": 10,
}
driver = get_driver(**driver_options)

# Open the demo upload page; requests made while using it are captured.
upload_page_url = "https://testpages.eviltester.com/styled/file-upload-test.html"
driver.get(upload_page_url)

# driver.requests_dfs is a dict of captured request DataFrames, keyed by timestamp.
print(driver.requests_dfs)

# Pick the DataFrame that contains the upload (POST) request. The key below is
# the timestamp from the session this example was recorded in.
capture_key = 1693779184.1983006
df = driver.requests_dfs[capture_key]  # timestamps used as keys in dict

# Row 1 of that DataFrame is the POST request; print all of its fields.
post_row = df.iloc[1]
print(post_row.to_string())
r"""
id 3810eb8d-ce30-46f6-8cdd-3890728a66de
method POST
url https://testpages.eviltester.com/uploads/filep...
headers {'Host': 'testpages.eviltester.com', 'Connecti...
_body b'------WebKitFormBoundaryPmEb1NMyJICQA4B5\r\n...
response 200 OK
date 2023-09-03 19:12:57.084177
ws_messages []
cert {'subject': [(b'CN', b'testpages.eviltester.co...
intern_id 1
cert__subject [(b'CN', b'testpages.eviltester.com')]
cert__serial 325007634443972637219049593487986324830598
cert__key (RSA, 2048)
cert__signature_algorithm b'sha256WithRSAEncryption'
cert__expired False
cert__issuer [(b'C', b'US'), (b'O', b"Let's Encrypt"), (b'C...
cert__notbefore 2023-08-28 09:04:49
cert__notafter 2023-11-26 09:04:48
cert__organization None
cert__cn b'testpages.eviltester.com'
cert__altnames [b'testpages.eviltester.com']
headers__x_goog_api_key NaN
headers__sec_fetch_site NaN
headers__sec_fetch_mode NaN
headers__sec_fetch_dest NaN
headers__user_agent NaN
headers__accept_encoding NaN
headers__accept_language NaN
headers__Host testpages.eviltester.com
headers__Connection keep-alive
headers__Content_Length 619
headers__Cache_Control max-age=0
headers__sec_ch_ua "Chromium";v="116", "Not)A;Brand";v="24", "Goo...
headers__sec_ch_ua_mobile ?0
headers__sec_ch_ua_platform "Windows"
headers__Upgrade_Insecure_Requests 1
headers__Origin https://testpages.eviltester.com
headers__Content_Type multipart/form-data; boundary=----WebKitFormBo...
headers__User_Agent Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...
headers__Accept text/html,application/xhtml+xml,application/xm...
headers__Sec_Fetch_Site same-origin
headers__Sec_Fetch_Mode navigate
headers__Sec_Fetch_User ?1
headers__Sec_Fetch_Dest document
headers__Referer https://testpages.eviltester.com/styled/file-u...
headers__Accept_Encoding gzip, deflate, br
headers__Accept_Language en-US,en;q=0.9
"""
# Row 1 of the captured DataFrame is the multipart POST that uploaded the file.
wholebody = df.iloc[1]._body
# Work on a copy of the headers so the captured row itself is not modified.
wholeheader = df.iloc[1].headers.copy()

# Contents of the file that was uploaded while the request was being captured.
with open(R"C:\newfile.txt", mode="rb") as f:
    datauploaded = f.read()

# Contents of the file that should be uploaded instead.
with open(r"C:\testfilex.txt", mode="rb") as f:
    newdata = f.read()

# Swap the originally uploaded bytes for the new file's bytes inside the
# captured multipart body.
newdata2upload = wholebody.replace(datauploaded, newdata)

# Prefix the file name with a random number. The replacement must end with the
# closing double quote of the filename="..." parameter; leaving it out sends a
# malformed Content-Disposition header and the server echoes a stray leading
# quote in the file name (the sample output shown after this script was
# captured before the quote was restored and still shows it).
newdata2uploadwithnewfilename = newdata2upload.replace(
    b'filename="newfile.txt"',
    b'filename="' + str(random.randint(1000, 2990)).encode() + b'newfile.txt"',
)

# The body length has changed, so Content-Length must be recomputed.
wholeheader["Content-Length"] = str(len(newdata2uploadwithnewfilename))

# Replay the modified upload with the captured headers and the rewritten body.
res = requests.post(
    "https://testpages.eviltester.com/uploads/fileprocessor",
    headers=wholeheader,
    data=newdata2uploadwithnewfilename,
)

# Name the parser explicitly so the result does not depend on which parsers
# happen to be installed and no parser-guessing warning is emitted.
print(bs4.BeautifulSoup(res.text, "html.parser"))
"""
<!DOCTYPE html>
<html>
<head>
<title>Uploaded Results Page</title>
<link href="/css/testpages.css" rel="stylesheet"/>
<script src="js/toc.js"></script>
<!-- HEAD -->
</head>
<body>
<div class="left-col" style="float: left"></div>
<div class="page-body">
<div class="navigation">
<div class="page-navigation">
<a href="/styled/index.html">Index</a>
</div>
<div class="app-navigation">
<!-- APPNAVIGATION CONTENT -->
</div>
</div>
<h1>Uploaded File</h1>
<div class="explanation">
<p>You uploaded a file. This is the result.
</p>
</div>
<div class="centered">
<h2>You uploaded this file:</h2>
<div>
<p id="uploadedfilename">"1901newfile.txt</p>
</div>
<div class="form-label">
<button class="styled-click-button" id="goback" onclick="window.history.back()">Upload Another</button>
</div>
</div>
<div class="page-footer">
<p><a href="https://eviltester.com" rel="noopener noreferrer" target="_blank">EvilTester.com</a>,
<a href="https://compendiumdev.co.uk" rel="noopener noreferrer" target="_blank">Compendium Developments</a></p>
</div>
</div>
<!-- BODY END -->
<div class="right-col" style="float: right">
<!-- VERTICALADUNIT -->
</div>
</body>
</html>
"""
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/wiredseleniumdf",
"name": "wiredseleniumdf",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "requests,POST,GET",
"author": "Johannes Fischer",
"author_email": "aulasparticularesdealemaosp@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/dc/bc/e08509de1ed4eb4682728d53721141681eff39e5b02b93f2d1101fba4cdf/wiredseleniumdf-0.10.tar.gz",
"platform": null,
"description": "\r\n# captures and processes network requests (selenium-wire and undetected-chromedriver) and converts them to pandas DataFrames\r\n\r\n## Tested against Windows 10 / Python 3.10 / Anaconda\r\n\r\n## pip install wiredseleniumdf\r\n\r\n\r\n\r\n```python\r\n\r\nget_driver(save_folder=None, stop_keys=\"ctrl+alt+e\", scan_time=10, **kwargs):\r\n \"\"\"\r\n Initialize a Selenium WebDriver Instance (undetected-chromedriver) with Request Monitoring (selenium-wire).\r\n\r\n This function initializes a Selenium WebDriver instance with request monitoring capabilities.\r\n It allows you to interact with web pages while capturing and processing network requests\r\n made during the interactions.\r\n\r\n Parameters:\r\n - save_folder (str, optional): The folder path where captured request data should be saved\r\n as DataFrames in pickle format. If None, no data is saved. Defaults to None.\r\n - stop_keys (str, optional): A key combination to stop the request monitoring process.\r\n Defaults to \"ctrl+alt+e\".\r\n - scan_time (int, optional): The interval in seconds for scanning and capturing requests.\r\n Defaults to 10 seconds.\r\n - **kwargs: Additional keyword arguments to configure the Selenium WebDriver instance.\r\n\r\n Returns:\r\n - driver (Selenium WebDriver): An initialized Selenium WebDriver instance with request\r\n monitoring capabilities.\r\n\r\n Usage:\r\n 1. Call this function to create a WebDriver instance.\r\n 2. The WebDriver instance can be used for web interactions and will automatically\r\n capture network requests.\r\n 3. 
Optionally, provide a save folder to save captured data as pickle files.\r\n\r\n Example:\r\n >>> driver = get_driver(save_folder=\"request_data\", stop_keys=\"ctrl+alt+e\")\r\n\r\n Note:\r\n - This function combines Selenium WebDriver functionality with request monitoring\r\n capabilities for advanced web testing and analysis.\r\n - The request monitoring continues until the specified stop_keys combination is pressed\r\n or the WebDriver session is closed.\r\n - Use keyboard shortcuts (stop_keys) to control when to stop request monitoring.\r\n\t\r\n```\r\n\r\n```python\r\n\r\n# Download the root certificate https://github.com/wkeeling/selenium-wire/raw/master/seleniumwire/ca.crt and install it - Trusted Root Certification Authorities \r\n\r\nfrom wiredseleniumdf import get_driver\r\n\r\nimport random\r\nimport requests\r\nimport bs4\r\n\r\ndriver = get_driver(\r\n save_folder=\"c:\\\\requestsdfs\",\r\n stop_keys=\"ctrl+alt+e\",\r\n scan_time=10,\r\n)\r\ndriver.get(\"https://testpages.eviltester.com/styled/file-upload-test.html\")\r\n\r\n# The code prints out driver.requests_dfs, which is using a custom WebDriver functionality (selenium-wire)\r\n# to capture and store network request data during the page load.\r\n\r\nprint(driver.requests_dfs)\r\n\r\n# The script retrieves a specific request data frame (POST request) from the\r\n# driver.requests_dfs dictionary using a timestamp (1693779184.1983006) as the key.\r\n\r\ndf = driver.requests_dfs[1693779184.1983006] # timestamps used as keys in dict\r\n\r\n\r\nprint(df.iloc[1].to_string())\r\nr\"\"\"\r\nid 3810eb8d-ce30-46f6-8cdd-3890728a66de\r\nmethod POST\r\nurl https://testpages.eviltester.com/uploads/filep...\r\nheaders {'Host': 'testpages.eviltester.com', 'Connecti...\r\n_body b'------WebKitFormBoundaryPmEb1NMyJICQA4B5\\r\\n...\r\nresponse 200 OK\r\ndate 2023-09-03 19:12:57.084177\r\nws_messages []\r\ncert {'subject': [(b'CN', b'testpages.eviltester.co...\r\nintern_id 1\r\ncert__subject [(b'CN', 
b'testpages.eviltester.com')]\r\ncert__serial 325007634443972637219049593487986324830598\r\ncert__key (RSA, 2048)\r\ncert__signature_algorithm b'sha256WithRSAEncryption'\r\ncert__expired False\r\ncert__issuer [(b'C', b'US'), (b'O', b\"Let's Encrypt\"), (b'C...\r\ncert__notbefore 2023-08-28 09:04:49\r\ncert__notafter 2023-11-26 09:04:48\r\ncert__organization None\r\ncert__cn b'testpages.eviltester.com'\r\ncert__altnames [b'testpages.eviltester.com']\r\nheaders__x_goog_api_key NaN\r\nheaders__sec_fetch_site NaN\r\nheaders__sec_fetch_mode NaN\r\nheaders__sec_fetch_dest NaN\r\nheaders__user_agent NaN\r\nheaders__accept_encoding NaN\r\nheaders__accept_language NaN\r\nheaders__Host testpages.eviltester.com\r\nheaders__Connection keep-alive\r\nheaders__Content_Length 619\r\nheaders__Cache_Control max-age=0\r\nheaders__sec_ch_ua \"Chromium\";v=\"116\", \"Not)A;Brand\";v=\"24\", \"Goo...\r\nheaders__sec_ch_ua_mobile ?0\r\nheaders__sec_ch_ua_platform \"Windows\"\r\nheaders__Upgrade_Insecure_Requests 1\r\nheaders__Origin https://testpages.eviltester.com\r\nheaders__Content_Type multipart/form-data; boundary=----WebKitFormBo...\r\nheaders__User_Agent Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...\r\nheaders__Accept text/html,application/xhtml+xml,application/xm...\r\nheaders__Sec_Fetch_Site same-origin\r\nheaders__Sec_Fetch_Mode navigate\r\nheaders__Sec_Fetch_User ?1\r\nheaders__Sec_Fetch_Dest document\r\nheaders__Referer https://testpages.eviltester.com/styled/file-u...\r\nheaders__Accept_Encoding gzip, deflate, br\r\nheaders__Accept_Language en-US,en;q=0.9\r\n\"\"\"\r\n\r\nwholebody = df.iloc[1]._body\r\nwholeheader = df.iloc[1].headers.copy()\r\nwith open(R\"C:\\newfile.txt\", mode=\"rb\") as f: # uploaded file during capturing\r\n datauploaded = f.read()\r\n\r\n# The code reads the contents of another file (the one we want to upload) located at \"C:\\testfilex.txt\" and\r\n# stores it in the newdata variable.\r\nwith open(r\"C:\\testfilex.txt\", mode=\"rb\") as f:\r\n 
newdata = f.read()\r\n\r\n# The script replaces the request body content again, this time replacing the\r\n# datauploaded with newdata within wholebody. Additionally, it modifies the\r\n# filename part of the request body to include a random number and the text \"newfile.txt.\"\r\nnewdata2upload = wholebody.replace(datauploaded, newdata)\r\nnewdata2uploadwithnewfilename = newdata2upload.replace(\r\n b'filename=\"newfile.txt\"',\r\n b'filename=\"' + str(random.randint(1000, 2990)).encode() + b\"newfile.txt\",\r\n)\r\n# The Content-Length header in the wholeheader dictionary is updated to reflect\r\n# the new length of newdata2uploadwithnewfilename.\r\nwholeheader[\"Content-Length\"] = str(len(newdata2uploadwithnewfilename))\r\n\r\n# Finally, a POST request is sent to \"https://testpages.eviltester.com/uploads/fileprocessor\"\r\n# with the modified headers and request body (newdata2uploadwithnewfilename)\r\nres = requests.post(\r\n \"https://testpages.eviltester.com/uploads/fileprocessor\",\r\n headers=wholeheader,\r\n data=newdata2uploadwithnewfilename,\r\n)\r\n\r\n\r\nprint(bs4.BeautifulSoup(res.text))\r\n\r\n\"\"\"\r\n<!DOCTYPE html>\r\n<html>\r\n<head>\r\n<title>Uploaded Results Page</title>\r\n<link href=\"/css/testpages.css\" rel=\"stylesheet\"/>\r\n<script src=\"js/toc.js\"></script>\r\n<!-- HEAD -->\r\n</head>\r\n<body>\r\n<div class=\"left-col\" style=\"float: left\"></div>\r\n<div class=\"page-body\">\r\n<div class=\"navigation\">\r\n<div class=\"page-navigation\">\r\n<a href=\"/styled/index.html\">Index</a>\r\n</div>\r\n<div class=\"app-navigation\">\r\n<!-- APPNAVIGATION CONTENT -->\r\n</div>\r\n</div>\r\n<h1>Uploaded File</h1>\r\n<div class=\"explanation\">\r\n<p>You uploaded a file. 
This is the result.\r\n </p>\r\n</div>\r\n<div class=\"centered\">\r\n<h2>You uploaded this file:</h2>\r\n<div>\r\n<p id=\"uploadedfilename\">\"1901newfile.txt</p>\r\n</div>\r\n<div class=\"form-label\">\r\n<button class=\"styled-click-button\" id=\"goback\" onclick=\"window.history.back()\">Upload Another</button>\r\n</div>\r\n</div>\r\n<div class=\"page-footer\">\r\n<p><a href=\"https://eviltester.com\" rel=\"noopener noreferrer\" target=\"_blank\">EvilTester.com</a>,\r\n <a href=\"https://compendiumdev.co.uk\" rel=\"noopener noreferrer\" target=\"_blank\">Compendium Developments</a></p>\r\n</div>\r\n</div>\r\n<!-- BODY END -->\r\n<div class=\"right-col\" style=\"float: right\">\r\n<!-- VERTICALADUNIT -->\r\n</div>\r\n</body>\r\n</html>\r\n\"\"\"\r\n\r\n```\r\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "captures and processes network requests (selenium-wire and undetected-chromedriver) and converts them to pandas DataFrames",
"version": "0.10",
"project_urls": {
"Homepage": "https://github.com/hansalemaos/wiredseleniumdf"
},
"split_keywords": [
"requests",
"post",
"get"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "9371e9b03df745372ae0c488796a8ba12a5fb9524659ecfec0463a193cd7ae03",
"md5": "9b895d20b13a516c0ae9599b66cc369e",
"sha256": "8aec0fc0943ff951a011158564e94d9fc8b93fc9800737d0b44663cccb043ab9"
},
"downloads": -1,
"filename": "wiredseleniumdf-0.10-py3-none-any.whl",
"has_sig": false,
"md5_digest": "9b895d20b13a516c0ae9599b66cc369e",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 15053,
"upload_time": "2023-09-03T23:48:33",
"upload_time_iso_8601": "2023-09-03T23:48:33.564110Z",
"url": "https://files.pythonhosted.org/packages/93/71/e9b03df745372ae0c488796a8ba12a5fb9524659ecfec0463a193cd7ae03/wiredseleniumdf-0.10-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "dcbce08509de1ed4eb4682728d53721141681eff39e5b02b93f2d1101fba4cdf",
"md5": "86afb5397c50a4fb12c8392d20d356f9",
"sha256": "fbf0e05c2015dc4908847cb7a4fb63a82eae84f2b3214be5681338039006e43c"
},
"downloads": -1,
"filename": "wiredseleniumdf-0.10.tar.gz",
"has_sig": false,
"md5_digest": "86afb5397c50a4fb12c8392d20d356f9",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 13435,
"upload_time": "2023-09-03T23:48:35",
"upload_time_iso_8601": "2023-09-03T23:48:35.547320Z",
"url": "https://files.pythonhosted.org/packages/dc/bc/e08509de1ed4eb4682728d53721141681eff39e5b02b93f2d1101fba4cdf/wiredseleniumdf-0.10.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-09-03 23:48:35",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "hansalemaos",
"github_project": "wiredseleniumdf",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [],
"lcname": "wiredseleniumdf"
}