textBotPlus

Name	textBotPlus JSON
Version	0.1.4 JSON
	download
home_page	None
Summary	A Python package for text manipulation
upload_time	2025-01-17 11:58:01
maintainer	None
docs_url	None
author	Ramesh Chandra
requires_python	>=3.6
license	None
keywords
VCS
bugtrack_url
requirements	No requirements were recorded.
Travis-CI	No Travis.
coveralls test coverage	No coveralls.

# textBotPlus

`textBotPlus` is a Python package designed to simplify text processing tasks, such as extracting, manipulating, and saving text data from various sources. It includes utility functions for working with text, handling files (e.g., reading/writing CSV), interacting with HTML elements via BeautifulSoup, and performing operations like string standardization, element extraction with CSS selectors, and more. 

Key features include:
- String manipulation and sanitization.
- CSV file handling with optional appending or header control.
- HTML parsing using BeautifulSoup to extract text or attributes via CSS selectors.
- Flexible error handling for missing or invalid inputs. 

The package is geared towards automating text-related tasks and is useful for data extraction, web scraping, and text file management.

## Installation

You can install this package via pip:

```bash
pip install textBotPlus
```

```python

# ================================================================

from textBotPlus import create_directory
# Creates a directory if it doesn't already exist.


# Example 1: Creating a new directory
create_directory("new_folder")

# Example 2: Creating nested directories
create_directory("parent_folder/sub_folder")

# ================================================================

from textBotPlus import standardized_string
# This function standardizes the input string by removing escape sequences like \n, \t, and \r, removing HTML tags, collapsing multiple spaces, and trimming leading/trailing spaces.


# Example 1: Standardize a string with newlines, tabs, and HTML tags
input_string_1 = "<html><body>  Hello \nWorld!  \tThis is a test.  </body></html>"
print("Standardized String 1:", standardized_string(input_string_1))

# Example 2: Input string with multiple spaces and line breaks
input_string_2 = "  This   is   a  \n\n   string   with  spaces and \t tabs.  "
print("Standardized String 2:", standardized_string(input_string_2))

# Example 3: Pass an empty string
input_string_3 = ""
print("Standardized String 3:", standardized_string(input_string_3))

# Example 4: Pass None (invalid input)
input_string_4 = None
print("Standardized String 4:", standardized_string(input_string_4))

# ================================================================

from textBotPlus import remove_common_elements
# The function removes elements from the remove_in collection that are present in the remove_by collection.


# Example 1: Removing common elements between two lists
remove_in = [1, 2, 3, 4, 5]
remove_by = [3, 4, 6]
result = remove_common_elements(remove_in, remove_by)
print(result)  # Output: [1, 2, 5]

# Example 2: Removing common elements between a set and a tuple
remove_in = {1, 2, 3, 4, 5}
remove_by = (3, 4, 6)
result = remove_common_elements(remove_in, remove_by)
print(result)  # Output: {1, 2, 5}

# Example 3: Missing one argument (should print an error message)
result = remove_common_elements(remove_in, None)  # Output: "Value not passed for: remove_by"

# Example 4: Missing both arguments (should print an error message)
result = remove_common_elements(None, None)  # Output: "Value not passed for: remove_in, remove_by"

# ================================================================

from textBotPlus import save_to_csv
# The save_to_csv function saves data to a CSV file, appending to an existing file or creating a new one, and includes optional column headers and a customizable delimiter.

# Example data to be saved
list_data = [[1, 'Alice', 23], [2, 'Bob', 30], [3, 'Charlie', 25]]
column_header_list = ['ID', 'Name', 'Age']
output_file_path = 'output_data.csv'

# Save with the default separator (comma)
save_to_csv(list_data, column_header_list, output_file_path)

# Save with a tab separator
save_to_csv(list_data, column_header_list, output_file_path, sep="\t")

# Save with a semicolon separator
save_to_csv(list_data, column_header_list, output_file_path, sep=";")

# Sample Output

# If sep="," (default), the CSV file will look like this:

ID,Name,Age
1,Alice,23
2,Bob,30
3,Charlie,25

# If sep="\t" (tab), the CSV file will look like this: 

ID	Name	Age
1	Alice	23
2	Bob	30
3	Charlie	25

# ================================================================

from textBotPlus import read_csv
# The read_csv function reads a CSV file into a Pandas DataFrame, with optional filtering and column value extraction based on specified criteria.

# Example 1:  Default Separator (Comma)

# Read from a CSV file with the default separator (comma)
csv_file_path = 'data.csv'
get_value_by_col_name = 'URL'
filter_col_name = 'Category'
inculde_filter_col_values = ['Tech']

result = read_csv(csv_file_path, get_value_by_col_name, filter_col_name, inculde_filter_col_values)
print(result)

Sample Output:

Category,URL
Tech,https://tech1.com
Tech,https://tech2.com
Science,https://science1.com


# Example 2: Custom Separator (Tab)

# Read from a CSV file with a custom separator (tab)
result = read_csv(csv_file_path, get_value_by_col_name, filter_col_name, inculde_filter_col_values, sep="\t")
print(result)

Sample Output:

Category	URL
Tech	https://tech1.com
Tech	https://tech2.com
Science	https://science1.com

# ================================================================

from textBotPlus import get_json_text
# The function returns the standardized value (using standardized_string()) if it's a basic data type like str, int, or float. Otherwise, it returns the raw JSON object or an empty string if nothing is found.

json_data = {"user": {"name": "John", "age": 30}}
keys = ["user", "name"]
result = get_json_text(json_data, keys)
print(result)  # Output: "John"

# ================================================================

from bs4 import BeautifulSoup
from textBotPlus import get_selector_text
# The get_selector_text function extracts text or attribute values from a BeautifulSoup-parsed HTML document using CSS selectors, with options for specific element matching, attribute retrieval, or full text extraction.


# Example 1: Extract Text from a Single Element

html = '<html><body><p id="paragraph">Hello, world!</p></body></html>'
soup = BeautifulSoup(html, 'html.parser')

result = get_selector_text(soup, css_selector="#paragraph")
print(result)  # Output: "Hello, world!"


# Example 2: Extract Attribute Value from an Element

html = '''<html><body><a href="https://example.com" class="link">Example</a></body></html>'''
soup = BeautifulSoup(html, 'html.parser')

href = get_selector_text(soup, css_selector_text='.link', attr='href')
print(href)  # Output: "https://example.com"


# Example 3: Get All Matching Elements

html = '''<html><body><p>Paragraph 1</p><p>Paragraph 2</p></body></html>'''
soup = BeautifulSoup(html, 'html.parser')

paragraphs = get_selector_text(soup, css_selectors='p')
for p in paragraphs:
    print(p.text)
# Output:
# Paragraph 1
# Paragraph 2
```

Raw data

            {
    "_id": null,
    "home_page": null,
    "name": "textBotPlus",
    "maintainer": null,
    "docs_url": null,
    "requires_python": ">=3.6",
    "maintainer_email": null,
    "keywords": null,
    "author": "Ramesh Chandra",
    "author_email": "rameshsofter@gmail.com",
    "download_url": "https://files.pythonhosted.org/packages/ce/4f/ded870cfde9d7ac3c9290cfba6a047ad013530dd1d59345121989364f5cf/textBotPlus-0.1.4.tar.gz",
    "platform": null,
    "description": "# textBotPlus\r\n\r\n`textBotPlus` is a Python package designed to simplify text processing tasks, such as extracting, manipulating, and saving text data from various sources. It includes utility functions for working with text, handling files (e.g., reading/writing CSV), interacting with HTML elements via BeautifulSoup, and performing operations like string standardization, element extraction with CSS selectors, and more. \r\n\r\nKey features include:\r\n- String manipulation and sanitization.\r\n- CSV file handling with optional appending or header control.\r\n- HTML parsing using BeautifulSoup to extract text or attributes via CSS selectors.\r\n- Flexible error handling for missing or invalid inputs. \r\n\r\nThe package is geared towards automating text-related tasks and is useful for data extraction, web scraping, and text file management.\r\n\r\n## Installation\r\n\r\nYou can install this package via pip:\r\n\r\n```bash\r\npip install textBotPlus\r\n\r\n# ================================================================\r\n\r\nfrom textBotPlus import create_directory\r\n# Creates a directory if it doesn't already exist.\r\n\r\n\r\n# Example 1: Creating a new directory\r\ncreate_directory(\"new_folder\")\r\n\r\n# Example 2: Creating nested directories\r\ncreate_directory(\"parent_folder/sub_folder\")\r\n\r\n# ================================================================\r\n\r\nfrom textBotPlus import standardized_string\r\n# This function standardizes the input string by removing escape sequences like \\n, \\t, and \\r, removing HTML tags, collapsing multiple spaces, and trimming leading/trailing spaces.\r\n\r\n\r\n# Example 1: Standardize a string with newlines, tabs, and HTML tags\r\ninput_string_1 = \"<html><body>  Hello \\nWorld!  \\tThis is a test.  
</body></html>\"\r\nprint(\"Standardized String 1:\", standardized_string(input_string_1))\r\n\r\n# Example 2: Input string with multiple spaces and line breaks\r\ninput_string_2 = \"  This   is   a  \\n\\n   string   with  spaces and \\t tabs.  \"\r\nprint(\"Standardized String 2:\", standardized_string(input_string_2))\r\n\r\n# Example 3: Pass an empty string\r\ninput_string_3 = \"\"\r\nprint(\"Standardized String 3:\", standardized_string(input_string_3))\r\n\r\n# Example 4: Pass None (invalid input)\r\ninput_string_4 = None\r\nprint(\"Standardized String 4:\", standardized_string(input_string_4))\r\n\r\n================================================================\r\n\r\nfrom textBotPlus import remove_common_elements\r\n# The function removes elements from the remove_in collection that are present in the remove_by collection.\r\n\r\n\r\n# Example 1: Removing common elements between two lists\r\nremove_in = [1, 2, 3, 4, 5]\r\nremove_by = [3, 4, 6]\r\nresult = remove_common_elements(remove_in, remove_by)\r\nprint(result)  # Output: [1, 2, 5]\r\n\r\n# Example 2: Removing common elements between a set and a tuple\r\nremove_in = {1, 2, 3, 4, 5}\r\nremove_by = (3, 4, 6)\r\nresult = remove_common_elements(remove_in, remove_by)\r\nprint(result)  # Output: {1, 2, 5}\r\n\r\n# Example 3: Missing one argument (should print an error message)\r\nresult = remove_common_elements(remove_in, None)  # Output: \"Value not passed for: remove_by\"\r\n\r\n# Example 4: Missing both arguments (should print an error message)\r\nresult = remove_common_elements(None, None)  # Output: \"Value not passed for: remove_in, remove_by\"\r\n\r\n================================================================\r\n\r\nfrom textBotPlus import save_to_csv\r\n# The save_to_csv function saves data to a CSV file, appending to an existing file or creating a new one, and includes optional column headers and a customizable delimiter.\r\n\r\n# Example data to be saved\r\nlist_data = [[1, 'Alice', 23], [2, 
'Bob', 30], [3, 'Charlie', 25]]\r\ncolumn_header_list = ['ID', 'Name', 'Age']\r\noutput_file_path = 'output_data.csv'\r\n\r\n# Save with the default separator (comma)\r\nsave_to_csv(list_data, column_header_list, output_file_path)\r\n\r\n# Save with a tab separator\r\nsave_to_csv(list_data, column_header_list, output_file_path, sep=\"\\t\")\r\n\r\n# Save with a semicolon separator\r\nsave_to_csv(list_data, column_header_list, output_file_path, sep=\";\")\r\n\r\n# Sample Output\r\n\r\n# If sep=\",\" (default), the CSV file will look like this:\r\n\r\nID,Name,Age\r\n1,Alice,23\r\n2,Bob,30\r\n3,Charlie,25\r\n\r\n# If sep=\"\\t\" (tab), the CSV file will look like this: \r\n\r\nID\tName\tAge\r\n1\tAlice\t23\r\n2\tBob\t30\r\n3\tCharlie\t25\r\n\r\n================================================================\r\n\r\nfrom textBotPlus import read_csv\r\n# The read_csv function reads a CSV file into a Pandas DataFrame, with optional filtering and column value extraction based on specified criteria.\r\n\r\n# Example 1:  Default Separator (Comma)\r\n\r\n# Read from a CSV file with the default separator (comma)\r\ncsv_file_path = 'data.csv'\r\nget_value_by_col_name = 'URL'\r\nfilter_col_name = 'Category'\r\ninculde_filter_col_values = ['Tech']\r\n\r\nresult = read_csv(csv_file_path, get_value_by_col_name, filter_col_name, inculde_filter_col_values)\r\nprint(result)\r\n\r\nSample Output:\r\n\r\nCategory,URL\r\nTech,https://tech1.com\r\nTech,https://tech2.com\r\nScience,https://science1.com\r\n\r\n\r\n# Example 2: Default Separator (tab\r\n\r\n# Read from a CSV file with a custom separator (tab)\r\nresult = read_csv(csv_file_path, get_value_by_col_name, filter_col_name, inculde_filter_col_values, sep=\"\\t\")\r\nprint(result)\r\n\r\nSample Output:\r\n\r\nCategory\tURL\r\nTech\thttps://tech1.com\r\nTech\thttps://tech2.com\r\nScience\thttps://science1.com\r\n\r\n================================================================\r\n\r\nfrom textBotPlus import get_json_text\r\n# The 
function returns the standardized value (using standardized_string()) if it\u00e2\u20ac\u2122s a basic data type like str, int, or float. Otherwise, it returns the raw JSON object or an empty string if nothing is found.\r\n\r\njson_data = {\"user\": {\"name\": \"John\", \"age\": 30}}\r\nkeys = [\"user\", \"name\"]\r\nresult = get_json_text(json_data, keys)\r\nprint(result)  # Output: \"John\"\r\n\r\n================================================================\r\n\r\nfrom bs4 import BeautifulSoup\r\nfrom textBotPlus import get_selector_text\r\n# The get_selector_text function extracts text or attribute values from a BeautifulSoup-parsed HTML document using CSS selectors, with options for specific element matching, attribute retrieval, or full text extraction.\r\n\r\n\r\n# Example 1: Extract Text from a Single Element\r\n\r\nhtml = '<html><body><p id=\"paragraph\">Hello, world!</p></body></html>'\r\nsoup = BeautifulSoup(html, 'html.parser')\r\n\r\nresult = get_selector_text(soup, css_selector=\"#paragraph\")\r\nprint(result)  # Output: \"Hello, world!\"\r\n\r\n\r\n# Example 2: Extract Attribute Value from an Element\r\n\r\nhtml = '''<html><body><a href=\"https://example.com\" class=\"link\">Example</a></body></html>'''\r\nsoup = BeautifulSoup(html, 'html.parser')\r\n\r\nhref = get_selector_text(soup, css_selector_text='.link', attr='href')\r\nprint(href)  # Output: \"https://example.com\"\r\n\r\n\r\n# Example 3: Get All Matching Elements\r\n\r\nhtml = '''<html><body><p>Paragraph 1</p><p>Paragraph 2</p></body></html>'''\r\nsoup = BeautifulSoup(html, 'html.parser')\r\n\r\nparagraphs = get_selector_text(soup, css_selectors='p')\r\nfor p in paragraphs:\r\n    print(p.text)\r\n# Output:\r\n# Paragraph 1\r\n# Paragraph 2\r\n",
    "bugtrack_url": null,
    "license": null,
    "summary": "A Python package for text manipulation",
    "version": "0.1.4",
    "project_urls": null,
    "split_keywords": [],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "ac20a1037896b127e3f6f66efb0587de85553d719383726a7df03c0b7c0dda01",
                "md5": "398f07e4d6ded0651e6b3cf3bdb7b70a",
                "sha256": "32f19db207a0a78647751c197dcc57e631beeacd485086fe51f5a79413396e49"
            },
            "downloads": -1,
            "filename": "textBotPlus-0.1.4-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "398f07e4d6ded0651e6b3cf3bdb7b70a",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": ">=3.6",
            "size": 7742,
            "upload_time": "2025-01-17T11:57:57",
            "upload_time_iso_8601": "2025-01-17T11:57:57.953653Z",
            "url": "https://files.pythonhosted.org/packages/ac/20/a1037896b127e3f6f66efb0587de85553d719383726a7df03c0b7c0dda01/textBotPlus-0.1.4-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "ce4fded870cfde9d7ac3c9290cfba6a047ad013530dd1d59345121989364f5cf",
                "md5": "d70a78a7867a9c5e6d0c313883e5a039",
                "sha256": "f527ac73681206bba263344fa3b776f06180f8f88d8ce9eb554ba5b07f85a9dd"
            },
            "downloads": -1,
            "filename": "textBotPlus-0.1.4.tar.gz",
            "has_sig": false,
            "md5_digest": "d70a78a7867a9c5e6d0c313883e5a039",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": ">=3.6",
            "size": 9070,
            "upload_time": "2025-01-17T11:58:01",
            "upload_time_iso_8601": "2025-01-17T11:58:01.072316Z",
            "url": "https://files.pythonhosted.org/packages/ce/4f/ded870cfde9d7ac3c9290cfba6a047ad013530dd1d59345121989364f5cf/textBotPlus-0.1.4.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2025-01-17 11:58:01",
    "github": false,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "lcname": "textbotplus"
}

Ramesh Chandra