get-file-type


Name: get-file-type JSON
Version: 0.10 PyPI version JSON
download
home_page: https://github.com/hansalemaos/get_file_type
Summary: Guesses the file type/mime/encoding of files. It uses the binaries from File and Libmagic.
upload_time: 2023-05-14 19:45:14
maintainer
docs_url: None
author: Johannes Fischer
requires_python
license: MIT
keywords file types mime
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
# Guesses the file type/mime/encoding of files

## pip install get-file-type


```python
    Guesses the file type/mime/encoding of files. It uses the binaries from https://github.com/julian-r/file-windows/releases
    (File and Libmagic built with Visual Studio) - They are included in this package


    Args:
        files_folders (list or str): A list of file/folder paths or a single file/folder path.
        maxsubfolders (int, optional): Maximum number of subfolders to scan. Default is -1, which means no limit.
        pandas_dataframe (bool, optional): Determines if the results should be returned as a pandas DataFrame.
                                           Requires pandas to be installed. Default is False.
        verbose (bool, optional): Determines if verbose output should be displayed. Default is True.

    Returns:
        list or pd.DataFrame: A list of file type information for each file or a pandas DataFrame if pandas_dataframe is True.

    Raises:
        Exception: If pandas is not installed and pandas_dataframe is set to True.

    Example:
from get_file_type import guess_filetypes
result_list = guess_filetypes(
    files_folders=[
        r"C:\Users\hansc\Pictures\fastcpy",  # png file without a file extension
        r"C:\Users\hansc\Pictures\fastcpy - Copy.png",  # an actual png file, to check if files with the correct extension are ignored
        r"C:\Users\hansc\Pictures\cppcomp.jpg",  # a plain-text file with the wrong extension (.jpg)
        r"E:\destinationcopytemp5",  # internet cache files - whole folder will be scanned
    ],
    maxsubfolders=-1,  # if you want to limit the number of subfolders to scan, -1 means no limit
    pandas_dataframe=False,  # return the results as a pd.DataFrame (pandas must be installed)
    verbose=True,  # visual output
)

result_df = guess_filetypes(
    files_folders=[
        r"C:\Users\hansc\Pictures\fastcpy",  # png file without a file extension
        r"C:\Users\hansc\Pictures\fastcpy - Copy.png",  # an actual png file, to check if files with the correct extension are ignored
        r"C:\Users\hansc\Pictures\cppcomp.jpg",  # a plain-text file with the wrong extension (.jpg)
        r"E:\destinationcopytemp5",  # internet cache files - whole folder will be scanned
    ],
    maxsubfolders=-1,
    pandas_dataframe=True,
    verbose=True,)
    output:
    [[['C:\\Users\\hansc\\Pictures\\fastcpy', 'image/png', 'charset=binary', ('png',), ('C:\\Users\\hansc\\Pictures\\fastcpy.png',)]]]
    [[['C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png', 'image/png', 'charset=binary', ('png',), ('C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png',)]]]
    [[['C:\\Users\\hansc\\Pictures\\cppcomp.jpg', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('C:\\Users\\hansc\\Pictures\\cppcomp.jpg.conf', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.def', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.in', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.ini', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.list', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.log', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.text', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.txt')]]]
    [[['E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-64BA6ED6.log', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-64BA6ED6.log',)]]]
    [[['E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-DF39BC9A.log', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-DF39BC9A.log',)]]]


                                                                                                 aa_filename     aa_mime       aa_encoding                      aa_possible_extensions                                                                                                                                                                                                                                                                                                                       aa_possible_filenames
    0                                                                        C:\Users\hansc\Pictures\fastcpy   image/png    charset=binary                                      (png,)                                                                                                                                                                                                                                                                                                      (C:\Users\hansc\Pictures\fastcpy.png,)
    1                                                             C:\Users\hansc\Pictures\fastcpy - Copy.png   image/png    charset=binary                                      (png,)                                                                                                                                                                                                                                                                                               (C:\Users\hansc\Pictures\fastcpy - Copy.png,)
    2                                                                    C:\Users\hansc\Pictures\cppcomp.jpg  text/plain  charset=us-ascii  (conf, def, in, ini, list, log, text, txt)  (C:\Users\hansc\Pictures\cppcomp.jpg.conf, C:\Users\hansc\Pictures\cppcomp.jpg.def, C:\Users\hansc\Pictures\cppcomp.jpg.in, C:\Users\hansc\Pictures\cppcomp.jpg.ini, C:\Users\hansc\Pictures\cppcomp.jpg.list, C:\Users\hansc\Pictures\cppcomp.jpg.log, C:\Users\hansc\Pictures\cppcomp.jpg.text, C:\Users\hansc\Pictures\cppcomp.jpg.txt)
    3  E:\destinationcopytemp5\00000000__2023_05_13_22_33_46\Users\hansc\AppData\Local\Temp\RBX-64BA6ED6.log  text/plain  charset=us-ascii  (conf, def, in, ini, list, log, text, txt)                                                                                                                                                                                                                                    (E:\destinationcopytemp5\00000000__2023_05_13_22_33_46\Users\hansc\AppData\Local\Temp\RBX-64BA6ED6.log,)
    4  E:\destinationcopytemp5\00000000__2023_05_13_22_33_46\Users\hansc\AppData\Local\Temp\RBX-DF39BC9A.log  text/plain  charset=us-ascii  (conf, def, in, ini, list, log, text, txt)                                                                                                                                                                                                                                    (E:\destinationcopytemp5\00000000__2023_05_13_22_33_46\Users\hansc\AppData\Local\Temp\RBX-DF39BC9A.log,)




```

            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/hansalemaos/get_file_type",
    "name": "get-file-type",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "file,types,mime",
    "author": "Johannes Fischer",
    "author_email": "aulasparticularesdealemaosp@gmail.com",
    "download_url": "https://files.pythonhosted.org/packages/ef/8b/f7158c12a7435e2bbd1b892fbc256abe45cdb76aba7f3983775284e3a198/get_file_type-0.10.tar.gz",
    "platform": null,
    "description": "# Guesses the file type/mime/encoding of files\r\n\r\n## pip install get-file-type\r\n\r\n\r\n```python\r\n    Guesses the file type/mime/encoding of files. It uses the binaries from https://github.com/julian-r/file-windows/releases\r\n    (File and Libmagic build with Visual Studio) - They are included in this package\r\n\r\n\r\n    Args:\r\n        files_folders (list or str): A list of file/folder paths or a single file/folder path.\r\n        maxsubfolders (int, optional): Maximum number of subfolders to scan. Default is -1, which means no limit.\r\n        pandas_dataframe (bool, optional): Determines if the results should be returned as a pandas DataFrame.\r\n                                           Requires pandas to be installed. Default is False.\r\n        verbose (bool, optional): Determines if verbose output should be displayed. Default is True.\r\n\r\n    Returns:\r\n        list or pd.DataFrame: A list of file type information for each file or a pandas DataFrame if pandas_dataframe is True.\r\n\r\n    Raises:\r\n        Exception: If pandas is not installed and pandas_dataframe is set to True.\r\n\r\n    Example:\r\nfrom get_file_type import guess_filetypes\t\r\nresult_list = guess_filetypes(\r\n    files_folders=[\r\n        r\"C:\\Users\\hansc\\Pictures\\fastcpy\",  # png file without ending\r\n        r\"C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png\",  # an actual png file, to check if files with the correct ending are ignored\r\n        r\"C:\\Users\\hansc\\Pictures\\cppcomp.jpg\",  # a .txt file with the wrong ending\r\n        r\"E:\\destinationcopytemp5\",  # internet cache files - whole folder will be scanned\r\n    ],\r\n    maxsubfolders=-1,  # if you want to limit the number of subfolders to scan, -1 means no limit\r\n    pandas_dataframe=False,  # return the results as a pd.DataFrame (pandas must be installed)\r\n    verbose=True,  # visual output\r\n)\r\n\r\nresult_df = guess_filetypes(\r\n    files_folders=[\r\n   
     r\"C:\\Users\\hansc\\Pictures\\fastcpy\",  # png file without ending\r\n        r\"C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png\",  # an actual png file, to check if files with the correct ending are ignored\r\n        r\"C:\\Users\\hansc\\Pictures\\cppcomp.jpg\",  # a .txt file with the wrong ending\r\n        r\"E:\\destinationcopytemp5\",  # internet cache files - whole folder will be scanned\r\n    ],\r\n    maxsubfolders=-1,\r\n    pandas_dataframe=True,\r\n    verbose=True,)\r\n    output:\r\n    [[['C:\\\\Users\\\\hansc\\\\Pictures\\\\fastcpy', 'image/png', 'charset=binary', ('png',), ('C:\\\\Users\\\\hansc\\\\Pictures\\\\fastcpy.png',)]]]\r\n    [[['C:\\\\Users\\\\hansc\\\\Pictures\\\\fastcpy - Copy.png', 'image/png', 'charset=binary', ('png',), ('C:\\\\Users\\\\hansc\\\\Pictures\\\\fastcpy - Copy.png',)]]]\r\n    [[['C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.conf', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.def', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.in', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.ini', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.list', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.log', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.text', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.txt')]]]\r\n    [[['E:\\\\destinationcopytemp5\\\\00000000__2023_05_13_22_33_46\\\\Users\\\\hansc\\\\AppData\\\\Local\\\\Temp\\\\RBX-64BA6ED6.log', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('E:\\\\destinationcopytemp5\\\\00000000__2023_05_13_22_33_46\\\\Users\\\\hansc\\\\AppData\\\\Local\\\\Temp\\\\RBX-64BA6ED6.log',)]]]\r\n    [[['E:\\\\destinationcopytemp5\\\\00000000__2023_05_13_22_33_46\\\\Users\\\\hansc\\\\AppData\\\\Local\\\\Temp\\\\RBX-DF39BC9A.log', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 
'ini', 'list', 'log', 'text', 'txt'), ('E:\\\\destinationcopytemp5\\\\00000000__2023_05_13_22_33_46\\\\Users\\\\hansc\\\\AppData\\\\Local\\\\Temp\\\\RBX-DF39BC9A.log',)]]]\r\n\r\n\r\n                                                                                                 aa_filename     aa_mime       aa_encoding                      aa_possible_extensions                                                                                                                                                                                                                                                                                                                       aa_possible_filenames\r\n    0                                                                        C:\\Users\\hansc\\Pictures\\fastcpy   image/png    charset=binary                                      (png,)                                                                                                                                                                                                                                                                                                      (C:\\Users\\hansc\\Pictures\\fastcpy.png,)\r\n    1                                                             C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png   image/png    charset=binary                                      (png,)                                                                                                                                                                                                                                                                                               (C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png,)\r\n    2                                                                    C:\\Users\\hansc\\Pictures\\cppcomp.jpg  text/plain  charset=us-ascii  (conf, def, in, ini, list, log, text, txt)  (C:\\Users\\hansc\\Pictures\\cppcomp.jpg.conf, 
C:\\Users\\hansc\\Pictures\\cppcomp.jpg.def, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.in, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.ini, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.list, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.log, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.text, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.txt)\r\n    3  E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-64BA6ED6.log  text/plain  charset=us-ascii  (conf, def, in, ini, list, log, text, txt)                                                                                                                                                                                                                                    (E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-64BA6ED6.log,)\r\n    4  E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-DF39BC9A.log  text/plain  charset=us-ascii  (conf, def, in, ini, list, log, text, txt)                                                                                                                                                                                                                                    (E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-DF39BC9A.log,)\r\n\r\n\r\n\r\n\r\n```\r\n",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "Guesses the file type/mime/encoding of files. It uses the binaries from File and Libmagic.",
    "version": "0.10",
    "project_urls": {
        "Homepage": "https://github.com/hansalemaos/get_file_type"
    },
    "split_keywords": [
        "file",
        "types",
        "mime"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "cd2713062797d98d44fc3bf2413f3381a8dd51d44cc4a65cd6de8c97d93e57cf",
                "md5": "f9761ee29ef866a2a6ff8644b387456d",
                "sha256": "fc33ee2953c5103e88e6100499021793596b54cd6b5f3d17235af80d074d713f"
            },
            "downloads": -1,
            "filename": "get_file_type-0.10-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "f9761ee29ef866a2a6ff8644b387456d",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 616765,
            "upload_time": "2023-05-14T19:45:09",
            "upload_time_iso_8601": "2023-05-14T19:45:09.932679Z",
            "url": "https://files.pythonhosted.org/packages/cd/27/13062797d98d44fc3bf2413f3381a8dd51d44cc4a65cd6de8c97d93e57cf/get_file_type-0.10-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "ef8bf7158c12a7435e2bbd1b892fbc256abe45cdb76aba7f3983775284e3a198",
                "md5": "b8567f842dead695083e5a8b54040e0d",
                "sha256": "f77b21b0bf9f4a2f19092c1f78e2022d3ebfcfbf577474990e2cd111cd7af763"
            },
            "downloads": -1,
            "filename": "get_file_type-0.10.tar.gz",
            "has_sig": false,
            "md5_digest": "b8567f842dead695083e5a8b54040e0d",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 577531,
            "upload_time": "2023-05-14T19:45:14",
            "upload_time_iso_8601": "2023-05-14T19:45:14.030295Z",
            "url": "https://files.pythonhosted.org/packages/ef/8b/f7158c12a7435e2bbd1b892fbc256abe45cdb76aba7f3983775284e3a198/get_file_type-0.10.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2023-05-14 19:45:14",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "hansalemaos",
    "github_project": "get_file_type",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [],
    "lcname": "get-file-type"
}
        
Elapsed time: 0.32367s