# Guesses the file type/mime/encoding of files
## pip install get-file-type
```python
Guesses the file type, MIME type and encoding of files. It uses the binaries from https://github.com/julian-r/file-windows/releases
(File and Libmagic, built with Visual Studio), which are included in this package.
Args:
files_folders (list or str): A list of file/folder paths or a single file/folder path.
maxsubfolders (int, optional): Maximum number of subfolders to scan. Default is -1, which means no limit.
pandas_dataframe (bool, optional): Determines if the results should be returned as a pandas DataFrame.
Requires pandas to be installed. Default is False.
verbose (bool, optional): Determines if verbose output should be displayed. Default is True.
Returns:
list or pd.DataFrame: A list of file type information for each file or a pandas DataFrame if pandas_dataframe is True.
Raises:
Exception: If pandas is not installed and pandas_dataframe is set to True.
Example:
from get_file_type import guess_filetypes
result_list = guess_filetypes(
files_folders=[
r"C:\Users\hansc\Pictures\fastcpy", # PNG file without a file extension
r"C:\Users\hansc\Pictures\fastcpy - Copy.png", # an actual PNG file, to check if files with the correct extension are ignored
r"C:\Users\hansc\Pictures\cppcomp.jpg", # a plain-text file with the wrong extension
r"E:\destinationcopytemp5", # internet cache files - the whole folder will be scanned
],
maxsubfolders=-1, # if you want to limit the number of subfolders to scan, -1 means no limit
pandas_dataframe=False, # return the results as a pd.DataFrame (pandas must be installed)
verbose=True, # visual output
)
result_df = guess_filetypes(
files_folders=[
r"C:\Users\hansc\Pictures\fastcpy", # PNG file without a file extension
r"C:\Users\hansc\Pictures\fastcpy - Copy.png", # an actual PNG file, to check if files with the correct extension are ignored
r"C:\Users\hansc\Pictures\cppcomp.jpg", # a plain-text file with the wrong extension
r"E:\destinationcopytemp5", # internet cache files - the whole folder will be scanned
],
maxsubfolders=-1,
pandas_dataframe=True,
verbose=True,)
output:
[[['C:\\Users\\hansc\\Pictures\\fastcpy', 'image/png', 'charset=binary', ('png',), ('C:\\Users\\hansc\\Pictures\\fastcpy.png',)]]]
[[['C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png', 'image/png', 'charset=binary', ('png',), ('C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png',)]]]
[[['C:\\Users\\hansc\\Pictures\\cppcomp.jpg', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('C:\\Users\\hansc\\Pictures\\cppcomp.jpg.conf', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.def', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.in', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.ini', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.list', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.log', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.text', 'C:\\Users\\hansc\\Pictures\\cppcomp.jpg.txt')]]]
[[['E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-64BA6ED6.log', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-64BA6ED6.log',)]]]
[[['E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-DF39BC9A.log', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-DF39BC9A.log',)]]]
aa_filename aa_mime aa_encoding aa_possible_extensions aa_possible_filenames
0 C:\Users\hansc\Pictures\fastcpy image/png charset=binary (png,) (C:\Users\hansc\Pictures\fastcpy.png,)
1 C:\Users\hansc\Pictures\fastcpy - Copy.png image/png charset=binary (png,) (C:\Users\hansc\Pictures\fastcpy - Copy.png,)
2 C:\Users\hansc\Pictures\cppcomp.jpg text/plain charset=us-ascii (conf, def, in, ini, list, log, text, txt) (C:\Users\hansc\Pictures\cppcomp.jpg.conf, C:\Users\hansc\Pictures\cppcomp.jpg.def, C:\Users\hansc\Pictures\cppcomp.jpg.in, C:\Users\hansc\Pictures\cppcomp.jpg.ini, C:\Users\hansc\Pictures\cppcomp.jpg.list, C:\Users\hansc\Pictures\cppcomp.jpg.log, C:\Users\hansc\Pictures\cppcomp.jpg.text, C:\Users\hansc\Pictures\cppcomp.jpg.txt)
3 E:\destinationcopytemp5\00000000__2023_05_13_22_33_46\Users\hansc\AppData\Local\Temp\RBX-64BA6ED6.log text/plain charset=us-ascii (conf, def, in, ini, list, log, text, txt) (E:\destinationcopytemp5\00000000__2023_05_13_22_33_46\Users\hansc\AppData\Local\Temp\RBX-64BA6ED6.log,)
4 E:\destinationcopytemp5\00000000__2023_05_13_22_33_46\Users\hansc\AppData\Local\Temp\RBX-DF39BC9A.log text/plain charset=us-ascii (conf, def, in, ini, list, log, text, txt) (E:\destinationcopytemp5\00000000__2023_05_13_22_33_46\Users\hansc\AppData\Local\Temp\RBX-DF39BC9A.log,)
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/get_file_type",
"name": "get-file-type",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "file,types,mime",
"author": "Johannes Fischer",
"author_email": "aulasparticularesdealemaosp@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/ef/8b/f7158c12a7435e2bbd1b892fbc256abe45cdb76aba7f3983775284e3a198/get_file_type-0.10.tar.gz",
"platform": null,
"description": "# Guesses the file type/mime/encoding of files\r\n\r\n## pip install get-file-type\r\n\r\n\r\n```python\r\n Guesses the file type/mime/encoding of files. It uses the binaries from https://github.com/julian-r/file-windows/releases\r\n (File and Libmagic build with Visual Studio) - They are included in this package\r\n\r\n\r\n Args:\r\n files_folders (list or str): A list of file/folder paths or a single file/folder path.\r\n maxsubfolders (int, optional): Maximum number of subfolders to scan. Default is -1, which means no limit.\r\n pandas_dataframe (bool, optional): Determines if the results should be returned as a pandas DataFrame.\r\n Requires pandas to be installed. Default is False.\r\n verbose (bool, optional): Determines if verbose output should be displayed. Default is True.\r\n\r\n Returns:\r\n list or pd.DataFrame: A list of file type information for each file or a pandas DataFrame if pandas_dataframe is True.\r\n\r\n Raises:\r\n Exception: If pandas is not installed and pandas_dataframe is set to True.\r\n\r\n Example:\r\nfrom get_file_type import guess_filetypes\t\r\nresult_list = guess_filetypes(\r\n files_folders=[\r\n r\"C:\\Users\\hansc\\Pictures\\fastcpy\", # png file without ending\r\n r\"C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png\", # an actual png file, to check if files with the correct ending are ignored\r\n r\"C:\\Users\\hansc\\Pictures\\cppcomp.jpg\", # a .txt file with the wrong ending\r\n r\"E:\\destinationcopytemp5\", # internet cache files - whole folder will be scanned\r\n ],\r\n maxsubfolders=-1, # if you want to limit the number of subfolders to scan, -1 means no limit\r\n pandas_dataframe=False, # return the results as a pd.DataFrame (pandas must be installed)\r\n verbose=True, # visual output\r\n)\r\n\r\nresult_df = guess_filetypes(\r\n files_folders=[\r\n r\"C:\\Users\\hansc\\Pictures\\fastcpy\", # png file without ending\r\n r\"C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png\", # an actual png file, to check if 
files with the correct ending are ignored\r\n r\"C:\\Users\\hansc\\Pictures\\cppcomp.jpg\", # a .txt file with the wrong ending\r\n r\"E:\\destinationcopytemp5\", # internet cache files - whole folder will be scanned\r\n ],\r\n maxsubfolders=-1,\r\n pandas_dataframe=True,\r\n verbose=True,)\r\n output:\r\n [[['C:\\\\Users\\\\hansc\\\\Pictures\\\\fastcpy', 'image/png', 'charset=binary', ('png',), ('C:\\\\Users\\\\hansc\\\\Pictures\\\\fastcpy.png',)]]]\r\n [[['C:\\\\Users\\\\hansc\\\\Pictures\\\\fastcpy - Copy.png', 'image/png', 'charset=binary', ('png',), ('C:\\\\Users\\\\hansc\\\\Pictures\\\\fastcpy - Copy.png',)]]]\r\n [[['C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.conf', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.def', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.in', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.ini', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.list', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.log', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.text', 'C:\\\\Users\\\\hansc\\\\Pictures\\\\cppcomp.jpg.txt')]]]\r\n [[['E:\\\\destinationcopytemp5\\\\00000000__2023_05_13_22_33_46\\\\Users\\\\hansc\\\\AppData\\\\Local\\\\Temp\\\\RBX-64BA6ED6.log', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('E:\\\\destinationcopytemp5\\\\00000000__2023_05_13_22_33_46\\\\Users\\\\hansc\\\\AppData\\\\Local\\\\Temp\\\\RBX-64BA6ED6.log',)]]]\r\n [[['E:\\\\destinationcopytemp5\\\\00000000__2023_05_13_22_33_46\\\\Users\\\\hansc\\\\AppData\\\\Local\\\\Temp\\\\RBX-DF39BC9A.log', 'text/plain', 'charset=us-ascii', ('conf', 'def', 'in', 'ini', 'list', 'log', 'text', 'txt'), ('E:\\\\destinationcopytemp5\\\\00000000__2023_05_13_22_33_46\\\\Users\\\\hansc\\\\AppData\\\\Local\\\\Temp\\\\RBX-DF39BC9A.log',)]]]\r\n\r\n\r\n aa_filename aa_mime aa_encoding 
aa_possible_extensions aa_possible_filenames\r\n 0 C:\\Users\\hansc\\Pictures\\fastcpy image/png charset=binary (png,) (C:\\Users\\hansc\\Pictures\\fastcpy.png,)\r\n 1 C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png image/png charset=binary (png,) (C:\\Users\\hansc\\Pictures\\fastcpy - Copy.png,)\r\n 2 C:\\Users\\hansc\\Pictures\\cppcomp.jpg text/plain charset=us-ascii (conf, def, in, ini, list, log, text, txt) (C:\\Users\\hansc\\Pictures\\cppcomp.jpg.conf, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.def, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.in, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.ini, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.list, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.log, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.text, C:\\Users\\hansc\\Pictures\\cppcomp.jpg.txt)\r\n 3 E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-64BA6ED6.log text/plain charset=us-ascii (conf, def, in, ini, list, log, text, txt) (E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-64BA6ED6.log,)\r\n 4 E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-DF39BC9A.log text/plain charset=us-ascii (conf, def, in, ini, list, log, text, txt) (E:\\destinationcopytemp5\\00000000__2023_05_13_22_33_46\\Users\\hansc\\AppData\\Local\\Temp\\RBX-DF39BC9A.log,)\r\n\r\n\r\n\r\n\r\n```\r\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Guesses the file type/mime/encoding of files. It uses the binaries from File and Libmagic.",
"version": "0.10",
"project_urls": {
"Homepage": "https://github.com/hansalemaos/get_file_type"
},
"split_keywords": [
"file",
"types",
"mime"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "cd2713062797d98d44fc3bf2413f3381a8dd51d44cc4a65cd6de8c97d93e57cf",
"md5": "f9761ee29ef866a2a6ff8644b387456d",
"sha256": "fc33ee2953c5103e88e6100499021793596b54cd6b5f3d17235af80d074d713f"
},
"downloads": -1,
"filename": "get_file_type-0.10-py3-none-any.whl",
"has_sig": false,
"md5_digest": "f9761ee29ef866a2a6ff8644b387456d",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 616765,
"upload_time": "2023-05-14T19:45:09",
"upload_time_iso_8601": "2023-05-14T19:45:09.932679Z",
"url": "https://files.pythonhosted.org/packages/cd/27/13062797d98d44fc3bf2413f3381a8dd51d44cc4a65cd6de8c97d93e57cf/get_file_type-0.10-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "ef8bf7158c12a7435e2bbd1b892fbc256abe45cdb76aba7f3983775284e3a198",
"md5": "b8567f842dead695083e5a8b54040e0d",
"sha256": "f77b21b0bf9f4a2f19092c1f78e2022d3ebfcfbf577474990e2cd111cd7af763"
},
"downloads": -1,
"filename": "get_file_type-0.10.tar.gz",
"has_sig": false,
"md5_digest": "b8567f842dead695083e5a8b54040e0d",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 577531,
"upload_time": "2023-05-14T19:45:14",
"upload_time_iso_8601": "2023-05-14T19:45:14.030295Z",
"url": "https://files.pythonhosted.org/packages/ef/8b/f7158c12a7435e2bbd1b892fbc256abe45cdb76aba7f3983775284e3a198/get_file_type-0.10.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-05-14 19:45:14",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "hansalemaos",
"github_project": "get_file_type",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [],
"lcname": "get-file-type"
}