### Never mind explorer.exe, here is dirdf
Are you one of those people who never have any space on their hard disk? Do you spend hours and hours searching for your files because you can never remember where you saved them? Well, since I am one of those people, I tried several tools in the past: TreeSize / WinDirStat / WizTree / SpaceSniffer / GREP. They are all great,
but they take forever to get the job done and offer only limited filter functions. Around 5 hours ago, after having searched for around 30 minutes for a file on my hard drive, I decided to do something about it ...
#### Install the package (Windows only)
```python
pip install dirdf
```
#### Install Cygwin (ls.exe is necessary for getting the file list)
[ls.exe](https://www.cygwin.com/setup-x86_64.exe).
#### Tools needed for some functions
[strings.exe](https://download.sysinternals.com/files/Strings.zip).
[rg.exe](https://github.com/BurntSushi/ripgrep/releases/download/13.0.0/ripgrep-13.0.0-x86_64-pc-windows-gnu.zip).
[fzf.exe](https://github.com/junegunn/fzf/releases/download/0.35.1/fzf-0.35.1-windows_amd64.zip).
##### It is recommended to add strings.exe/rg.exe/fzf.exe/ls.exe to your PATH
```python
from dirdf import pd_add_dfdir
pd_add_dfdir()
import pandas as pd
df = pd.Q_folder_to_df(
folder=r"C:\Users\blabla",
ls_path="ls",
last_access_time=True,
exit_keys="ctrl+x",
timeout=None,
)
df2 = pd.Q_folder_to_df_with_functions(
folder=r"C:\Users\blabla",
ls_path="ls",
last_access_time=True,
exit_keys="ctrl+x",
timeout=None,
strings_path="strings",
fzf_path="fzf",
rip_grep_path="rg.exe",
add_flatcopy_sorted=True,
add_flatcopy=True,
add_extract_strings=True,
add_fuzzy_extract=True,
add_ripgrep=True,
add_open_file=True,
add_move_file=True,
)
# Some examples
# Flatcopy - foldersep='ǀ' means that the backslash ‘\’ will be replaced by 'ǀ'. The replacement is important because a “flat copy” cannot have a backslash in the path! Each file type (pdf, jpg, ...) will get its own folder. If you want to save space, create a symlink instead of copying the whole file
df.loc[df.aa_fullpath.str.contains(r'\.txt|\.docx|\.jpg')][:100].ff_flatcopy_sorted.apply(lambda x:x('f:\\testflatcopy_df', foldersep='ǀ', symlink=False, copystat=True))
df.loc[df.aa_fullpath.str.contains(r'\.txt|\.docx|\.jpg')][:100].ff_flatcopy_sorted.apply(lambda x:x('f:\\testflatcopy_df\\symlink', foldersep='ǀ', symlink=True, copystat=True)) # copystat will be ignored in this case
# Flatcopy without sorting file types
df.loc[df.aa_fullpath.str.contains(r'\.txt')][2:100].ff_flatcopy.apply(lambda x:x('f:\\newfoldertest\\flatcopy'))
df.loc[df.aa_fullpath.str.contains(r'\.txt')][2:100].ff_flatcopy.apply(lambda x:x('f:\\newfoldertest\\flatcopy\\sym',symlink=True))
# Extract all strings from any file
df.loc[df.aa_fullpath.str.contains(r'\.txt')][:100].ff_extract_strings.apply(lambda x:x(exit_keys='ctrl+x', print_output=True, timeout=None))
# Fuzzy search in any file
df.loc[df.aa_fullpath.str.contains(r'\.txt')][:100].ff_fuzzy.apply(lambda x:x('windows'))
# Regex search in any file
df.loc[df.aa_fullpath.str.contains(r'\.txt')][:100].ff_ripgrep.apply(lambda x:x(regular_expression='name', other_parameters='-i', exit_keys='ctrl+x', print_output=True, timeout=.1))
# Executes os.startfile()
df.loc[df.aa_fullpath.str.contains(r'\.txt')][:100].iloc[0].ff_open()
# Moves files, keeps the folder structure
df.loc[df.aa_fullpath.str.contains(r'\.txt')][:1].ff_move_file.apply(lambda x:x('f:\\newfoldertest'))
df
Out[3]:
aa_date ... aa_filetype
0 2022-10-23 12:18:58.767317900-03:00 ... .0
1 2022-10-23 05:48:51.755017400-03:00 ... .yaml
2 2022-10-23 05:51:47.520702700-03:00 ... .jpg
3 2022-10-23 05:51:46.817189600-03:00 ... .jpg
4 2022-10-23 05:48:51.755017400-03:00 ... .yaml
5 2022-10-23 12:18:58.767317900-03:00 ... .csv
6 2022-10-23 05:51:49.630625800-03:00 ... .jpg
7 2022-10-23 05:51:52.116036400-03:00 ... .jpg
8 2022-10-23 05:51:52.678404900-03:00 ... .jpg
9 2022-10-23 13:31:03.003835900-03:00 ... NaN
10 2022-10-23 12:18:59.267762600-03:00 ... .pt
11 2022-10-23 06:00:01.068658100-03:00 ... .pt
12 2022-10-23 07:23:56.117463800-03:00 ... .pt
13 2022-10-23 08:53:33.799414100-03:00 ... .pt
14 2022-10-23 12:18:59.048534200-03:00 ... .pt
[15 rows x 14 columns]
df2
Out[4]:
aa_date ... ff_flatcopy
0 2022-10-23 12:18:58.767317900-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
1 2022-10-23 05:48:51.755017400-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
2 2022-10-23 05:51:47.520702700-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
3 2022-10-23 05:51:46.817189600-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
4 2022-10-23 05:48:51.755017400-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
5 2022-10-23 12:18:58.767317900-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
6 2022-10-23 05:51:49.630625800-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
7 2022-10-23 05:51:52.116036400-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
8 2022-10-23 05:51:52.678404900-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
9 2022-10-23 13:31:03.003835900-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
10 2022-10-23 12:18:59.267762600-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
11 2022-10-23 06:00:01.068658100-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
12 2022-10-23 07:23:56.117463800-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
13 2022-10-23 08:53:33.799414100-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
14 2022-10-23 12:18:59.048534200-03:00 ... dest_folder:str, foldersep:str='ǀ', symlink:bo...
[15 rows x 21 columns]
```
Raw data
{
"_id": null,
"home_page": "https://github.com/hansalemaos/dirdf",
"name": "dirdf",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "pandas,DataFrame,Series,search,files",
"author": "Johannes Fischer",
"author_email": "<aulasparticularesdealemaosp@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/77/31/71a39baaf96381c01a5505c1126abefccb90e34c41bd167e19503f9c3a37/dirdf-0.11.tar.gz",
"platform": null,
"description": "\n### Never mind explorer.exe, here is dirdf\n\n\n\nAre you one of those people who have never any space on their hard disk? Do you spend hours and hours searching for your files because you never remember where you saved them? Well, since I am one of those, I tried several tools in the past: TreeSize / WinDirStat / WizTree / SpaceSniffer / GREP. They all are great,\n\n\n\nbut take forever to get the job done and only offer limited filter functions. Around 5 hours ago, after having searched around 30 minutes for a file on my hard drive, I decided to do something about it ... \n\n\n\n#### Install the package (Windows only)\n\n```python\n\npip install dirdf\n\n```\n\n\n\n#### Install Cygwin (ls.exe is necessary for getting the file list)\n\n\n\n[ls.exe](https://www.cygwin.com/setup-x86_64.exe).\n\n\n\n#### Tools needed for some functions\n\n\n\n[strings.exe](https://download.sysinternals.com/files/Strings.zip).\n\n[rg.exe](https://github.com/BurntSushi/ripgrep/releases/download/13.0.0/ripgrep-13.0.0-x86_64-pc-windows-gnu.zip).\n\n[fzf.exe](https://github.com/junegunn/fzf/releases/download/0.35.1/fzf-0.35.1-windows_amd64.zip).\n\n\n\n##### It is recommended to add strings.exe/rg.exe/fzf.exe/ls.exe files to your path\n\n\n\n```python\n\nfrom dirdf import pd_add_dfdir\n\npd_add_dfdir()\n\nimport pandas as pd\n\n\n\ndf = pd.Q_folder_to_df(\n\n folder=r\"C:\\Users\\blabla\",\n\n ls_path=\"ls\",\n\n last_access_time=True,\n\n exit_keys=\"ctrl+x\",\n\n timeout=None,\n\n)\n\ndf2 = pd.Q_folder_to_df_with_functions(\n\n folder=r\"C:\\Users\\blabla\",\n\n ls_path=\"ls\",\n\n last_access_time=True,\n\n exit_keys=\"ctrl+x\",\n\n timeout=None,\n\n strings_path=\"strings\",\n\n fzf_path=\"fzf\",\n\n rip_grep_path=\"rg.exe\",\n\n add_flatcopy_sorted=True,\n\n add_flatcopy=True,\n\n add_extract_strings=True,\n\n add_fuzzy_extract=True,\n\n add_ripgrep=True,\n\n add_open_file=True,\n\n add_move_file=True,\n\n)\n\n\n\n# Some examples\n\n\n\n# Flatcopy - 
foldersep='\u01c0' means that the backslash \u2018\\\u2019 will be replaced by '\u01c0'. The replacement is important because there is no \u201cflat copy\u201d with a backslash in the path! All file types (pdf, jpg ... ) will get their own folder. If you want to save space, create a symlink instead of copying the whole file \n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt|\\.docx|\\.jpg')][:100].ff_flatcopy_sorted.apply(lambda x:x('f:\\\\testflatcopy_df', foldersep='\u01c0', symlink=False, copystat=True))\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt|\\.docx|\\.jpg')][:100].ff_flatcopy_sorted.apply(lambda x:x('f:\\\\testflatcopy_df\\\\symlink', foldersep='\u01c0', symlink=True, copystat=True)) # copystat will be ignored in this case\n\n\n\n# Flatcopy without sorting file types\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][2:100].ff_flatcopy.apply(lambda x:x('f:\\\\newfoldertest\\\\flatcopy'))\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][2:100].ff_flatcopy.apply(lambda x:x('f:\\\\newfoldertest\\\\flatcopy\\\\sym',symlink=True))\n\n\n\n# Extract all strings from any file \n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:100].ff_extract_strings.apply(lambda x:x(exit_keys='ctrl+x', print_output=True, timeout=None))\n\n\n\n# Fuzzy search in any file\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:100].ff_fuzzy.apply(lambda x:x('windows'))\n\n\n\n# Regex search in any file\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:100].ff_ripgrep.apply(lambda x:x(regular_expression='name', other_parameters='-i', exit_keys='ctrl+x', print_output=True, timeout=.1))\n\n\n\n# Executes os.startfile()\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:100].iloc[0].ff_open()\n\n\n\n# Moves files, keeps the folder structur\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:1].ff_move_file.apply(lambda x:x('f:\\\\newfoldertest'))\n\n\n\n\n\ndf\n\nOut[3]: \n\n aa_date ... aa_filetype\n\n0 2022-10-23 12:18:58.767317900-03:00 ... .0\n\n1 2022-10-23 05:48:51.755017400-03:00 ... 
.yaml\n\n2 2022-10-23 05:51:47.520702700-03:00 ... .jpg\n\n3 2022-10-23 05:51:46.817189600-03:00 ... .jpg\n\n4 2022-10-23 05:48:51.755017400-03:00 ... .yaml\n\n5 2022-10-23 12:18:58.767317900-03:00 ... .csv\n\n6 2022-10-23 05:51:49.630625800-03:00 ... .jpg\n\n7 2022-10-23 05:51:52.116036400-03:00 ... .jpg\n\n8 2022-10-23 05:51:52.678404900-03:00 ... .jpg\n\n9 2022-10-23 13:31:03.003835900-03:00 ... NaN\n\n10 2022-10-23 12:18:59.267762600-03:00 ... .pt\n\n11 2022-10-23 06:00:01.068658100-03:00 ... .pt\n\n12 2022-10-23 07:23:56.117463800-03:00 ... .pt\n\n13 2022-10-23 08:53:33.799414100-03:00 ... .pt\n\n14 2022-10-23 12:18:59.048534200-03:00 ... .pt\n\n[15 rows x 14 columns]\n\ndf2\n\nOut[4]: \n\n aa_date ... ff_flatcopy\n\n0 2022-10-23 12:18:58.767317900-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n1 2022-10-23 05:48:51.755017400-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n2 2022-10-23 05:51:47.520702700-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n3 2022-10-23 05:51:46.817189600-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n4 2022-10-23 05:48:51.755017400-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n5 2022-10-23 12:18:58.767317900-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n6 2022-10-23 05:51:49.630625800-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n7 2022-10-23 05:51:52.116036400-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n8 2022-10-23 05:51:52.678404900-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n9 2022-10-23 13:31:03.003835900-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n10 2022-10-23 12:18:59.267762600-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n11 2022-10-23 06:00:01.068658100-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n12 2022-10-23 07:23:56.117463800-03:00 ... 
dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n13 2022-10-23 08:53:33.799414100-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n14 2022-10-23 12:18:59.048534200-03:00 ... dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n[15 rows x 21 columns]\n\n\n\n\n\n```\n\n\n\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Never mind explorer.exe, here is dirdf",
"version": "0.11",
"split_keywords": [
"pandas",
"dataframe",
"series",
"search",
"files"
],
"urls": [
{
"comment_text": "",
"digests": {
"md5": "86350fadd766b35dc1ebbb9933c42670",
"sha256": "70cada75b3b1704eb6e79e479f4364f2bdb97132cce06b6f6a620f77958007c1"
},
"downloads": -1,
"filename": "dirdf-0.11-py3-none-any.whl",
"has_sig": false,
"md5_digest": "86350fadd766b35dc1ebbb9933c42670",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 11175,
"upload_time": "2022-12-27T13:18:23",
"upload_time_iso_8601": "2022-12-27T13:18:23.818067Z",
"url": "https://files.pythonhosted.org/packages/f7/8e/133c1cb761d1fcbdd5b3bb0bab1f838f69dba45e2e9a67285800f506d34a/dirdf-0.11-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"md5": "2e697536eb18586b61e42a9213014790",
"sha256": "e5e0be03af0f11f2f28ab3f9eb3a75cfa0542925035b5e78fb1216a8eb8f28e2"
},
"downloads": -1,
"filename": "dirdf-0.11.tar.gz",
"has_sig": false,
"md5_digest": "2e697536eb18586b61e42a9213014790",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 10558,
"upload_time": "2022-12-27T13:18:25",
"upload_time_iso_8601": "2022-12-27T13:18:25.349151Z",
"url": "https://files.pythonhosted.org/packages/77/31/71a39baaf96381c01a5505c1126abefccb90e34c41bd167e19503f9c3a37/dirdf-0.11.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2022-12-27 13:18:25",
"github": true,
"gitlab": false,
"bitbucket": false,
"github_user": "hansalemaos",
"github_project": "dirdf",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"lcname": "dirdf"
}