dirdf


Namedirdf JSON
Version 0.11 PyPI version JSON
download
home_pagehttps://github.com/hansalemaos/dirdf
SummaryNever mind explorer.exe, here is dirdf
upload_time2022-12-27 13:18:25
maintainer
docs_urlNone
authorJohannes Fischer
requires_python
licenseMIT
keywords pandas dataframe series search files
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
            
### Never mind explorer.exe, here is dirdf



Are you one of those people who never have any space on their hard disk? Do you spend hours and hours searching for your files because you never remember where you saved them? Well, since I am one of those, I tried several tools in the past: TreeSize / WinDirStat / WizTree / SpaceSniffer / GREP. They are all great,



but take forever to get the job done and only offer limited filter functions. About 5 hours ago, after spending around 30 minutes searching for a file on my hard drive, I decided to do something about it ... 



#### Install the package (Windows only)

```python

pip install dirdf

```



#### Install Cygwin (ls.exe is necessary for getting the file list)



[ls.exe](https://www.cygwin.com/setup-x86_64.exe).



#### Tools needed for some functions



[strings.exe](https://download.sysinternals.com/files/Strings.zip).

[rg.exe](https://github.com/BurntSushi/ripgrep/releases/download/13.0.0/ripgrep-13.0.0-x86_64-pc-windows-gnu.zip).

[fzf.exe](https://github.com/junegunn/fzf/releases/download/0.35.1/fzf-0.35.1-windows_amd64.zip).



##### It is recommended to add the strings.exe/rg.exe/fzf.exe/ls.exe files to your PATH



```python

from dirdf import pd_add_dfdir

pd_add_dfdir()

import pandas as pd



df = pd.Q_folder_to_df(

    folder=r"C:\Users\blabla",

    ls_path="ls",

    last_access_time=True,

    exit_keys="ctrl+x",

    timeout=None,

)

df2 = pd.Q_folder_to_df_with_functions(

    folder=r"C:\Users\blabla",

    ls_path="ls",

    last_access_time=True,

    exit_keys="ctrl+x",

    timeout=None,

    strings_path="strings",

    fzf_path="fzf",

    rip_grep_path="rg.exe",

    add_flatcopy_sorted=True,

    add_flatcopy=True,

    add_extract_strings=True,

    add_fuzzy_extract=True,

    add_ripgrep=True,

    add_open_file=True,

    add_move_file=True,

)



# Some examples



# Flatcopy - foldersep='ǀ' means that the backslash ‘\’ will be replaced by 'ǀ'. The replacement is important because there is no “flat copy” with a backslash in the path! All file types (pdf, jpg ... ) will get their own folder. If you want to save space, create a symlink instead of copying the whole file 

df.loc[df.aa_fullpath.str.contains(r'\.txt|\.docx|\.jpg')][:100].ff_flatcopy_sorted.apply(lambda x:x('f:\\testflatcopy_df', foldersep='ǀ', symlink=False, copystat=True))

df.loc[df.aa_fullpath.str.contains(r'\.txt|\.docx|\.jpg')][:100].ff_flatcopy_sorted.apply(lambda x:x('f:\\testflatcopy_df\\symlink', foldersep='ǀ', symlink=True, copystat=True)) # copystat will be ignored in this case



# Flatcopy without sorting file types

df.loc[df.aa_fullpath.str.contains(r'\.txt')][2:100].ff_flatcopy.apply(lambda x:x('f:\\newfoldertest\\flatcopy'))

df.loc[df.aa_fullpath.str.contains(r'\.txt')][2:100].ff_flatcopy.apply(lambda x:x('f:\\newfoldertest\\flatcopy\\sym',symlink=True))



# Extract all strings from any file 

df.loc[df.aa_fullpath.str.contains(r'\.txt')][:100].ff_extract_strings.apply(lambda x:x(exit_keys='ctrl+x', print_output=True, timeout=None))



# Fuzzy search in any file

df.loc[df.aa_fullpath.str.contains(r'\.txt')][:100].ff_fuzzy.apply(lambda x:x('windows'))



# Regex search in any file

df.loc[df.aa_fullpath.str.contains(r'\.txt')][:100].ff_ripgrep.apply(lambda x:x(regular_expression='name', other_parameters='-i', exit_keys='ctrl+x', print_output=True, timeout=.1))



# Executes os.startfile()

df.loc[df.aa_fullpath.str.contains(r'\.txt')][:100].iloc[0].ff_open()



# Moves files, keeps the folder structure

df.loc[df.aa_fullpath.str.contains(r'\.txt')][:1].ff_move_file.apply(lambda x:x('f:\\newfoldertest'))





df

Out[3]: 

                               aa_date  ... aa_filetype

0  2022-10-23 12:18:58.767317900-03:00  ...          .0

1  2022-10-23 05:48:51.755017400-03:00  ...       .yaml

2  2022-10-23 05:51:47.520702700-03:00  ...        .jpg

3  2022-10-23 05:51:46.817189600-03:00  ...        .jpg

4  2022-10-23 05:48:51.755017400-03:00  ...       .yaml

5  2022-10-23 12:18:58.767317900-03:00  ...        .csv

6  2022-10-23 05:51:49.630625800-03:00  ...        .jpg

7  2022-10-23 05:51:52.116036400-03:00  ...        .jpg

8  2022-10-23 05:51:52.678404900-03:00  ...        .jpg

9  2022-10-23 13:31:03.003835900-03:00  ...         NaN

10 2022-10-23 12:18:59.267762600-03:00  ...         .pt

11 2022-10-23 06:00:01.068658100-03:00  ...         .pt

12 2022-10-23 07:23:56.117463800-03:00  ...         .pt

13 2022-10-23 08:53:33.799414100-03:00  ...         .pt

14 2022-10-23 12:18:59.048534200-03:00  ...         .pt

[15 rows x 14 columns]

df2

Out[4]: 

                               aa_date  ...                                        ff_flatcopy

0  2022-10-23 12:18:58.767317900-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

1  2022-10-23 05:48:51.755017400-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

2  2022-10-23 05:51:47.520702700-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

3  2022-10-23 05:51:46.817189600-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

4  2022-10-23 05:48:51.755017400-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

5  2022-10-23 12:18:58.767317900-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

6  2022-10-23 05:51:49.630625800-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

7  2022-10-23 05:51:52.116036400-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

8  2022-10-23 05:51:52.678404900-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

9  2022-10-23 13:31:03.003835900-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

10 2022-10-23 12:18:59.267762600-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

11 2022-10-23 06:00:01.068658100-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

12 2022-10-23 07:23:56.117463800-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

13 2022-10-23 08:53:33.799414100-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

14 2022-10-23 12:18:59.048534200-03:00  ...  dest_folder:str, foldersep:str='ǀ', symlink:bo...

[15 rows x 21 columns]





```




            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/hansalemaos/dirdf",
    "name": "dirdf",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "pandas,DataFrame,Series,search,files",
    "author": "Johannes Fischer",
    "author_email": "<aulasparticularesdealemaosp@gmail.com>",
    "download_url": "https://files.pythonhosted.org/packages/77/31/71a39baaf96381c01a5505c1126abefccb90e34c41bd167e19503f9c3a37/dirdf-0.11.tar.gz",
    "platform": null,
    "description": "\n### Never mind explorer.exe, here is dirdf\n\n\n\nAre you one of those people who have never any space on their hard disk? Do you spend hours and hours searching for your files because you never remember where you saved them? Well, since I am one of those, I tried several tools in the past: TreeSize / WinDirStat / WizTree / SpaceSniffer / GREP. They all are great,\n\n\n\nbut take forever to get the job done and only offer limited filter functions. Around 5 hours ago, after having searched around 30 minutes for a file on my hard drive, I decided to do something about it ... \n\n\n\n#### Install the package (Windows only)\n\n```python\n\npip install dirdf\n\n```\n\n\n\n#### Install Cygwin (ls.exe is necessary for getting the file list)\n\n\n\n[ls.exe](https://www.cygwin.com/setup-x86_64.exe).\n\n\n\n#### Tools needed for some functions\n\n\n\n[strings.exe](https://download.sysinternals.com/files/Strings.zip).\n\n[rg.exe](https://github.com/BurntSushi/ripgrep/releases/download/13.0.0/ripgrep-13.0.0-x86_64-pc-windows-gnu.zip).\n\n[fzf.exe](https://github.com/junegunn/fzf/releases/download/0.35.1/fzf-0.35.1-windows_amd64.zip).\n\n\n\n##### It is recommended to add strings.exe/rg.exe/fzf.exe/ls.exe files to your path\n\n\n\n```python\n\nfrom dirdf import pd_add_dfdir\n\npd_add_dfdir()\n\nimport pandas as pd\n\n\n\ndf = pd.Q_folder_to_df(\n\n    folder=r\"C:\\Users\\blabla\",\n\n    ls_path=\"ls\",\n\n    last_access_time=True,\n\n    exit_keys=\"ctrl+x\",\n\n    timeout=None,\n\n)\n\ndf2 = pd.Q_folder_to_df_with_functions(\n\n    folder=r\"C:\\Users\\blabla\",\n\n    ls_path=\"ls\",\n\n    last_access_time=True,\n\n    exit_keys=\"ctrl+x\",\n\n    timeout=None,\n\n    strings_path=\"strings\",\n\n    fzf_path=\"fzf\",\n\n    rip_grep_path=\"rg.exe\",\n\n    add_flatcopy_sorted=True,\n\n    add_flatcopy=True,\n\n    add_extract_strings=True,\n\n    add_fuzzy_extract=True,\n\n    add_ripgrep=True,\n\n    add_open_file=True,\n\n    
add_move_file=True,\n\n)\n\n\n\n# Some examples\n\n\n\n# Flatcopy - foldersep='\u01c0' means that the backslash \u2018\\\u2019 will be replaced by '\u01c0'. The replacement is important because there is no \u201cflat copy\u201d with a backslash in the path! All file types (pdf, jpg ... ) will get their own folder. If you want to save space, create a symlink instead of copying the whole file \n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt|\\.docx|\\.jpg')][:100].ff_flatcopy_sorted.apply(lambda x:x('f:\\\\testflatcopy_df', foldersep='\u01c0', symlink=False, copystat=True))\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt|\\.docx|\\.jpg')][:100].ff_flatcopy_sorted.apply(lambda x:x('f:\\\\testflatcopy_df\\\\symlink', foldersep='\u01c0', symlink=True, copystat=True)) # copystat will be ignored in this case\n\n\n\n# Flatcopy without sorting file types\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][2:100].ff_flatcopy.apply(lambda x:x('f:\\\\newfoldertest\\\\flatcopy'))\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][2:100].ff_flatcopy.apply(lambda x:x('f:\\\\newfoldertest\\\\flatcopy\\\\sym',symlink=True))\n\n\n\n# Extract all strings from any file \n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:100].ff_extract_strings.apply(lambda x:x(exit_keys='ctrl+x', print_output=True, timeout=None))\n\n\n\n# Fuzzy search in any file\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:100].ff_fuzzy.apply(lambda x:x('windows'))\n\n\n\n# Regex search in any file\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:100].ff_ripgrep.apply(lambda x:x(regular_expression='name', other_parameters='-i', exit_keys='ctrl+x', print_output=True, timeout=.1))\n\n\n\n# Executes os.startfile()\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:100].iloc[0].ff_open()\n\n\n\n# Moves files, keeps the folder structur\n\ndf.loc[df.aa_fullpath.str.contains(r'\\.txt')][:1].ff_move_file.apply(lambda x:x('f:\\\\newfoldertest'))\n\n\n\n\n\ndf\n\nOut[3]: \n\n                               aa_date  ... 
aa_filetype\n\n0  2022-10-23 12:18:58.767317900-03:00  ...          .0\n\n1  2022-10-23 05:48:51.755017400-03:00  ...       .yaml\n\n2  2022-10-23 05:51:47.520702700-03:00  ...        .jpg\n\n3  2022-10-23 05:51:46.817189600-03:00  ...        .jpg\n\n4  2022-10-23 05:48:51.755017400-03:00  ...       .yaml\n\n5  2022-10-23 12:18:58.767317900-03:00  ...        .csv\n\n6  2022-10-23 05:51:49.630625800-03:00  ...        .jpg\n\n7  2022-10-23 05:51:52.116036400-03:00  ...        .jpg\n\n8  2022-10-23 05:51:52.678404900-03:00  ...        .jpg\n\n9  2022-10-23 13:31:03.003835900-03:00  ...         NaN\n\n10 2022-10-23 12:18:59.267762600-03:00  ...         .pt\n\n11 2022-10-23 06:00:01.068658100-03:00  ...         .pt\n\n12 2022-10-23 07:23:56.117463800-03:00  ...         .pt\n\n13 2022-10-23 08:53:33.799414100-03:00  ...         .pt\n\n14 2022-10-23 12:18:59.048534200-03:00  ...         .pt\n\n[15 rows x 14 columns]\n\ndf2\n\nOut[4]: \n\n                               aa_date  ...                                        ff_flatcopy\n\n0  2022-10-23 12:18:58.767317900-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n1  2022-10-23 05:48:51.755017400-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n2  2022-10-23 05:51:47.520702700-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n3  2022-10-23 05:51:46.817189600-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n4  2022-10-23 05:48:51.755017400-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n5  2022-10-23 12:18:58.767317900-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n6  2022-10-23 05:51:49.630625800-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n7  2022-10-23 05:51:52.116036400-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n8  2022-10-23 05:51:52.678404900-03:00  ...  
dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n9  2022-10-23 13:31:03.003835900-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n10 2022-10-23 12:18:59.267762600-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n11 2022-10-23 06:00:01.068658100-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n12 2022-10-23 07:23:56.117463800-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n13 2022-10-23 08:53:33.799414100-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n14 2022-10-23 12:18:59.048534200-03:00  ...  dest_folder:str, foldersep:str='\u01c0', symlink:bo...\n\n[15 rows x 21 columns]\n\n\n\n\n\n```\n\n\n\n",
    "bugtrack_url": null,
    "license": "MIT",
    "summary": "Never mind explorer.exe, here is dirdf",
    "version": "0.11",
    "split_keywords": [
        "pandas",
        "dataframe",
        "series",
        "search",
        "files"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "md5": "86350fadd766b35dc1ebbb9933c42670",
                "sha256": "70cada75b3b1704eb6e79e479f4364f2bdb97132cce06b6f6a620f77958007c1"
            },
            "downloads": -1,
            "filename": "dirdf-0.11-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "86350fadd766b35dc1ebbb9933c42670",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 11175,
            "upload_time": "2022-12-27T13:18:23",
            "upload_time_iso_8601": "2022-12-27T13:18:23.818067Z",
            "url": "https://files.pythonhosted.org/packages/f7/8e/133c1cb761d1fcbdd5b3bb0bab1f838f69dba45e2e9a67285800f506d34a/dirdf-0.11-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "md5": "2e697536eb18586b61e42a9213014790",
                "sha256": "e5e0be03af0f11f2f28ab3f9eb3a75cfa0542925035b5e78fb1216a8eb8f28e2"
            },
            "downloads": -1,
            "filename": "dirdf-0.11.tar.gz",
            "has_sig": false,
            "md5_digest": "2e697536eb18586b61e42a9213014790",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 10558,
            "upload_time": "2022-12-27T13:18:25",
            "upload_time_iso_8601": "2022-12-27T13:18:25.349151Z",
            "url": "https://files.pythonhosted.org/packages/77/31/71a39baaf96381c01a5505c1126abefccb90e34c41bd167e19503f9c3a37/dirdf-0.11.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2022-12-27 13:18:25",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "github_user": "hansalemaos",
    "github_project": "dirdf",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "lcname": "dirdf"
}
        
Elapsed time: 0.04599s