### Creates a DataFrame/Series from duplicates
```python
# Install first (run in a shell, not in Python): pip install a-pandas-ex-duplicates-to-df
from a_pandas_ex_duplicates_to_df import pd_add_duplicates_to_df
import pandas as pd
pd_add_duplicates_to_df()
df = pd.read_csv("https://github.com/pandas-dev/pandas/raw/main/doc/data/titanic.csv")
df2 = pd.read_csv("https://github.com/pandas-dev/pandas/raw/main/doc/data/titanic.csv")[
:50
]
df = pd.concat([df, df2], ignore_index=True)
dupl = df.ds_get_duplicates()
dupl
Out[5]:
PassengerId Survived Pclass ... Cabin Embarked DUPLICATEINDEX
0 1 0 3 ... NaN S (0, 891)
1 1 0 3 ... NaN S (0, 891)
2 10 1 2 ... NaN C (9, 900)
3 10 1 2 ... NaN C (9, 900)
4 11 1 3 ... G6 S (10, 901)
.. ... ... ... ... ... ... ...
95 7 0 1 ... E46 S (6, 897)
96 8 0 3 ... NaN S (7, 898)
97 8 0 3 ... NaN S (7, 898)
98 9 1 3 ... NaN S (8, 899)
99 9 1 3 ... NaN S (8, 899)
[100 rows x 13 columns]
dupl2=df.ds_get_duplicates(subset=['Survived'])
dupl2
Out[7]:
PassengerId ... DUPLICATEINDEX
0 1 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...
1 5 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...
2 6 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...
3 7 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...
4 8 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...
.. ... ... ...
936 37 ... (1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...
937 40 ... (1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...
938 44 ... (1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...
939 45 ... (1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...
940 48 ... (1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...
[941 rows x 13 columns]
df.Embarked.ds_get_duplicates()
Embarked DUPLICATEINDEX
0 NaN (61, 829)
1 NaN (61, 829)
2 C (1, 9, 19, 26, 30, 31, 34, 36, 39, 42, 43, 48,...
3 C (1, 9, 19, 26, 30, 31, 34, 36, 39, 42, 43, 48,...
4 C (1, 9, 19, 26, 30, 31, 34, 36, 39, 42, 43, 48,...
.. ... ...
936 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...
937 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...
938 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...
939 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...
940 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...
[941 rows x 2 columns]
```
Raw PyPI package metadata (JSON):
{
"_id": null,
"home_page": "https://github.com/hansalemaos/a_pandas_ex_duplicates_to_df",
"name": "a-pandas-ex-duplicates-to-df",
"maintainer": "",
"docs_url": null,
"requires_python": "",
"maintainer_email": "",
"keywords": "pandas,DataFrame,Series,duplicates",
"author": "Johannes Fischer",
"author_email": "<aulasparticularesdealemaosp@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/e6/d9/273645c58ffbe7223c8c76e93ee447672ebaf74c7dce8c2a1bf4f64ca1d2/a_pandas_ex_duplicates_to_df-0.10.tar.gz",
"platform": null,
"description": "\n### Creates a DataFrame/Series from duplicates \n\n\n\n```python\n\npip install a-pandas-ex-duplicates-to-df\n\n\n\nfrom a_pandas_ex_duplicates_to_df import pd_add_duplicates_to_df\n\nimport pandas as pd\n\npd_add_duplicates_to_df()\n\ndf = pd.read_csv(\"https://github.com/pandas-dev/pandas/raw/main/doc/data/titanic.csv\")\n\ndf2 = pd.read_csv(\"https://github.com/pandas-dev/pandas/raw/main/doc/data/titanic.csv\")[\n\n :50\n\n]\n\ndf = pd.concat([df, df2], ignore_index=True)\n\ndupl = df.ds_get_duplicates()\n\n\n\n\n\ndupl\n\nOut[5]: \n\n PassengerId Survived Pclass ... Cabin Embarked DUPLICATEINDEX\n\n0 1 0 3 ... NaN S (0, 891)\n\n1 1 0 3 ... NaN S (0, 891)\n\n2 10 1 2 ... NaN C (9, 900)\n\n3 10 1 2 ... NaN C (9, 900)\n\n4 11 1 3 ... G6 S (10, 901)\n\n.. ... ... ... ... ... ... ...\n\n95 7 0 1 ... E46 S (6, 897)\n\n96 8 0 3 ... NaN S (7, 898)\n\n97 8 0 3 ... NaN S (7, 898)\n\n98 9 1 3 ... NaN S (8, 899)\n\n99 9 1 3 ... NaN S (8, 899)\n\n[100 rows x 13 columns]\n\n\n\n\n\ndupl2=df.ds_get_duplicates(subset=['Survived'])\n\ndupl2\n\nOut[7]: \n\n PassengerId ... DUPLICATEINDEX\n\n0 1 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...\n\n1 5 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...\n\n2 6 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...\n\n3 7 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...\n\n4 8 ... (0, 4, 5, 6, 7, 12, 13, 14, 16, 18, 20, 24, 26...\n\n.. ... ... ...\n\n936 37 ... (1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...\n\n937 40 ... (1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...\n\n938 44 ... (1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...\n\n939 45 ... (1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...\n\n940 48 ... 
(1, 2, 3, 8, 9, 10, 11, 15, 17, 19, 21, 22, 23...\n\n[941 rows x 13 columns]\n\n\n\n\n\ndf.Embarked.ds_get_duplicates()\n\n\n\n Embarked DUPLICATEINDEX\n\n0 NaN (61, 829)\n\n1 NaN (61, 829)\n\n2 C (1, 9, 19, 26, 30, 31, 34, 36, 39, 42, 43, 48,...\n\n3 C (1, 9, 19, 26, 30, 31, 34, 36, 39, 42, 43, 48,...\n\n4 C (1, 9, 19, 26, 30, 31, 34, 36, 39, 42, 43, 48,...\n\n.. ... ...\n\n936 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...\n\n937 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...\n\n938 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...\n\n939 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...\n\n940 S (0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, ...\n\n[941 rows x 2 columns]\n\n\n\n```\n\n\n\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Creates a DataFrame/Series from duplicates",
"version": "0.10",
"split_keywords": [
"pandas",
"dataframe",
"series",
"duplicates"
],
"urls": [
{
"comment_text": "",
"digests": {
"md5": "269f01f6abfdca7dfaec4ffee57f60ad",
"sha256": "2dec20ca311c13bda87d690a25879b81938793ac150750037cb401f7f3df1502"
},
"downloads": -1,
"filename": "a_pandas_ex_duplicates_to_df-0.10-py3-none-any.whl",
"has_sig": false,
"md5_digest": "269f01f6abfdca7dfaec4ffee57f60ad",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": null,
"size": 6222,
"upload_time": "2022-12-04T07:43:39",
"upload_time_iso_8601": "2022-12-04T07:43:39.957859Z",
"url": "https://files.pythonhosted.org/packages/c0/8f/981bc62bbeacba7dd0c498e1a1386d5e59fe40eb92c1f5bad145738ed61d/a_pandas_ex_duplicates_to_df-0.10-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"md5": "19326e7d718451b173af1a6f8ae21cd4",
"sha256": "10ec9fe3aee744f67a8a1293e9d1740fc7d311e352dcc37069ef1c2dbc4ea9cd"
},
"downloads": -1,
"filename": "a_pandas_ex_duplicates_to_df-0.10.tar.gz",
"has_sig": false,
"md5_digest": "19326e7d718451b173af1a6f8ae21cd4",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 4363,
"upload_time": "2022-12-04T07:43:42",
"upload_time_iso_8601": "2022-12-04T07:43:42.027610Z",
"url": "https://files.pythonhosted.org/packages/e6/d9/273645c58ffbe7223c8c76e93ee447672ebaf74c7dce8c2a1bf4f64ca1d2/a_pandas_ex_duplicates_to_df-0.10.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2022-12-04 07:43:42",
"github": true,
"gitlab": false,
"bitbucket": false,
"github_user": "hansalemaos",
"github_project": "a_pandas_ex_duplicates_to_df",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "pandas",
"specs": []
}
],
"lcname": "a-pandas-ex-duplicates-to-df"
}