Name | fuzzylearn JSON |
Version |
1.2.0
JSON |
| download |
home_page | |
Summary | |
upload_time | 2023-06-16 06:42:18 |
maintainer | |
docs_url | None |
author | drhosseinjavedani |
requires_python | ==3.8.5 |
license | |
keywords |
|
VCS |
|
bugtrack_url |
|
requirements |
No requirements were recorded.
|
Travis-CI |
No Travis.
|
coveralls test coverage |
No coveralls.
|
![GitHub Repo stars](https://img.shields.io/github/stars/drhosseinjavedani/fuzzylearn) ![GitHub forks](https://img.shields.io/github/forks/drhosseinjavedani/fuzzylearn) ![GitHub language count](https://img.shields.io/github/languages/count/drhosseinjavedani/fuzzylearn) ![GitHub repo size](https://img.shields.io/github/repo-size/drhosseinjavedani/fuzzylearn) ![GitHub](https://img.shields.io/github/license/drhosseinjavedani/fuzzylearn)![PyPI - Downloads](https://img.shields.io/pypi/dd/fuzzylearn) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/fuzzylearn)
# FuzzyLearn
FuzzyLearn is a new package for creating classification and regression estimators using fuzzy set concepts. Some of its classes use [Optuna](https://optuna.readthedocs.io/en/stable/index.html) and the [Ray tune Scikit-Learn API](https://docs.ray.io/en/latest/tune/api_docs/sklearn.html) to tune most of their hyper-parameters.
### Installation
The FuzzyLearn package is available on PyPI and can be installed with pip:
```sh
pip install fuzzylearn
```
#### Example 1: Adult Dataset (Use FuzzyLearn as a Classifier)
```
from fuzzylearn.classification.fast.fast import FLClassifier
from sklearn.metrics import classification_report,confusion_matrix,f1_score,roc_auc_score
from feature_engine.imputation import CategoricalImputer, MeanMedianImputer
from category_encoders import OrdinalEncoder
from sklearn.pipeline import Pipeline
import pandas as pd
import time
from sklearn.model_selection import train_test_split
import ray
urldata = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
# column names
col_names = [
"age",
"workclass",
"fnlwgt",
"education",
"education-num",
"marital-status",
"occupation",
"relationship",
"race",
"sex",
"capital-gain",
"capital-loss",
"hours-per-week",
"native-country",
"label",
]
# read data
data = pd.read_csv(urldata, header=None, names=col_names, sep=",")
# use a sample of 20000 rows of data only
data = data.sample(20000)
data.head()
data.loc[data["label"] == "<=50K", "label"] = 0
data.loc[data["label"] == " <=50K", "label"] = 0
data.loc[data["label"] == ">50K", "label"] = 1
data.loc[data["label"] == " >50K", "label"] = 1
data["label"] = data["label"].astype(int)
# Train test split
X = data.loc[:, data.columns != "label"]
y = data.loc[:, data.columns == "label"]
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, stratify=y["label"], random_state=42
)
int_cols = X_train.select_dtypes(include=["int"]).columns.tolist()
float_cols = X_train.select_dtypes(include=["float"]).columns.tolist()
cat_cols = X_train.select_dtypes(include=["object"]).columns.tolist()
print('int_cols')
print(int_cols)
print('float_cols')
print(float_cols)
print('cat_cols')
print(cat_cols)
pipeline =Pipeline([
# int missing values imputers
('intimputer', MeanMedianImputer(
imputation_method='median', variables=int_cols)),
# category missing values imputers
('catimputer', CategoricalImputer(variables=cat_cols)),
# encode categorical variables
('catencoder', OrdinalEncoder()),
])
X_train = pipeline.fit_transform(X_train,y_train)
X_test = pipeline.transform(X_test)
start_time = time.time()
model = FLClassifier(number_of_intervals=5,fuzzy_type="triangular",fuzzy_cut=0.3,threshold=0.7,metric = 'euclidean')
model.fit(X=X_train,y=y_train,X_valid=None,y_valid=None)
print("--- %s seconds for training ---" % (time.time() - start_time))
start_time = time.time()
y_pred = model.predict(X=X_test)
print("--- %s seconds for prediction ---" % (time.time() - start_time))
print("classification_report :")
print(classification_report(y_test, y_pred))
print("confusion_matrix : ")
print(confusion_matrix(y_test, y_pred))
print("roc_auc_score : ")
print(roc_auc_score(y_test, y_pred))
print("f1_score : ")
print(f1_score(y_test, y_pred))
```
#### Example 2: Adult Dataset (Use FuzzyLearn as a Classifier with Optuna optimization engine)
```
from fuzzylearn.classification.fast.optimum import FLOptunaClassifier
from sklearn.metrics import classification_report,confusion_matrix,f1_score,roc_auc_score
from feature_engine.imputation import CategoricalImputer, MeanMedianImputer
from category_encoders import OrdinalEncoder
from sklearn.pipeline import Pipeline
import pandas as pd
import time
from sklearn.model_selection import train_test_split
import zipfile
import urllib.request
urldata = "https://archive.ics.uci.edu/static/public/2/adult.zip"
adult_data = 'fuzzylearn/data/adult.zip'
try:
urllib.request.urlretrieve(urldata, adult_data)
except:
print('error!')
with zipfile.ZipFile('fuzzylearn/data/adult.zip', 'r') as zip_ref:
zip_ref.extractall('fuzzylearn/data/adult')
folder_path = 'fuzzylearn/data/adult/'
dataset_filename = 'adult.data'
#df = pd.read_csv(folder_path + dataset_filename)
# column names
col_names = [
"age",
"workclass",
"fnlwgt",
"education",
"education-num",
"marital-status",
"occupation",
"relationship",
"race",
"sex",
"capital-gain",
"capital-loss",
"hours-per-week",
"native-country",
"label",
]
# read data
data = pd.read_csv(folder_path + dataset_filename, header=None, names=col_names, sep=",")
# use sample of 1000 rows of data only
data = data.sample(1000)
data.head()
data.loc[data["label"] == "<=50K", "label"] = 0
data.loc[data["label"] == " <=50K", "label"] = 0
data.loc[data["label"] == ">50K", "label"] = 1
data.loc[data["label"] == " >50K", "label"] = 1
data["label"] = data["label"].astype(int)
# Train test split
X = data.loc[:, data.columns != "label"]
y = data.loc[:, data.columns == "label"]
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, stratify=y["label"], random_state=42
)
int_cols = X_train.select_dtypes(include=["int"]).columns.tolist()
float_cols = X_train.select_dtypes(include=["float"]).columns.tolist()
cat_cols = X_train.select_dtypes(include=["object"]).columns.tolist()
print('int_cols')
print(int_cols)
print('float_cols')
print(float_cols)
print('cat_cols')
print(cat_cols)
pipeline_steps = []
if len(int_cols) > 0 :
# append int missing values imputers
pipeline_steps.append(('intimputer', MeanMedianImputer(
imputation_method='median', variables=int_cols)))
if len(float_cols) > 0 :
# append float missing values imputers
pipeline_steps.append(('floatimputer', MeanMedianImputer(
imputation_method='mean', variables=float_cols)))
if len(cat_cols) > 0 :
# append cat missing values imputers
pipeline_steps.append(('catimputer', CategoricalImputer(variables=cat_cols)))
# encode categorical variables
pipeline_steps.append(('catencoder', OrdinalEncoder()))
pipeline =Pipeline(pipeline_steps)
X_train = pipeline.fit_transform(X_train,y_train)
X_test = pipeline.transform(X_test)
start_time = time.time()
model = FLOptunaClassifier(optimizer = "optuna",metrics_list=['cosine','manhattan'],fuzzy_type_list=['simple','triangular'],fuzzy_cut_range=[0.05,0.45],number_of_intervals_range=[5,14],threshold_range=[0.1,12.0], error_measurement_metric= 'f1_score(y_true, y_pred, average="weighted")',n_trials=100)
model.fit(X=X_train,y=y_train,X_valid=None,y_valid=None)
print("--- %s seconds for training ---" % (time.time() - start_time))
start_time = time.time()
y_pred = model.predict(X=X_test)
print("--- %s seconds for prediction ---" % (time.time() - start_time))
print("classification_report :")
print(classification_report(y_test, y_pred))
print("confusion_matrix : ")
print(confusion_matrix(y_test, y_pred))
print("roc_auc_score : ")
print(roc_auc_score(y_test, y_pred))
print("f1_score : ")
print(f1_score(y_test, y_pred))
```
#### Example 3: Adult Dataset (Use FuzzyLearn as a Classifier with Optuna optimization engine and Ray integrated.)
```
from fuzzylearn.classification.fast.optimum import FLOptunaClassifier
from sklearn.metrics import classification_report,confusion_matrix,f1_score,roc_auc_score
from feature_engine.imputation import CategoricalImputer, MeanMedianImputer
from category_encoders import OrdinalEncoder
from sklearn.pipeline import Pipeline
import pandas as pd
import time
from sklearn.model_selection import train_test_split
import zipfile
import urllib.request
urldata = "https://archive.ics.uci.edu/static/public/2/adult.zip"
adult_data = 'fuzzylearn/data/adult.zip'
try:
urllib.request.urlretrieve(urldata, adult_data)
except:
print('error!')
with zipfile.ZipFile('fuzzylearn/data/adult.zip', 'r') as zip_ref:
zip_ref.extractall('fuzzylearn/data/adult')
folder_path = 'fuzzylearn/data/adult/'
dataset_filename = 'adult.data'
#df = pd.read_csv(folder_path + dataset_filename)
# column names
col_names = [
"age",
"workclass",
"fnlwgt",
"education",
"education-num",
"marital-status",
"occupation",
"relationship",
"race",
"sex",
"capital-gain",
"capital-loss",
"hours-per-week",
"native-country",
"label",
]
# read data
data = pd.read_csv(folder_path + dataset_filename, header=None, names=col_names, sep=",")
# use sample of 1000 rows of data only
data = data.sample(1000)
data.head()
data.loc[data["label"] == "<=50K", "label"] = 0
data.loc[data["label"] == " <=50K", "label"] = 0
data.loc[data["label"] == ">50K", "label"] = 1
data.loc[data["label"] == " >50K", "label"] = 1
data["label"] = data["label"].astype(int)
# Train test split
X = data.loc[:, data.columns != "label"]
y = data.loc[:, data.columns == "label"]
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, stratify=y["label"], random_state=42
)
int_cols = X_train.select_dtypes(include=["int"]).columns.tolist()
float_cols = X_train.select_dtypes(include=["float"]).columns.tolist()
cat_cols = X_train.select_dtypes(include=["object"]).columns.tolist()
print('int_cols')
print(int_cols)
print('float_cols')
print(float_cols)
print('cat_cols')
print(cat_cols)
pipeline_steps = []
if len(int_cols) > 0 :
# append int missing values imputers
pipeline_steps.append(('intimputer', MeanMedianImputer(
imputation_method='median', variables=int_cols)))
if len(float_cols) > 0 :
# append float missing values imputers
pipeline_steps.append(('floatimputer', MeanMedianImputer(
imputation_method='mean', variables=float_cols)))
if len(cat_cols) > 0 :
# append cat missing values imputers
pipeline_steps.append(('catimputer', CategoricalImputer(variables=cat_cols)))
# encode categorical variables
pipeline_steps.append(('catencoder', OrdinalEncoder()))
pipeline =Pipeline(pipeline_steps)
X_train = pipeline.fit_transform(X_train,y_train)
X_test = pipeline.transform(X_test)
start_time = time.time()
model = FLOptunaClassifier(optimizer = "optuna_ray",metrics_list=['cosine','manhattan'],fuzzy_type_list=['simple','triangular'],fuzzy_cut_range=[0.05,0.45],number_of_intervals_range=[5,14],threshold_range=[0.1,12.0], error_measurement_metric= 'f1_score(y_true, y_pred, average="weighted")',n_trials=100)
model.fit(X=X_train,y=y_train,X_valid=None,y_valid=None)
print("--- %s seconds for training ---" % (time.time() - start_time))
start_time = time.time()
y_pred = model.predict(X=X_test)
print("--- %s seconds for prediction ---" % (time.time() - start_time))
print("classification_report :")
print(classification_report(y_test, y_pred))
print("confusion_matrix : ")
print(confusion_matrix(y_test, y_pred))
print("roc_auc_score : ")
print(roc_auc_score(y_test, y_pred))
print("f1_score : ")
print(f1_score(y_test, y_pred))
```
More examples are available on the [examples](https://github.com/drhosseinjavedani/fuzzylearn/tree/main/fuzzylearn/examples) page.
#### License
Licensed under the [BSD 2-Clause](https://opensource.org/licenses/BSD-2-Clause) License.
Raw data
{
"_id": null,
"home_page": "",
"name": "fuzzylearn",
"maintainer": "",
"docs_url": null,
"requires_python": "==3.8.5",
"maintainer_email": "",
"keywords": "",
"author": "drhosseinjavedani",
"author_email": "h.javedani@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/4f/b9/1d9f1fad1e8c2c56ca9f2a7498a049555330f21ec27dfbbe00898cfe4685/fuzzylearn-1.2.0.tar.gz",
"platform": null,
"description": "![GitHub Repo stars](https://img.shields.io/github/stars/drhosseinjavedani/fuzzylearn) ![GitHub forks](https://img.shields.io/github/forks/drhosseinjavedani/fuzzylearn) ![GitHub language count](https://img.shields.io/github/languages/count/drhosseinjavedani/fuzzylearn) ![GitHub repo size](https://img.shields.io/github/repo-size/drhosseinjavedani/fuzzylearn) ![GitHub](https://img.shields.io/github/license/drhosseinjavedani/fuzzylearn)![PyPI - Downloads](https://img.shields.io/pypi/dd/fuzzylearn) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/fuzzylearn) \n\n# FuzzyLearn\n\nFuzzyLearn is a new estimator for creating classification and regression estimators using fuzzy set concept. Some of it classes use [Optuna](https://optuna.readthedocs.io/en/stable/index.html) and [Ray tune Scikit-Learn API](https://docs.ray.io/en/latest/tune/api_docs/sklearn.html) to tune most of its hyper-parameters.\n\n\n### Installation\n\nFuzzyLearn package is available on PyPI and can be installed with pip:\n\n```sh\npip install fuzzylearn\n```\n\n#### Example 1: Adult Dataset (Use FuzzyLearn as a Classifier)\n\n```\nfrom fuzzylearn.classification.fast.fast import FLClassifier\nfrom sklearn.metrics import classification_report,confusion_matrix,f1_score,roc_auc_score\nfrom feature_engine.imputation import CategoricalImputer, MeanMedianImputer\nfrom category_encoders import OrdinalEncoder\nfrom sklearn.pipeline import Pipeline\nimport pandas as pd\nimport time\nfrom sklearn.model_selection import train_test_split\nimport ray\n\nurldata = \"https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data\"\n# column names\ncol_names = [\n \"age\",\n \"workclass\",\n \"fnlwgt\",\n \"education\",\n \"education-num\",\n \"marital-status\",\n \"occupation\",\n \"relationship\",\n \"race\",\n \"sex\",\n \"capital-gain\",\n \"capital-loss\",\n \"hours-per-week\",\n \"native-country\",\n \"label\",\n]\n# read data\ndata = pd.read_csv(urldata, header=None, 
names=col_names, sep=\",\")\n# use sample of 1000 rows of data only\ndata = data.sample(20000)\ndata.head()\n\ndata.loc[data[\"label\"] == \"<=50K\", \"label\"] = 0\ndata.loc[data[\"label\"] == \" <=50K\", \"label\"] = 0\n\ndata.loc[data[\"label\"] == \">50K\", \"label\"] = 1\ndata.loc[data[\"label\"] == \" >50K\", \"label\"] = 1\n\ndata[\"label\"] = data[\"label\"].astype(int)\n\n# Train test split\n\nX = data.loc[:, data.columns != \"label\"]\ny = data.loc[:, data.columns == \"label\"]\n\n\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=0.5, stratify=y[\"label\"], random_state=42\n)\n\n\nint_cols = X_train.select_dtypes(include=[\"int\"]).columns.tolist()\nfloat_cols = X_train.select_dtypes(include=[\"float\"]).columns.tolist()\ncat_cols = X_train.select_dtypes(include=[\"object\"]).columns.tolist()\n\n\nprint('int_cols')\nprint(int_cols)\nprint('float_cols')\nprint(float_cols)\nprint('cat_cols')\nprint(cat_cols)\n\n\npipeline =Pipeline([\n # int missing values imputers\n ('intimputer', MeanMedianImputer(\n imputation_method='median', variables=int_cols)),\n # category missing values imputers\n ('catimputer', CategoricalImputer(variables=cat_cols)),\n #\n ('catencoder', OrdinalEncoder()),\n\n\n ])\n\nX_train = pipeline.fit_transform(X_train,y_train)\nX_test = pipeline.transform(X_test)\n\n\nstart_time = time.time()\nmodel = FLClassifier(number_of_intervals=5,fuzzy_type=\"triangular\",fuzzy_cut=0.3,threshold=0.7,metric = 'euclidean')\nmodel.fit(X=X_train,y=y_train,X_valid=None,y_valid=None)\nprint(\"--- %s seconds for training ---\" % (time.time() - start_time))\n\nstart_time = time.time()\ny_pred = model.predict(X=X_test)\nprint(\"--- %s seconds for prediction ---\" % (time.time() - start_time))\n\nprint(\"classification_report :\")\nprint(classification_report(y_test, y_pred))\nprint(\"confusion_matrix : \")\nprint(confusion_matrix(y_test, y_pred))\nprint(\"roc_auc_score : \")\nprint(roc_auc_score(y_test, y_pred))\nprint(\"f1_score : 
\")\nprint(f1_score(y_test, y_pred))\n\n\n```\n\n#### Example 2: Adult Dataset (Use FuzzyLearn as a Classifier with Optuna optimization engine)\n\n```\nfrom fuzzylearn.classification.fast.optimum import FLOptunaClassifier \nfrom sklearn.metrics import classification_report,confusion_matrix,f1_score,roc_auc_score\nfrom feature_engine.imputation import CategoricalImputer, MeanMedianImputer\nfrom category_encoders import OrdinalEncoder\nfrom sklearn.pipeline import Pipeline\nimport pandas as pd\nimport time\nfrom sklearn.model_selection import train_test_split\nimport zipfile\nimport urllib.request\n\n\nurldata = \"https://archive.ics.uci.edu/static/public/2/adult.zip\"\nadult_data = 'fuzzylearn/data/adult.zip'\ntry:\n urllib.request.urlretrieve(urldata, adult_data)\nexcept:\n print('error!')\nwith zipfile.ZipFile('fuzzylearn/data/adult.zip', 'r') as zip_ref:\n zip_ref.extractall('fuzzylearn/data/adult')\nfolder_path = 'fuzzylearn/data/adult/'\ndataset_filename = 'adult.data'\n#df = pd.read_csv(folder_path + dataset_filename)\n\n\n# column names\ncol_names = [\n \"age\",\n \"workclass\",\n \"fnlwgt\",\n \"education\",\n \"education-num\",\n \"marital-status\",\n \"occupation\",\n \"relationship\",\n \"race\",\n \"sex\",\n \"capital-gain\",\n \"capital-loss\",\n \"hours-per-week\",\n \"native-country\",\n \"label\",\n]\n# read data\ndata = pd.read_csv(folder_path + dataset_filename, header=None, names=col_names, sep=\",\")\n# use sample of 1000 rows of data only\ndata = data.sample(1000)\ndata.head()\n\ndata.loc[data[\"label\"] == \"<=50K\", \"label\"] = 0\ndata.loc[data[\"label\"] == \" <=50K\", \"label\"] = 0\n\ndata.loc[data[\"label\"] == \">50K\", \"label\"] = 1\ndata.loc[data[\"label\"] == \" >50K\", \"label\"] = 1\n\ndata[\"label\"] = data[\"label\"].astype(int)\n\n# Train test split\n\nX = data.loc[:, data.columns != \"label\"]\ny = data.loc[:, data.columns == \"label\"]\n\n\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=0.5, 
stratify=y[\"label\"], random_state=42\n)\n\n\nint_cols = X_train.select_dtypes(include=[\"int\"]).columns.tolist()\nfloat_cols = X_train.select_dtypes(include=[\"float\"]).columns.tolist()\ncat_cols = X_train.select_dtypes(include=[\"object\"]).columns.tolist()\n\n\nprint('int_cols')\nprint(int_cols)\nprint('float_cols')\nprint(float_cols)\nprint('cat_cols')\nprint(cat_cols)\n\npipeline_steps = []\nif len(int_cols) > 0 :\n # append int missing values imputers\n pipeline_steps.append(('intimputer', MeanMedianImputer(\n imputation_method='median', variables=int_cols)))\nif len(float_cols) > 0 :\n # append float missing values imputers\n pipeline_steps.append(('floatimputer', MeanMedianImputer(\n imputation_method='mean', variables=float_cols)))\nif len(cat_cols) > 0 :\n # append cat missing values imputers\n pipeline_steps.append(('catimputer', CategoricalImputer(variables=cat_cols)))\n # encode categorical variables\n pipeline_steps.append(('catencoder', OrdinalEncoder()))\n\n\npipeline =Pipeline(pipeline_steps)\n\nX_train = pipeline.fit_transform(X_train,y_train)\nX_test = pipeline.transform(X_test)\n\n\nstart_time = time.time()\nmodel = FLOptunaClassifier(optimizer = \"optuna\",metrics_list=['cosine','manhattan'],fuzzy_type_list=['simple','triangular'],fuzzy_cut_range=[0.05,0.45],number_of_intervals_range=[5,14],threshold_range=[0.1,12.0], error_measurement_metric= 'f1_score(y_true, y_pred, average=\"weighted\")',n_trials=100)\nmodel.fit(X=X_train,y=y_train,X_valid=None,y_valid=None)\nprint(\"--- %s seconds for training ---\" % (time.time() - start_time))\nstart_time = time.time()\ny_pred = model.predict(X=X_test)\nprint(\"--- %s seconds for prediction ---\" % (time.time() - start_time))\n\nprint(\"classification_report :\")\nprint(classification_report(y_test, y_pred))\nprint(\"confusion_matrix : \")\nprint(confusion_matrix(y_test, y_pred))\nprint(\"roc_auc_score : \")\nprint(roc_auc_score(y_test, y_pred))\nprint(\"f1_score : \")\nprint(f1_score(y_test, 
y_pred))\n\n\n```\n#### Example 3: Adult Dataset (Use FuzzyLearn as a Classifier with Optuna optimization engine and Ray integrated.)\n\n```\nfrom fuzzylearn.classification.fast.optimum import FLOptunaClassifier \nfrom sklearn.metrics import classification_report,confusion_matrix,f1_score,roc_auc_score\nfrom feature_engine.imputation import CategoricalImputer, MeanMedianImputer\nfrom category_encoders import OrdinalEncoder\nfrom sklearn.pipeline import Pipeline\nimport pandas as pd\nimport time\nfrom sklearn.model_selection import train_test_split\nimport zipfile\nimport urllib.request\n\n\nurldata = \"https://archive.ics.uci.edu/static/public/2/adult.zip\"\nadult_data = 'fuzzylearn/data/adult.zip'\ntry:\n urllib.request.urlretrieve(urldata, adult_data)\nexcept:\n print('error!')\nwith zipfile.ZipFile('fuzzylearn/data/adult.zip', 'r') as zip_ref:\n zip_ref.extractall('fuzzylearn/data/adult')\nfolder_path = 'fuzzylearn/data/adult/'\ndataset_filename = 'adult.data'\n#df = pd.read_csv(folder_path + dataset_filename)\n\n\n# column names\ncol_names = [\n \"age\",\n \"workclass\",\n \"fnlwgt\",\n \"education\",\n \"education-num\",\n \"marital-status\",\n \"occupation\",\n \"relationship\",\n \"race\",\n \"sex\",\n \"capital-gain\",\n \"capital-loss\",\n \"hours-per-week\",\n \"native-country\",\n \"label\",\n]\n# read data\ndata = pd.read_csv(folder_path + dataset_filename, header=None, names=col_names, sep=\",\")\n# use sample of 1000 rows of data only\ndata = data.sample(1000)\ndata.head()\n\ndata.loc[data[\"label\"] == \"<=50K\", \"label\"] = 0\ndata.loc[data[\"label\"] == \" <=50K\", \"label\"] = 0\n\ndata.loc[data[\"label\"] == \">50K\", \"label\"] = 1\ndata.loc[data[\"label\"] == \" >50K\", \"label\"] = 1\n\ndata[\"label\"] = data[\"label\"].astype(int)\n\n# Train test split\n\nX = data.loc[:, data.columns != \"label\"]\ny = data.loc[:, data.columns == \"label\"]\n\n\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=0.5, 
stratify=y[\"label\"], random_state=42\n)\n\n\nint_cols = X_train.select_dtypes(include=[\"int\"]).columns.tolist()\nfloat_cols = X_train.select_dtypes(include=[\"float\"]).columns.tolist()\ncat_cols = X_train.select_dtypes(include=[\"object\"]).columns.tolist()\n\n\nprint('int_cols')\nprint(int_cols)\nprint('float_cols')\nprint(float_cols)\nprint('cat_cols')\nprint(cat_cols)\n\npipeline_steps = []\nif len(int_cols) > 0 :\n # append int missing values imputers\n pipeline_steps.append(('intimputer', MeanMedianImputer(\n imputation_method='median', variables=int_cols)))\nif len(float_cols) > 0 :\n # append float missing values imputers\n pipeline_steps.append(('floatimputer', MeanMedianImputer(\n imputation_method='mean', variables=float_cols)))\nif len(cat_cols) > 0 :\n # append cat missing values imputers\n pipeline_steps.append(('catimputer', CategoricalImputer(variables=cat_cols)))\n # encode categorical variables\n pipeline_steps.append(('catencoder', OrdinalEncoder()))\n\n\npipeline =Pipeline(pipeline_steps)\n\nX_train = pipeline.fit_transform(X_train,y_train)\nX_test = pipeline.transform(X_test)\n\n\nstart_time = time.time()\nmodel = FLOptunaClassifier(optimizer = \"optuna_ray\",metrics_list=['cosine','manhattan'],fuzzy_type_list=['simple','triangular'],fuzzy_cut_range=[0.05,0.45],number_of_intervals_range=[5,14],threshold_range=[0.1,12.0], error_measurement_metric= 'f1_score(y_true, y_pred, average=\"weighted\")',n_trials=100)\nmodel.fit(X=X_train,y=y_train,X_valid=None,y_valid=None)\nprint(\"--- %s seconds for training ---\" % (time.time() - start_time))\nstart_time = time.time()\ny_pred = model.predict(X=X_test)\nprint(\"--- %s seconds for prediction ---\" % (time.time() - start_time))\n\nprint(\"classification_report :\")\nprint(classification_report(y_test, y_pred))\nprint(\"confusion_matrix : \")\nprint(confusion_matrix(y_test, y_pred))\nprint(\"roc_auc_score : \")\nprint(roc_auc_score(y_test, y_pred))\nprint(\"f1_score : \")\nprint(f1_score(y_test, 
y_pred))\n\n\n\n\n```\nThere are some more examples available in the [examples](https://github.com/drhosseinjavedani/fuzzylearn/tree/main/fuzzylearn/examples) webpage. \n\n#### License\nLicensed under the [BSD 2-Clause](https://opensource.org/licenses/BSD-2-Clause) License.",
"bugtrack_url": null,
"license": "",
"summary": "",
"version": "1.2.0",
"project_urls": null,
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "57addd3b62b6bf7ddd9c07bc11a38d56b5a9cc3cec5d8dc4777282bef403ae07",
"md5": "ea889027e3383fedeb67c649f49f1958",
"sha256": "6bb8cf4ef0e43b75606160460d4579871a865034d46d918e4181f96b817debf3"
},
"downloads": -1,
"filename": "fuzzylearn-1.2.0-py3-none-any.whl",
"has_sig": false,
"md5_digest": "ea889027e3383fedeb67c649f49f1958",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": "==3.8.5",
"size": 46144,
"upload_time": "2023-06-16T06:42:17",
"upload_time_iso_8601": "2023-06-16T06:42:17.053452Z",
"url": "https://files.pythonhosted.org/packages/57/ad/dd3b62b6bf7ddd9c07bc11a38d56b5a9cc3cec5d8dc4777282bef403ae07/fuzzylearn-1.2.0-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "4fb91d9f1fad1e8c2c56ca9f2a7498a049555330f21ec27dfbbe00898cfe4685",
"md5": "d3f101f01ca46c6426ba3298937d9144",
"sha256": "8567445c1a21c314f95805cb3363617051527155eb9f071b09834d2e9655b2a1"
},
"downloads": -1,
"filename": "fuzzylearn-1.2.0.tar.gz",
"has_sig": false,
"md5_digest": "d3f101f01ca46c6426ba3298937d9144",
"packagetype": "sdist",
"python_version": "source",
"requires_python": "==3.8.5",
"size": 21549,
"upload_time": "2023-06-16T06:42:18",
"upload_time_iso_8601": "2023-06-16T06:42:18.787496Z",
"url": "https://files.pythonhosted.org/packages/4f/b9/1d9f1fad1e8c2c56ca9f2a7498a049555330f21ec27dfbbe00898cfe4685/fuzzylearn-1.2.0.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-06-16 06:42:18",
"github": false,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"lcname": "fuzzylearn"
}