# fuzzylearn

| Field | Value |
| --- | --- |
| Name | fuzzylearn |
| Version | 1.2.0 |
| Author | drhosseinjavedani |
| Requires Python | ==3.8.5 |
| Upload time | 2023-06-16 06:42:18 |
| docs_url | None |
| Requirements | No requirements were recorded. |
| Travis-CI | No Travis. |
| Coveralls test coverage | No coveralls. |
![GitHub Repo stars](https://img.shields.io/github/stars/drhosseinjavedani/fuzzylearn) ![GitHub forks](https://img.shields.io/github/forks/drhosseinjavedani/fuzzylearn) ![GitHub language count](https://img.shields.io/github/languages/count/drhosseinjavedani/fuzzylearn) ![GitHub repo size](https://img.shields.io/github/repo-size/drhosseinjavedani/fuzzylearn) ![GitHub](https://img.shields.io/github/license/drhosseinjavedani/fuzzylearn) ![PyPI - Downloads](https://img.shields.io/pypi/dd/fuzzylearn) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/fuzzylearn)

# FuzzyLearn

FuzzyLearn is a library for building classification and regression estimators based on fuzzy set concepts. Some of its classes use [Optuna](https://optuna.readthedocs.io/en/stable/index.html) and the [Ray Tune Scikit-Learn API](https://docs.ray.io/en/latest/tune/api_docs/sklearn.html) to tune most of their hyperparameters.
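
For intuition, the `fuzzy_type="triangular"` option used in the examples below corresponds to a triangular membership function, which maps a crisp feature value to a degree of membership in a fuzzy interval. The following is a minimal illustrative sketch of that idea, not code taken from the library:

```python
def triangular_membership(x, left, peak, right):
    """Degree to which x belongs to a triangular fuzzy set defined by (left, peak, right)."""
    if x <= left or x >= right:
        return 0.0
    if x <= peak:
        return (x - left) / (peak - left)
    return (right - x) / (right - peak)

# values near the peak get high membership, values near the edges get low membership
print(triangular_membership(5.0, left=0.0, peak=5.0, right=10.0))  # 1.0
print(triangular_membership(8.0, left=0.0, peak=5.0, right=10.0))  # 0.4
```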


### Installation

The FuzzyLearn package is available on PyPI and can be installed with pip:

```sh
pip install fuzzylearn
```

#### Example 1: Adult Dataset (Use FuzzyLearn as a Classifier)

```python
from fuzzylearn.classification.fast.fast import FLClassifier
from sklearn.metrics import classification_report,confusion_matrix,f1_score,roc_auc_score
from feature_engine.imputation import CategoricalImputer, MeanMedianImputer
from category_encoders import OrdinalEncoder
from sklearn.pipeline import Pipeline
import pandas as pd
import time
from sklearn.model_selection import train_test_split
import ray

urldata = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
# column names
col_names = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "label",
]
# read data
data = pd.read_csv(urldata, header=None, names=col_names, sep=",")
# use a sample of 20,000 rows of data only
data = data.sample(20000)
data.head()

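# the raw Adult data encodes labels with a leading space (e.g. " <=50K"), so both spellings are mapped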
data.loc[data["label"] == "<=50K", "label"] = 0
data.loc[data["label"] == " <=50K", "label"] = 0

data.loc[data["label"] == ">50K", "label"] = 1
data.loc[data["label"] == " >50K", "label"] = 1

data["label"] = data["label"].astype(int)

# Train test split

X = data.loc[:, data.columns != "label"]
y = data.loc[:, data.columns == "label"]


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, stratify=y["label"], random_state=42
)


int_cols = X_train.select_dtypes(include=["int"]).columns.tolist()
float_cols = X_train.select_dtypes(include=["float"]).columns.tolist()
cat_cols = X_train.select_dtypes(include=["object"]).columns.tolist()


print('int_cols')
print(int_cols)
print('float_cols')
print(float_cols)
print('cat_cols')
print(cat_cols)


pipeline = Pipeline([
    # int missing values imputer
    ('intimputer', MeanMedianImputer(
        imputation_method='median', variables=int_cols)),
    # categorical missing values imputer
    ('catimputer', CategoricalImputer(variables=cat_cols)),
    # encode categorical variables
    ('catencoder', OrdinalEncoder()),
])

X_train = pipeline.fit_transform(X_train,y_train)
X_test = pipeline.transform(X_test)


start_time = time.time()
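# build the fuzzy classifier; the parameter roles below are inferred from their names:
#   number_of_intervals - how many fuzzy intervals each feature is split into
#   fuzzy_type / fuzzy_cut - shape and cut level of the membership functions
#   threshold / metric - matching threshold and distance metric used when predicting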
model = FLClassifier(
    number_of_intervals=5,
    fuzzy_type="triangular",
    fuzzy_cut=0.3,
    threshold=0.7,
    metric='euclidean',
)
model.fit(X=X_train,y=y_train,X_valid=None,y_valid=None)
print("--- %s seconds for training ---" % (time.time() - start_time))

start_time = time.time()
y_pred = model.predict(X=X_test)
print("--- %s seconds for prediction ---" % (time.time() - start_time))

print("classification_report :")
print(classification_report(y_test, y_pred))
print("confusion_matrix : ")
print(confusion_matrix(y_test, y_pred))
print("roc_auc_score : ")
print(roc_auc_score(y_test, y_pred))
print("f1_score : ")
print(f1_score(y_test, y_pred))


```

#### Example 2: Adult Dataset (Use FuzzyLearn as a Classifier with the Optuna optimization engine)

```python
from fuzzylearn.classification.fast.optimum import FLOptunaClassifier 
from sklearn.metrics import classification_report,confusion_matrix,f1_score,roc_auc_score
from feature_engine.imputation import CategoricalImputer, MeanMedianImputer
from category_encoders import OrdinalEncoder
from sklearn.pipeline import Pipeline
import pandas as pd
import time
from sklearn.model_selection import train_test_split
import zipfile
import urllib.request


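# download the Adult dataset archive from the UCI repository and extract it locally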
urldata = "https://archive.ics.uci.edu/static/public/2/adult.zip"
adult_data = 'fuzzylearn/data/adult.zip'
try:
    urllib.request.urlretrieve(urldata, adult_data)
except Exception as exc:
    print(f"download failed: {exc}")
with zipfile.ZipFile('fuzzylearn/data/adult.zip', 'r') as zip_ref:
    zip_ref.extractall('fuzzylearn/data/adult')
folder_path = 'fuzzylearn/data/adult/'
dataset_filename = 'adult.data'
#df = pd.read_csv(folder_path + dataset_filename)


# column names
col_names = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "label",
]
# read data
data = pd.read_csv(folder_path + dataset_filename, header=None, names=col_names, sep=",")
# use sample of 1000 rows of data only
data = data.sample(1000)
data.head()

data.loc[data["label"] == "<=50K", "label"] = 0
data.loc[data["label"] == " <=50K", "label"] = 0

data.loc[data["label"] == ">50K", "label"] = 1
data.loc[data["label"] == " >50K", "label"] = 1

data["label"] = data["label"].astype(int)

# Train test split

X = data.loc[:, data.columns != "label"]
y = data.loc[:, data.columns == "label"]


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, stratify=y["label"], random_state=42
)


int_cols = X_train.select_dtypes(include=["int"]).columns.tolist()
float_cols = X_train.select_dtypes(include=["float"]).columns.tolist()
cat_cols = X_train.select_dtypes(include=["object"]).columns.tolist()


print('int_cols')
print(int_cols)
print('float_cols')
print(float_cols)
print('cat_cols')
print(cat_cols)

pipeline_steps = []
if len(int_cols) > 0 :
  # append int missing values imputers
  pipeline_steps.append(('intimputer', MeanMedianImputer(
                imputation_method='median', variables=int_cols)))
if len(float_cols) > 0 :
  # append float missing values imputers
  pipeline_steps.append(('floatimputer', MeanMedianImputer(
                imputation_method='mean', variables=float_cols)))
if len(cat_cols) > 0 :
  # append cat missing values imputers
  pipeline_steps.append(('catimputer', CategoricalImputer(variables=cat_cols)))
  # encode categorical variables
  pipeline_steps.append(('catencoder', OrdinalEncoder()))


pipeline = Pipeline(pipeline_steps)

X_train = pipeline.fit_transform(X_train,y_train)
X_test = pipeline.transform(X_test)


start_time = time.time()
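# Optuna searches over the listed distance metrics and fuzzy types together with the given
# fuzzy_cut / number_of_intervals / threshold ranges for n_trials trials;
# error_measurement_metric is given as a string expression over y_true and y_pred, as in the original example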
model = FLOptunaClassifier(
    optimizer="optuna",
    metrics_list=['cosine', 'manhattan'],
    fuzzy_type_list=['simple', 'triangular'],
    fuzzy_cut_range=[0.05, 0.45],
    number_of_intervals_range=[5, 14],
    threshold_range=[0.1, 12.0],
    error_measurement_metric='f1_score(y_true, y_pred, average="weighted")',
    n_trials=100,
)
model.fit(X=X_train,y=y_train,X_valid=None,y_valid=None)
print("--- %s seconds for training ---" % (time.time() - start_time))
start_time = time.time()
y_pred = model.predict(X=X_test)
print("--- %s seconds for prediction ---" % (time.time() - start_time))

print("classification_report :")
print(classification_report(y_test, y_pred))
print("confusion_matrix : ")
print(confusion_matrix(y_test, y_pred))
print("roc_auc_score : ")
print(roc_auc_score(y_test, y_pred))
print("f1_score : ")
print(f1_score(y_test, y_pred))


```
#### Example 3: Adult Dataset (Use FuzzyLearn as a Classifier with the Optuna optimization engine and Ray integration)

```python
from fuzzylearn.classification.fast.optimum import FLOptunaClassifier 
from sklearn.metrics import classification_report,confusion_matrix,f1_score,roc_auc_score
from feature_engine.imputation import CategoricalImputer, MeanMedianImputer
from category_encoders import OrdinalEncoder
from sklearn.pipeline import Pipeline
import pandas as pd
import time
from sklearn.model_selection import train_test_split
import zipfile
import urllib.request


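# download the Adult dataset archive from the UCI repository and extract it locally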
urldata = "https://archive.ics.uci.edu/static/public/2/adult.zip"
adult_data = 'fuzzylearn/data/adult.zip'
try:
    urllib.request.urlretrieve(urldata, adult_data)
except Exception as exc:
    print(f"download failed: {exc}")
with zipfile.ZipFile('fuzzylearn/data/adult.zip', 'r') as zip_ref:
    zip_ref.extractall('fuzzylearn/data/adult')
folder_path = 'fuzzylearn/data/adult/'
dataset_filename = 'adult.data'
#df = pd.read_csv(folder_path + dataset_filename)


# column names
col_names = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education-num",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
    "native-country",
    "label",
]
# read data
data = pd.read_csv(folder_path + dataset_filename, header=None, names=col_names, sep=",")
# use sample of 1000 rows of data only
data = data.sample(1000)
data.head()

data.loc[data["label"] == "<=50K", "label"] = 0
data.loc[data["label"] == " <=50K", "label"] = 0

data.loc[data["label"] == ">50K", "label"] = 1
data.loc[data["label"] == " >50K", "label"] = 1

data["label"] = data["label"].astype(int)

# Train test split

X = data.loc[:, data.columns != "label"]
y = data.loc[:, data.columns == "label"]


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, stratify=y["label"], random_state=42
)


int_cols = X_train.select_dtypes(include=["int"]).columns.tolist()
float_cols = X_train.select_dtypes(include=["float"]).columns.tolist()
cat_cols = X_train.select_dtypes(include=["object"]).columns.tolist()


print('int_cols')
print(int_cols)
print('float_cols')
print(float_cols)
print('cat_cols')
print(cat_cols)

pipeline_steps = []
if len(int_cols) > 0 :
  # append int missing values imputers
  pipeline_steps.append(('intimputer', MeanMedianImputer(
                imputation_method='median', variables=int_cols)))
if len(float_cols) > 0 :
  # append float missing values imputers
  pipeline_steps.append(('floatimputer', MeanMedianImputer(
                imputation_method='mean', variables=float_cols)))
if len(cat_cols) > 0 :
  # append cat missing values imputers
  pipeline_steps.append(('catimputer', CategoricalImputer(variables=cat_cols)))
  # encode categorical variables
  pipeline_steps.append(('catencoder', OrdinalEncoder()))


pipeline = Pipeline(pipeline_steps)

X_train = pipeline.fit_transform(X_train,y_train)
X_test = pipeline.transform(X_test)


start_time = time.time()
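# optimizer="optuna_ray" selects the Ray-integrated Optuna search; apart from that the setup
# mirrors Example 2 (Ray is expected to distribute the tuning trials)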
model = FLOptunaClassifier(
    optimizer="optuna_ray",
    metrics_list=['cosine', 'manhattan'],
    fuzzy_type_list=['simple', 'triangular'],
    fuzzy_cut_range=[0.05, 0.45],
    number_of_intervals_range=[5, 14],
    threshold_range=[0.1, 12.0],
    error_measurement_metric='f1_score(y_true, y_pred, average="weighted")',
    n_trials=100,
)
model.fit(X=X_train,y=y_train,X_valid=None,y_valid=None)
print("--- %s seconds for training ---" % (time.time() - start_time))
start_time = time.time()
y_pred = model.predict(X=X_test)
print("--- %s seconds for prediction ---" % (time.time() - start_time))

print("classification_report :")
print(classification_report(y_test, y_pred))
print("confusion_matrix : ")
print(confusion_matrix(y_test, y_pred))
print("roc_auc_score : ")
print(roc_auc_score(y_test, y_pred))
print("f1_score : ")
print(f1_score(y_test, y_pred))




```
More examples are available in the [examples](https://github.com/drhosseinjavedani/fuzzylearn/tree/main/fuzzylearn/examples) directory of the repository.

#### License
Licensed under the [BSD 2-Clause](https://opensource.org/licenses/BSD-2-Clause) License.
            
