spinesTS


NamespinesTS JSON
Version 0.4.11 PyPI version JSON
download
home_pagehttps://github.com/BirchKwok/spinesTS
SummaryspinesTS, a powerful timeseries toolsets.
upload_time2024-01-23 09:45:20
maintainer
docs_urlNone
authorBirch Kwok
requires_python
license
keywords machine learning
VCS
bugtrack_url
requirements No requirements were recorded.
Travis-CI No Travis.
coveralls test coverage No coveralls.
# spinesTS
## Time series forecasting toolset

- [Install](https://github.com/BirchKwok/spinesTS#install)
- [spinesTS Modules](https://github.com/BirchKwok/spinesTS#spinests-modules)
- [Tutorials](https://github.com/BirchKwok/spinesTS#tutorials)
  - [Getting started](https://github.com/BirchKwok/spinesTS#getting-started)
  - [Using nn module](https://github.com/BirchKwok/spinesTS#using-nn-module)
    - [StackingRNN](https://github.com/BirchKwok/spinesTS#stackingrnn)
    - [GAUNet](https://github.com/BirchKwok/spinesTS#gaunet)
    - [Time2VecNet](https://github.com/BirchKwok/spinesTS#time2vecnet)
  - [Using ml_model module](https://github.com/BirchKwok/spinesTS#using-ml_model-module)
    - [MultiStepRegressor](https://github.com/BirchKwok/spinesTS#multistepregressor)
    - [MultiOutputRegressor](https://github.com/BirchKwok/spinesTS#multioutputregressor)
    - [WideGBRT](https://github.com/BirchKwok/spinesTS#widegbrt)
  - [Using Data module](https://github.com/BirchKwok/spinesTS#using-data-module)


## Install
```
pip install spinesTS
```

## spinesTS Modules

- base: Model base class
- data: Built-in datasets and data wrapper classes
- feature_generator: Feature generation functions
- metrics: Model performance measurement functions
- ml_model: Machine learning models
- nn: Neural network models
- pipeline: Model fitting and prediction pipeline
- plotting: Visualization of model prediction results
- preprocessing: Data preprocessing
- utils: Utility functions
- layers: Neural network layers

## Tutorials

### Getting started
```python
# simple demo to predict Electric data
from sklearn.preprocessing import StandardScaler
from lightgbm import LGBMRegressor
import matplotlib.pyplot as plt

from spinesTS.pipeline import Pipeline
from spinesTS.data import LoadElectricDataSets
from spinesTS.ml_model import MultiOutputRegressor
from spinesTS.preprocessing import split_series
from spinesTS.plotting import plot2d


# load data
df = LoadElectricDataSets()

# split data
x_train, x_test, y_train, y_test = split_series(
    x_seq=df['value'], 
    y_seq=df['value'],  # The sequence of parameter y_seq is cut based on parameter x_seq
    # sliding window size: use the previous 30 days to predict the following days
    window_size=30, 
    # predict after 30 days
    pred_steps=30, 
    train_size=0.8
)

print(f"x_train shape is {x_train.shape}, "
      f"x_test shape is {x_test.shape}," 
      f"y_train shape is {y_train.shape},"
      f"y_test shape is {y_test.shape}")

# Assemble the model using Pipeline class
model = Pipeline([
    ('sc', StandardScaler()),
    ('model', MultiOutputRegressor(LGBMRegressor(random_state=2022)))
])
print("Model successfully initialization...")

# fitting model
model.fit(x_train, y_train, eval_set=(x_test, y_test), verbose=0)
print(f"r2_score is {model.score(x_test, y_test)}")

# plot the predicted results
fig = plot2d(y_test, model.predict(x_test), figsize=(20, 10), 
       eval_slices='[:30]', labels=['y_test', 'y_pred'])
plt.show()
```
```
[output]:
x_train shape is (270, 30), x_test shape is (68, 30),y_train shape is (270, 30),y_test shape is (68, 30)
Model successfully initialization...
r2_score is 0.8186046606725977
```
![model prediction image](https://github.com/BirchKwok/spinesTS/blob/main/examples/visual/GettingStarted.png?raw=true)

### Using nn module
#### StackingRNN
```python
import matplotlib.pyplot as plt

from spinesTS.data import LoadElectricDataSets
from spinesTS.preprocessing import split_series
from spinesTS.plotting import plot2d
from spinesTS.nn import StackingRNN
from spinesTS.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error


# load data
df = LoadElectricDataSets()

# split data
x_train, x_test, y_train, y_test = split_series(
    x_seq=df['value'], 
    y_seq=df['value'],
    # sliding window size: use the previous 128 days to predict the following days
    window_size=128, 
    # predict after 24 days
    pred_steps=24, 
    train_size=0.8
)

print(f"x_train shape is {x_train.shape}, "
      f"x_test shape is {x_test.shape}," 
      f"y_train shape is {y_train.shape},"
      f"y_test shape is {y_test.shape}")

# model initialization
model = StackingRNN(in_features=128, out_features=24, 
                    random_seed=42, loss_fn='mae', 
                    learning_rate=0.001, dropout=0.1, diff_n=1, 
                    stack_num=2, bidirectional=True, device='cpu')

model.fit(x_train, y_train, eval_set=(x_test[:-2], y_test[:-2]), batch_size=32,
             min_delta=0, patience=100, epochs=3000, verbose=False, lr_scheduler=None)
y_pred_cs = model.predict(x_test[-2:])
print(f"r2: {r2_score(y_test[-2:].T, y_pred_cs.T)}")
print(f"mae: {mean_absolute_error(y_test[-2:], y_pred_cs)}")
print(f"mape: {mean_absolute_percentage_error(y_test[-2:], y_pred_cs)}")
a = plot2d(y_test[-2:], y_pred_cs, eval_slices='[-1]', labels=['y_test', 'y_pred'], figsize=(20, 6))
plt.show()
```
#### GAUNet
```python
import matplotlib.pyplot as plt

from spinesTS.data import LoadElectricDataSets
from spinesTS.preprocessing import split_series
from spinesTS.plotting import plot2d
from spinesTS.nn import GAUNet
from spinesTS.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error


# load data
df = LoadElectricDataSets()

# split data
x_train, x_test, y_train, y_test = split_series(
    x_seq=df['value'], 
    y_seq=df['value'],
    # sliding window size: use the previous 128 days to predict the following days
    window_size=128, 
    # predict after 24 days 
    pred_steps=24, 
    train_size=0.8
)

print(f"x_train shape is {x_train.shape}, "
      f"x_test shape is {x_test.shape}," 
      f"y_train shape is {y_train.shape},"
      f"y_test shape is {y_test.shape}")

# model initialization
model = GAUNet(in_features=128, out_features=24, 
               random_seed=42, flip_features=False, 
               learning_rate=0.001, level=5, device='cpu')

model.fit(x_train, y_train, eval_set=(x_test[:-2], y_test[:-2]), batch_size=32,
             min_delta=0, patience=100, epochs=3000, verbose=False, lr_scheduler='ReduceLROnPlateau')
y_pred_cs = model.predict(x_test[-2:])
print(f"r2: {r2_score(y_test[-2:].T, y_pred_cs.T)}")
print(f"mae: {mean_absolute_error(y_test[-2:], y_pred_cs)}")
print(f"mape: {mean_absolute_percentage_error(y_test[-2:], y_pred_cs)}")
a = plot2d(y_test[-2:], y_pred_cs, eval_slices='[-1]', labels=['y_test', 'y_pred'], figsize=(20, 6))
plt.show()
```
#### Time2VecNet
```python
import matplotlib.pyplot as plt

from spinesTS.data import LoadElectricDataSets
from spinesTS.preprocessing import split_series
from spinesTS.plotting import plot2d
from spinesTS.nn import Time2VecNet
from spinesTS.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error


# load data
df = LoadElectricDataSets()

# split data
x_train, x_test, y_train, y_test = split_series(
    x_seq=df['value'], 
    y_seq=df['value'],
    # sliding window size: use the previous 128 days to predict the following days
    window_size=128, 
    # predict after 24 days 
    pred_steps=24, 
    train_size=0.8
)

print(f"x_train shape is {x_train.shape}, "
      f"x_test shape is {x_test.shape}," 
      f"y_train shape is {y_train.shape},"
      f"y_test shape is {y_test.shape}")

# model initialization
model = Time2VecNet(in_features=128, out_features=24, 
               random_seed=42, flip_features=False, 
               learning_rate=0.001, device='cpu')

model.fit(x_train, y_train, eval_set=(x_test[:-2], y_test[:-2]), batch_size=32,
             min_delta=0, patience=100, epochs=3000, verbose=False, lr_scheduler='CosineAnnealingLR')
y_pred_cs = model.predict(x_test[-2:])
print(f"r2: {r2_score(y_test[-2:].T, y_pred_cs.T)}")
print(f"mae: {mean_absolute_error(y_test[-2:], y_pred_cs)}")
print(f"mape: {mean_absolute_percentage_error(y_test[-2:], y_pred_cs)}")
a = plot2d(y_test[-2:], y_pred_cs, eval_slices='[-1]', labels=['y_test', 'y_pred'], figsize=(20, 6))
plt.show()
```

### Using ml_model module
#### MultiStepRegressor
```python
from lightgbm import LGBMRegressor
import matplotlib.pyplot as plt

from spinesTS.data import LoadElectricDataSets
from spinesTS.ml_model import MultiStepRegressor
from spinesTS.preprocessing import split_series
from spinesTS.plotting import plot2d


# load data
df = LoadElectricDataSets()

# split data
x_train, x_test, y_train, y_test = split_series(
    df['value'], 
    df['value'],
    # sliding window size: use the previous 30 days to predict the following days
    window_size=30, 
    # predict after 30 days 
    pred_steps=30, 
    train_size=0.8
)

print(f"x_train shape is {x_train.shape}, "
      f"x_test shape is {x_test.shape}," 
      f"y_train shape is {y_train.shape},"
      f"y_test shape is {y_test.shape}")

# model initialization
model = MultiStepRegressor(LGBMRegressor(random_state=2022))
print("Model successfully initialization...")

# fitting model
model.fit(x_train, y_train, eval_set=(x_test, y_test), verbose=0)
print(f"r2_score is {model.score(x_test, y_test)}")

# plot the predicted results
fig = plot2d(y_test, model.predict(x_test), figsize=(20, 10), 
       eval_slices='[:30]', labels=['y_test', 'y_pred'])
plt.show()
```
#### MultiOutputRegressor
```python
from lightgbm import LGBMRegressor
import matplotlib.pyplot as plt

from spinesTS.data import LoadElectricDataSets
from spinesTS.ml_model import MultiOutputRegressor
from spinesTS.preprocessing import split_series
from spinesTS.plotting import plot2d


# load data
df = LoadElectricDataSets()

# split data
x_train, x_test, y_train, y_test = split_series(
    df['value'], 
    df['value'],
    # sliding window size: use the previous 30 days to predict the following days
    window_size=30, 
    # predict after 30 days 
    pred_steps=30, 
    train_size=0.8
)

print(f"x_train shape is {x_train.shape}, "
      f"x_test shape is {x_test.shape}," 
      f"y_train shape is {y_train.shape},"
      f"y_test shape is {y_test.shape}")

# model initialization
model = MultiOutputRegressor(LGBMRegressor(random_state=2022))
print("Model successfully initialization...")

# fitting model
model.fit(x_train, y_train, eval_set=(x_test, y_test), verbose=0)
print(f"r2_score is {model.score(x_test, y_test)}")

# plot the predicted results
fig = plot2d(y_test, model.predict(x_test), figsize=(20, 10), 
       eval_slices='[:30]', labels=['y_test', 'y_pred'])
plt.show()
```
#### WideGBRT
```python
from lightgbm import LGBMRegressor
import matplotlib.pyplot as plt

from spinesTS.data import LoadElectricDataSets
from spinesTS.ml_model import GBRTPreprocessing, WideGBRT
from spinesTS.plotting import plot2d


# load data
df = LoadElectricDataSets()

# split data and generate new features
gbrt_processor = GBRTPreprocessing(in_features=128, out_features=30, 
                                   target_col='value', train_size=0.8, date_col='date',
                                   differential_n=1  # The order of data differentiation.
                                   )
gbrt_processor.fit(df)

x_train, x_test, y_train, y_test = gbrt_processor.transform(df)

print(f"x_train shape is {x_train.shape}, "
      f"x_test shape is {x_test.shape}," 
      f"y_train shape is {y_train.shape},"
      f"y_test shape is {y_test.shape}")

# model initialization
model = WideGBRT(model=LGBMRegressor(random_state=2022))
print("Model successfully initialization...")

# fitting model
model.fit(x_train, y_train, eval_set=(x_test, y_test), verbose=0)
print(f"r2_score is {model.score(x_test, y_test)}")

# plot the predicted results
fig = plot2d(y_test, model.predict(x_test), figsize=(20, 10), 
       eval_slices='[:30]', labels=['y_test', 'y_pred'])
plt.show()
```

### Using Data module
```python
from spinesTS.data import *
series_data = BuiltInSeriesData(print_file_list=True)
```
```
+---+----------------------+----------------------------------------------+
|   | ds name              | columns                                      |
+---+----------------------+----------------------------------------------+
| 0 | ETTh1                | date, HUFL, HULL, MUFL, MULL, LUFL, LULL, OT |
| 1 | ETTh2                | date, HUFL, HULL, MUFL, MULL, LUFL, LULL, OT |
| 2 | ETTm1                | date, HUFL, HULL, MUFL, MULL, LUFL, LULL, OT |
| 3 | ETTm2                | date, HUFL, HULL, MUFL, MULL, LUFL, LULL, OT |
| 4 | Electric_Production  | date, value                                  |
| 5 | Messages_Sent        | date, ta, tb, tc                             |
| 6 | Messages_Sent_Hour   | date, hour, ta, tb, tc                       |
| 7 | Supermarket_Incoming | date, goods_cnt                              |
| 8 | Web_Sales            | date, type_a, type_b, sales_cnt              |
+---+----------------------+----------------------------------------------+
```
```python
# select one dataset
df_a = series_data['ETTh1']  # series_data[0], it works, too
print(type(df_a))  # <class 'spinesTS.data._data_base.DataTS'>

# Because DataTS inherits from pandas DataFrame, it has all the functionality of a pandas DataFrame
df_a.head() ,df_a.tail(), df_a.shape
```


            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/BirchKwok/spinesTS",
    "name": "spinesTS",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "machine learning",
    "author": "Birch Kwok",
    "author_email": "birchkwok@gmail.com",
    "download_url": "https://files.pythonhosted.org/packages/db/28/955e14e2b0434a823df29fef18d7f3e7a6b5c7de76897cd3b93a0f7eab90/spinesTS-0.4.11.tar.gz",
    "platform": null,
    "description": "# spinesTS \n## Time Series forecasting toolsets\n\n- [Install](https://github.com/BirchKwok/spinesTS#install)\n- [spinesTS Modules](https://github.com/BirchKwok/spinesTS#spinests-modules)\n- [Tutorials](https://github.com/BirchKwok/spinesTS#tutorials)\n  - [Getting started](https://github.com/BirchKwok/spinesTS#getting-started)\n  - [Using nn module](https://github.com/BirchKwok/spinesTS#using-nn-module)\n    - [StackingRNN](https://github.com/BirchKwok/spinesTS#stackingrnn)\n    - [GAUNet](https://github.com/BirchKwok/spinesTS#gaunet)\n    - [Time2VecNet](https://github.com/BirchKwok/spinesTS#time2vecnet)\n  - [Using ml_model module](https://github.com/BirchKwok/spinesTS#using-ml_model-module)\n    - [MultiStepRegressor](https://github.com/BirchKwok/spinesTS#multistepregressor)\n    - [MultiOutputRegressor](https://github.com/BirchKwok/spinesTS#multioutputregressor)\n    - [WideGBRT](https://github.com/BirchKwok/spinesTS#widegbrt)\n  - [Using Data module](https://github.com/BirchKwok/spinesTS#using-data-module)\n\n\n## Install\n```\npip install spinesTS\n```\n\n## spinesTS Modules\n\n- base: Model base class\n- data: Built-in datasets and data wrapper classes\n- feature_generator: Feature generation functions\n- metrics: Model performance measurement function\n- ml_model: Machine learning models\n- nn: neural network models\n- pipeline: Model fitting and prediction pipeline\n- plotting: Visualization of model prediction results\n- preprocessing: data preprocessing\n- utils: Tool functions set\n- layers: Neural network layer\n\n## Tutorials\n\n### Getting started\n```python\n# simple demo to predict Electric data\nfrom sklearn.preprocessing import StandardScaler\nfrom lightgbm import LGBMRegressor\nimport matplotlib.pyplot as plt\n\nfrom spinesTS.pipeline import Pipeline\nfrom spinesTS.data import LoadElectricDataSets\nfrom spinesTS.ml_model import MultiOutputRegressor\nfrom spinesTS.preprocessing import split_series\nfrom spinesTS.plotting 
import plot2d\n\n\n# load data\ndf = LoadElectricDataSets()\n\n# split data\nx_train, x_test, y_train, y_test = split_series(\n    x_seq=df['value'], \n    y_seq=df['value'],  # The sequence of parameter y_seq is cut based on parameter x_seq\n    # sliding window size, every 30 before days to predict after days\n    window_size=30, \n    # predict after 30 days\n    pred_steps=30, \n    train_size=0.8\n)\n\nprint(f\"x_train shape is {x_train.shape}, \"\n      f\"x_test shape is {x_test.shape},\" \n      f\"y_train shape is {y_train.shape},\"\n      f\"y_test shape is {y_test.shape}\")\n\n# Assemble the model using Pipeline class\nmodel = Pipeline([\n    ('sc', StandardScaler()),\n    ('model', MultiOutputRegressor(LGBMRegressor(random_state=2022)))\n])\nprint(\"Model successfully initialization...\")\n\n# fitting model\nmodel.fit(x_train, y_train, eval_set=(x_test, y_test), verbose=0)\nprint(f\"r2_score is {model.score(x_test, y_test)}\")\n\n# plot the predicted results\nfig = plot2d(y_test, model.predict(x_test), figsize=(20, 10), \n       eval_slices='[:30]', labels=['y_test', 'y_pred'])\nplt.show()\n```\n```\n[output]:\nx_train shape is (270, 30), x_test shape is (68, 30),y_train shape is (270, 30),y_test shape is (68, 30)\nModel successfully initialization...\nr2_score is 0.8186046606725977\n```\n![model prediction image](https://github.com/BirchKwok/spinesTS/blob/main/examples/visual/GettingStarted.png)\n\n### Using nn module\n#### StackingRNN\n```python\nimport matplotlib.pyplot as plt\n\nfrom spinesTS.data import LoadElectricDataSets\nfrom spinesTS.preprocessing import split_series\nfrom spinesTS.plotting import plot2d\nfrom spinesTS.nn import StackingRNN\nfrom spinesTS.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error\n\n\n# load data\ndf = LoadElectricDataSets()\n\n# split data\nx_train, x_test, y_train, y_test = split_series(\n    x_seq=df['value'], \n    y_seq=df['value'],\n    # sliding window size, every 128 before days to 
predict after days\n    window_size=128, \n    # predict after 24 days goods incoming\n    pred_steps=24, \n    train_size=0.8\n)\n\nprint(f\"x_train shape is {x_train.shape}, \"\n      f\"x_test shape is {x_test.shape},\" \n      f\"y_train shape is {y_train.shape},\"\n      f\"y_test shape is {y_test.shape}\")\n\n# model initialization\nmodel = StackingRNN(in_features=128, out_features=24, \n                    random_seed=42, loss_fn='mae', \n                    learning_rate=0.001, dropout=0.1, diff_n=1, \n                    stack_num=2, bidirectional=True, device='cpu')\n\nmodel.fit(x_train, y_train, eval_set=(x_test[:-2], y_test[:-2]), batch_size=32,\n             min_delta=0, patience=100, epochs=3000, verbose=False, lr_scheduler=None)\ny_pred_cs = model.predict(x_test[-2:])\nprint(f\"r2: {r2_score(y_test[-2:].T, y_pred_cs.T)}\")\nprint(f\"mae: {mean_absolute_error(y_test[-2:], y_pred_cs)}\")\nprint(f\"mape: {mean_absolute_percentage_error(y_test[-2:], y_pred_cs)}\")\na = plot2d(y_test[-2:], y_pred_cs, eval_slices='[-1]', labels=['y_test', 'y_pred'], figsize=(20, 6))\nplt.show()\n```\n#### GAUNet\n```python\nimport matplotlib.pyplot as plt\n\nfrom spinesTS.data import LoadElectricDataSets\nfrom spinesTS.preprocessing import split_series\nfrom spinesTS.plotting import plot2d\nfrom spinesTS.nn import GAUNet\nfrom spinesTS.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error\n\n\n# load data\ndf = LoadElectricDataSets()\n\n# split data\nx_train, x_test, y_train, y_test = split_series(\n    x_seq=df['value'], \n    y_seq=df['value'],\n    # sliding window size, every 128 before days to predict after days\n    window_size=128, \n    # predict after 24 days \n    pred_steps=24, \n    train_size=0.8\n)\n\nprint(f\"x_train shape is {x_train.shape}, \"\n      f\"x_test shape is {x_test.shape},\" \n      f\"y_train shape is {y_train.shape},\"\n      f\"y_test shape is {y_test.shape}\")\n\n# model initialization\nmodel = GAUNet(in_features=128, 
out_features=24, \n               random_seed=42, flip_features=False, \n               learning_rate=0.001, level=5, device='cpu')\n\nmodel.fit(x_train, y_train, eval_set=(x_test[:-2], y_test[:-2]), batch_size=32,\n             min_delta=0, patience=100, epochs=3000, verbose=False, lr_scheduler='ReduceLROnPlateau')\ny_pred_cs = model.predict(x_test[-2:])\nprint(f\"r2: {r2_score(y_test[-2:].T, y_pred_cs.T)}\")\nprint(f\"mae: {mean_absolute_error(y_test[-2:], y_pred_cs)}\")\nprint(f\"mape: {mean_absolute_percentage_error(y_test[-2:], y_pred_cs)}\")\na = plot2d(y_test[-2:], y_pred_cs, eval_slices='[-1]', labels=['y_test', 'y_pred'], figsize=(20, 6))\nplt.show()\n```\n#### Time2VecNet\n```python\nimport matplotlib.pyplot as plt\n\nfrom spinesTS.data import LoadElectricDataSets\nfrom spinesTS.preprocessing import split_series\nfrom spinesTS.plotting import plot2d\nfrom spinesTS.nn import Time2VecNet\nfrom spinesTS.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error\n\n\n# load data\ndf = LoadElectricDataSets()\n\n# split data\nx_train, x_test, y_train, y_test = split_series(\n    x_seq=df['value'], \n    y_seq=df['value'],\n    # sliding window size, every 128 before days to predict after days\n    window_size=128, \n    # predict after 24 days \n    pred_steps=24, \n    train_size=0.8\n)\n\nprint(f\"x_train shape is {x_train.shape}, \"\n      f\"x_test shape is {x_test.shape},\" \n      f\"y_train shape is {y_train.shape},\"\n      f\"y_test shape is {y_test.shape}\")\n\n# model initialization\nmodel = Time2VecNet(in_features=128, out_features=24, \n               random_seed=42, flip_features=False, \n               learning_rate=0.001, device='cpu')\n\nmodel.fit(x_train, y_train, eval_set=(x_test[:-2], y_test[:-2]), batch_size=32,\n             min_delta=0, patience=100, epochs=3000, verbose=False, lr_scheduler='CosineAnnealingLR')\ny_pred_cs = model.predict(x_test[-2:])\nprint(f\"r2: {r2_score(y_test[-2:].T, y_pred_cs.T)}\")\nprint(f\"mae: 
{mean_absolute_error(y_test[-2:], y_pred_cs)}\")\nprint(f\"mape: {mean_absolute_percentage_error(y_test[-2:], y_pred_cs)}\")\na = plot2d(y_test[-2:], y_pred_cs, eval_slices='[-1]', labels=['y_test', 'y_pred'], figsize=(20, 6))\nplt.show()\n```\n\n### Using ml_model module\n#### MultiStepRegressor\n```python\nfrom lightgbm import LGBMRegressor\nimport matplotlib.pyplot as plt\n\nfrom spinesTS.data import LoadElectricDataSets\nfrom spinesTS.ml_model import MultiStepRegressor\nfrom spinesTS.preprocessing import split_series\nfrom spinesTS.plotting import plot2d\n\n\n# load data\ndf = LoadElectricDataSets()\n\n# split data\nx_train, x_test, y_train, y_test = split_series(\n    df['value'], \n    df['value'],\n    # sliding window size, every 30 before days to predict after days\n    window_size=30, \n    # predict after 30 days \n    pred_steps=30, \n    train_size=0.8\n)\n\nprint(f\"x_train shape is {x_train.shape}, \"\n      f\"x_test shape is {x_test.shape},\" \n      f\"y_train shape is {y_train.shape},\"\n      f\"y_test shape is {y_test.shape}\")\n\n# model initialization\nmodel = MultiStepRegressor(LGBMRegressor(random_state=2022))\nprint(\"Model successfully initialization...\")\n\n# fitting model\nmodel.fit(x_train, y_train, eval_set=(x_test, y_test), verbose=0)\nprint(f\"r2_score is {model.score(x_test, y_test)}\")\n\n# plot the predicted results\nfig = plot2d(y_test, model.predict(x_test), figsize=(20, 10), \n       eval_slices='[:30]', labels=['y_test', 'y_pred'])\nplt.show()\n```\n#### MultiOutputRegressor\n```python\nfrom lightgbm import LGBMRegressor\nimport matplotlib.pyplot as plt\n\nfrom spinesTS.data import LoadElectricDataSets\nfrom spinesTS.ml_model import MultiOutputRegressor\nfrom spinesTS.preprocessing import split_series\nfrom spinesTS.plotting import plot2d\n\n\n# load data\ndf = LoadElectricDataSets()\n\n# split data\nx_train, x_test, y_train, y_test = split_series(\n    df['value'], \n    df['value'],\n    # sliding window size, every 30 
before days to predict after days\n    window_size=30, \n    # predict after 30 days \n    pred_steps=30, \n    train_size=0.8\n)\n\nprint(f\"x_train shape is {x_train.shape}, \"\n      f\"x_test shape is {x_test.shape},\" \n      f\"y_train shape is {y_train.shape},\"\n      f\"y_test shape is {y_test.shape}\")\n\n# model initialization\nmodel = MultiOutputRegressor(LGBMRegressor(random_state=2022))\nprint(\"Model successfully initialization...\")\n\n# fitting model\nmodel.fit(x_train, y_train, eval_set=(x_test, y_test), verbose=0)\nprint(f\"r2_score is {model.score(x_test, y_test)}\")\n\n# plot the predicted results\nfig = plot2d(y_test, model.predict(x_test), figsize=(20, 10), \n       eval_slices='[:30]', labels=['y_test', 'y_pred'])\nplt.show()\n```\n#### WideGBRT\n```python\nfrom lightgbm import LGBMRegressor\nimport matplotlib.pyplot as plt\n\nfrom spinesTS.data import LoadElectricDataSets\nfrom spinesTS.ml_model import GBRTPreprocessing, WideGBRT\nfrom spinesTS.plotting import plot2d\n\n\n# load data\ndf = LoadElectricDataSets()\n\n# split data and generate new features\ngbrt_processor = GBRTPreprocessing(in_features=128, out_features=30, \n                                   target_col='value', train_size=0.8, date_col='date',\n                                   differential_n=1  # The order of data differentiation.\n                                   )\ngbrt_processor.fit(df)\n\nx_train, x_test, y_train, y_test = gbrt_processor.transform(df)\n\nprint(f\"x_train shape is {x_train.shape}, \"\n      f\"x_test shape is {x_test.shape},\" \n      f\"y_train shape is {y_train.shape},\"\n      f\"y_test shape is {y_test.shape}\")\n\n# model initialization\nmodel = WideGBRT(model=LGBMRegressor(random_state=2022))\nprint(\"Model successfully initialization...\")\n\n# fitting model\nmodel.fit(x_train, y_train, eval_set=(x_test, y_test), verbose=0)\nprint(f\"r2_score is {model.score(x_test, y_test)}\")\n\n# plot the predicted results\nfig = plot2d(y_test, 
model.predict(x_test), figsize=(20, 10), \n       eval_slices='[:30]', labels=['y_test', 'y_pred'])\nplt.show()\n```\n\n### Using Data module\n```python\nfrom spinesTS.data import *\nseries_data = BuiltInSeriesData(print_file_list=True)\n```\n```\n+---+----------------------+----------------------------------------------+\n|   | ds name              | columns                                      |\n+---+----------------------+----------------------------------------------+\n| 0 | ETTh1                | date, HUFL, HULL, MUFL, MULL, LUFL, LULL, OT |\n| 1 | ETTh2                | date, HUFL, HULL, MUFL, MULL, LUFL, LULL, OT |\n| 2 | ETTm1                | date, HUFL, HULL, MUFL, MULL, LUFL, LULL, OT |\n| 3 | ETTm2                | date, HUFL, HULL, MUFL, MULL, LUFL, LULL, OT |\n| 4 | Electric_Production  | date, value                                  |\n| 5 | Messages_Sent        | date, ta, tb, tc                             |\n| 6 | Messages_Sent_Hour   | date, hour, ta, tb, tc                       |\n| 7 | Supermarket_Incoming | date, goods_cnt                              |\n| 8 | Web_Sales            | date, type_a, type_b, sales_cnt              |\n+---+----------------------+----------------------------------------------+\n```\n```python\n# select one dataset\ndf_a = series_data['ETTh1']  # series_data[0], it works, too\nprint(type(df_a))  # <class 'spinesTS.data._data_base.DataTS'>\n\n# Because DataTS inherit from pandas DataFrame, it has all the functionality of pandas DataFrame\ndf_a.head() ,df_a.tail(), df_a.shape\n```\n\n",
    "bugtrack_url": null,
    "license": "",
    "summary": "spinesTS, a powerful timeseries toolsets.",
    "version": "0.4.11",
    "project_urls": {
        "Homepage": "https://github.com/BirchKwok/spinesTS"
    },
    "split_keywords": [
        "machine",
        "learning"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "e2946792d7bf69f24b5f6c699c10561511ef807535e2551fc48f1d951bf721cd",
                "md5": "d4194bd3a9a69069d561d05b00de08e8",
                "sha256": "03c3afcba18d313c41d4f524c05136ad67e1034091876c39f80f48498591bd55"
            },
            "downloads": -1,
            "filename": "spinesTS-0.4.11-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "d4194bd3a9a69069d561d05b00de08e8",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 8869708,
            "upload_time": "2024-01-23T09:45:16",
            "upload_time_iso_8601": "2024-01-23T09:45:16.421210Z",
            "url": "https://files.pythonhosted.org/packages/e2/94/6792d7bf69f24b5f6c699c10561511ef807535e2551fc48f1d951bf721cd/spinesTS-0.4.11-py3-none-any.whl",
            "yanked": false,
            "yanked_reason": null
        },
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "db28955e14e2b0434a823df29fef18d7f3e7a6b5c7de76897cd3b93a0f7eab90",
                "md5": "7c9a26a7f9f68cc612412aa5f481bdf2",
                "sha256": "8647960b6d9dbd9907b3831ee4e2ceb64d0df627b96ddff943e5f544ac15290d"
            },
            "downloads": -1,
            "filename": "spinesTS-0.4.11.tar.gz",
            "has_sig": false,
            "md5_digest": "7c9a26a7f9f68cc612412aa5f481bdf2",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 2166743,
            "upload_time": "2024-01-23T09:45:20",
            "upload_time_iso_8601": "2024-01-23T09:45:20.321396Z",
            "url": "https://files.pythonhosted.org/packages/db/28/955e14e2b0434a823df29fef18d7f3e7a6b5c7de76897cd3b93a0f7eab90/spinesTS-0.4.11.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-01-23 09:45:20",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "BirchKwok",
    "github_project": "spinesTS",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "lcname": "spinests"
}
        
Elapsed time: 2.07694s