timesmash


Name: timesmash
Version: 0.2.26
Home page: https://github.com/zeroknowledgediscovery/timesmash
Summary: Quantifier of universal similarity amongst arbitrary data streams without a priori knowledge, features, or training.
Upload time: 2025-01-15 16:02:28
Maintainer: Victor Rotaru
Author: not specified
Docs URL: not specified
Requires Python: not specified
License: not specified
Keywords: timeseries
Requirements: none recorded
Download URL: https://files.pythonhosted.org/packages/16/ea/6975cb44e75c397b867eee78591732bff6911c6cb77d957d6849df7a4516/timesmash-0.2.26.tar.gz
# Time Smash

A time series clustering and classification suite built on notions of *universal similarity* among data streams, requiring no a priori knowledge of the "correct" features to use for the time series data.

+ Featurization algorithms: SymbolicDerivative, InferredHMMLikelihood, Csmash
+ Distance measure: LikelihoodDistance
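
The package ships as a Python 3 wheel on PyPI and can be installed with `pip install timesmash`. All of the classes used in the examples below are importable from the top-level package; a minimal import sketch (the one-line descriptions are inferred from the class names and the examples that follow, not from separate documentation):

    # Featurization, distance, and clustering classes used in the examples below
    from timesmash import (
        SymbolicDerivative,     # features from symbolic derivatives of the series
        InferredHMMLikelihood,  # features from inferred hidden-Markov-model likelihoods
        LikelihoodDistance,     # pairwise sequence-likelihood distances
        Quantizer,              # shared quantization reused by downstream steps
        XHMMFeatures,           # multichannel featurizer
        XHMMClustering,         # multichannel clustering
    )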

## Example publications


+ Huang, Yi, Victor Rotaru, and Ishanu Chattopadhyay. "Sequence likelihood divergence for fast time series comparison." Knowledge and Information Systems 65, no. 7 (2023): 3079-3098. https://link.springer.com/article/10.1007/s10115-023-01855-0

+ Chattopadhyay, Ishanu, and Hod Lipson. "Data smashing: uncovering lurking order in data." Journal of The Royal Society Interface 11, no. 101 (2014): 20140826.
https://royalsocietypublishing.org/doi/10.1098/rsif.2014.0826

+ "Timesmash: Process-Aware Fast Time Series Clustering and Classification." EasyChair Preprint. https://easychair.org/publications/preprint/qpVv


For questions or suggestions, contact research@paraknowledge.ai.

## Usage examples
### SymbolicDerivative
    from timesmash import SymbolicDerivative
    from sklearn.ensemble import RandomForestClassifier

    # Two labeled training series and one unlabeled test series
    train = [[1, 0, 1, 0, 1, 0], [1, 1, 0, 1, 1, 0]]
    train_label = [[0], [1]]
    test = [[0, 1, 1, 0, 1, 1]]

    # Featurize the training and test series in one call
    train_features, test_features = SymbolicDerivative().fit_transform(
        train=train, test=test, label=train_label
    )

    # Any off-the-shelf classifier can consume the resulting features
    clf = RandomForestClassifier().fit(train_features, train_label)
    label = clf.predict(test_features)
    print("Predicted label: ", label)
	
### LikelihoodDistance
    from timesmash import LikelihoodDistance
    from sklearn.cluster import KMeans

    train = [[1, 0, 1.1, 0, 11.2, 0], [1, 1, 0, 1, 1, 0], [0, 0.9, 0, 1, 0, 1], [0, 1, 1, 0, 1, 1]]

    # Fit the distance measure and produce the pairwise distance matrix
    dist_calc = LikelihoodDistance().fit(train)
    dist = dist_calc.produce()

    # Cluster using the rows of the distance matrix as feature vectors
    clusters = KMeans(n_clusters=2).fit(dist).labels_
    print("Clusters:", clusters)
	
### InferredHMMLikelihood
	from timesmash import InferredHMMLikelihood
	from sklearn.ensemble import RandomForestClassifier

	train = [[1, 0, 1, 0, 1, 0], [1, 1, 0, 1, 1, 0]]
	train_label = [[0], [1]]
	test = [[0, 1, 1, 0, 1, 1]]
	train_features, test_features = InferredHMMLikelihood().fit_transform(
	    train=train, test=test, label=train_label
	)
	clf = RandomForestClassifier().fit(train_features, train_label)
	label = clf.predict(test_features)
	print("Predicted label: ", label)

### ClusteredHMMClassifier
    from timesmash import Quantizer, InferredHMMLikelihood, LikelihoodDistance
    from sklearn.cluster import KMeans
    from sklearn.ensemble import RandomForestClassifier
    import pandas as pd

    train = pd.DataFrame(
        [[1, 0, 1, 0, 1, 0], [1, 1, 0, 1, 1, 0], [1, 0, 1, 0, 1, 0], [1, 1, 0, 1, 1, 0]]
    )
    train_label = pd.DataFrame([[0], [1], [0], [1]])
    test = pd.DataFrame([[0, 1, 1, 0, 1, 1]])

    # Fit one quantizer up front so every downstream step shares the same quantization
    qtz = Quantizer().fit(train, label=train_label)

    # Refine each original class into two sub-clusters using likelihood distances,
    # producing labels of the form "<original label>_<sub-cluster>"
    new_labels = train_label.copy()
    for label, dataframe in train_label.groupby(train_label.columns[0]):
        dist = LikelihoodDistance(quantizer=qtz).fit(train.loc[dataframe.index]).produce()
        sub_labels = KMeans(n_clusters=2, random_state=0).fit(dist).labels_
        new_label_names = [str(label) + "_" + str(i) for i in sub_labels]
        new_labels.loc[dataframe.index, train_label.columns[0]] = new_label_names

    # Featurize with respect to the refined labels, then train the final
    # classifier on the original labels
    featurizer = InferredHMMLikelihood(quantizer=qtz, epsilon=0.01)
    train_features, test_features = featurizer.fit_transform(
        train=train, test=test, label=new_labels
    )

    clf = RandomForestClassifier().fit(train_features, train_label)
    print("Predicted label: ", clf.predict(test_features))

### XHMMFeatures for anomaly detection
    import pandas as pd
    from timesmash import XHMMFeatures
    from sklearn.neighbors import LocalOutlierFactor

    # Two channels of training data for the same two subjects
    channel1_train = pd.DataFrame([[0,1,0,1,0,1,0,1,0,1],[1,0,1,0,1,0,1,0,1,0]], index=['person_1', 'person_2'])
    channel2_train = pd.DataFrame([[0,1,0,1,0,1,0,1,0,1],[1,0,1,0,1,0,1,0,1,0]], index=['person_1', 'person_2'])
    labels = pd.DataFrame([1,1], index=['person_1', 'person_2'])

    alg = XHMMFeatures(n_quantizations=1)
    features_train = alg.fit_transform([channel1_train, channel2_train], labels)

    clf = LocalOutlierFactor(novelty=True)
    clf.fit(features_train)

    # The two test series differ in length; pandas fills the shorter row with NaN
    channel1_test = pd.DataFrame([[0,1,0,1,0,1,0,1,0,1],[1,0,1,0,1,0,1,0,1]], index=['person_test_1', 'person_test_2'])
    channel2_test = pd.DataFrame([[0,1,0,1,0,1,0,1,0,1],[0,1,0,1,0,1,0,1,0]], index=['person_test_1', 'person_test_2'])

    features_test = alg.transform([channel1_test, channel2_test])
    print(clf.predict(features_test))
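
With `novelty=True`, `LocalOutlierFactor` also exposes continuous anomaly scores, which are often more informative than the binary output of `predict()`. A short follow-up using only standard scikit-learn calls (nothing timesmash-specific is assumed):

    # score_samples returns the opposite of the local outlier factor:
    # larger (less negative) values indicate more normal samples
    print("Anomaly scores:", clf.score_samples(features_test))
    print("Decision function:", clf.decision_function(features_test))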

### XHMMFeatures for classification
    import pandas as pd
    from timesmash import XHMMFeatures
    from sklearn.ensemble import RandomForestClassifier

    d1_train = pd.DataFrame([[0,1,0,1,0,1,0,1,0,1],[1,0,1,0,1,0,1,0,1,0]], index=['person_1', 'person_2'])
    d2_train = pd.DataFrame([[1,0,1,0,1,0,1,0,1,0],[1,0,1,0,1,0,1,0,1,0]], index=['person_1', 'person_2'])
    labels = pd.DataFrame([0,1], index=['person_1', 'person_2'])

    alg = XHMMFeatures(n_quantizations=1)
    features_train = alg.fit_transform([d1_train, d2_train], labels)

    clf = RandomForestClassifier()
    clf.fit(features_train, labels)

    d1_test = pd.DataFrame([[1,0,1,0,1,0,1,0,1]], index=['person_test'])
    d2_test = pd.DataFrame([[0,1,0,1,0,1,0,1,0]], index=['person_test'])

    features_test = alg.transform([d1_test, d2_test])
    print(clf.predict(features_test))
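
The trained forest can also report class probabilities, which is convenient when a decision threshold other than the default is needed (standard scikit-learn behaviour, not specific to timesmash):

    # Probability estimates for each class, ordered as in clf.classes_
    print(clf.predict_proba(features_test))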

### XHMMClustering for multichannel clustering
    import pandas as pd
    from timesmash import XHMMClustering

    channel1 = pd.DataFrame(
        [
            [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
            [1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
            [1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
            [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        ],
        index=["person_1", "person_2", "person_3", "person_4"],
    )
    channel2 = pd.DataFrame(
        [
            [1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
            [1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
            [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
            [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
        ],
        index=["person_1", "person_2", "person_3", "person_4"],
    )
    alg = XHMMClustering(n_quantizations=1).fit(
        [channel1, channel2]
    )
    clusters = alg.labels_
    print(clusters)

	
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/zeroknowledgediscovery/timesmash/HEAD)

            
