Datahub Python SDK
==================
|PyPI version| |Docs| |License| |Implementation|
An elegant way to access the Datahub API through the Python SDK.
`Documentation <https://aliyun-datahub-sdk-python.readthedocs.io/en/latest/>`__
Installation
------------
The quick way:
.. code:: shell
$ sudo pip install pydatahub
The dependencies will be installed automatically.
Or from source code:
.. code:: shell
$ virtualenv pydatahub_env
$ source pydatahub_env/bin/activate
$ git clone <git clone URL> pydatahub
$ cd pydatahub
$ python setup.py install
If python-dev is not installed, an error message like 'Python.h: No such file or directory' will be printed. `See this <https://stackoverflow.com/questions/21530577/fatal-error-python-h-no-such-file-or-directory>`__
When installing on Windows, if an error message like 'Microsoft Visual C++ XX.0 is required' appears, download and install the dependency from `here <https://wiki.python.org/moin/WindowsCompilers>`__
If the network is not available, the requirements are in the dependency folder:
.. code:: shell
$ cd dependency
$ pip install -r first.txt
$ pip install -r second.txt
Python Version
-------------------
Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6 and PyPy. Python 3.6 is recommended.
Dependencies
---------------
- setuptools (>=39.2.0)
- requests (>=2.4.0)
- simplejson (>=3.3.0)
- six (>=1.1.0)
- enum34 (>=1.1.5 for python_version < '3.4')
- crcmod (>=1.7)
- lz4 (>=2.0.0)
- cprotobuf (>=0.1.9)
- funcsigs (>=1.0.2)
- atomic (>=0.7.0)
- rwlock (>=0.0.6)
- urllib3 (>=1.26.10)
Run Tests
---------
- install tox:
.. code:: shell
$ pip install -U tox
- fill datahub/tests/datahub.ini with your configuration
- run the tests from the shell:
.. code:: shell
$ tox
Usage
-----
.. code:: python
from datahub import DataHub
dh = DataHub('**your-access-id**', '**your-secret-access-key**', endpoint='**your-end-point**')
# with security token
# dh = DataHub('**your-access-id**', '**your-secret-access-key**', endpoint='**your-end-point**', security_token='**your-security-token**')
# ============================= create project =============================
project_name = 'my_project_name'
comment = 'my project'
dh.create_project(project_name, comment)
# ============================= get project =============================
project_result = dh.get_project('pydatahub_test')
print(project_result)
# ============================= create tuple topic =============================
from datahub.models import RecordSchema, FieldType
topic_name='tuple_topic_test'
shard_count = 3
life_cycle = 7
comment = 'tuple topic'
record_schema = RecordSchema.from_lists(['bigint_field', 'string_field', 'double_field', 'bool_field', 'time_field'],
[FieldType.BIGINT, FieldType.STRING, FieldType.DOUBLE, FieldType.BOOLEAN, FieldType.TIMESTAMP])
dh.create_tuple_topic(project_name, topic_name, shard_count, life_cycle, record_schema, comment)
# ============================= create blob topic =============================
topic_name='blob_topic_test'
shard_count = 3
life_cycle = 7
comment = 'blob topic'
dh.create_blob_topic(project_name, topic_name, shard_count, life_cycle, comment)
# ============================= get topic =============================
topic_result = dh.get_topic(project_name, topic_name)
print(topic_result)
print(topic_result.record_schema)
# ============================= list shard =============================
shards_result = dh.list_shard(project_name, topic_name)
print(shards_result)
# ============================= put tuple records =============================
from datahub.models import TupleRecord
# put records by shard is recommended
records0 = []
record0 = TupleRecord(schema=record_schema, values=[1, 'yc1', 10.01, True, 1455869335000000])
record0.put_attribute('AK', '47')
records0.append(record0)
put_result = dh.put_records_by_shard('pydatahub_test', 'tuple_topic_test', "0", records0)
# records0 = []
# record0 = TupleRecord(schema=record_schema, values=[1, 'yc1', 10.01, True, 1455869335000000])
# record0.shard_id = '0'
# record0.put_attribute('AK', '47')
# records0.append(record0)
# put_result = dh.put_records('pydatahub_test', 'tuple_topic_test', records0)
print(put_result)
# ============================= put blob records =============================
from datahub.models import BlobRecord
# put records by shard is recommended
data = None
with open(os.path.join(root_path, 'tests/resources/datahub.png'), 'rb') as f:
data = f.read()
records1 = []
record1 = BlobRecord(blob_data=data)
record1.put_attribute('a', 'b')
records1.append(record1)
put_result = dh.put_records_by_shard('pydatahub_test', 'blob_topic_test', "0", records1)
# records1 = []
# record1 = BlobRecord(blob_data=data)
# record1.shard_id = '0'
# record1.put_attribute('a', 'b')
# records1.append(record1)
# put_result = dh.put_records('pydatahub_test', 'blob_topic_test', records1)
print(put_result)
# ============================= get cursor =============================
from datahub.models import CursorType
cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)
print(cursor_result)
# ============================= get blob records =============================
limit = 10
blob_cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)
get_result = dh.get_blob_records(project_name, topic_name, '0', blob_cursor_result.cursor, limit)
print(get_result)
print(get_result.records)
print(get_result.records[0])
# ============================= get tuple records =============================
limit = 10
tuple_cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)
get_result = dh.get_tuple_records(project_name, topic_name, '0', record_schema, tuple_cursor_result.cursor, limit)
print(get_result)
print(get_result.records)
print(get_result.records[0].values)
Examples
-----------
See more examples in `examples <https://github.com/aliyun/aliyun-datahub-sdk-python/tree/master/examples>`__.
Release
--------
Update the `changelog <https://github.com/aliyun/aliyun-datahub-sdk-python/tree/master/changelog.rst>`__, then use `bumpversion <https://github.com/peritus/bumpversion>`__ to update the version:
1. bugfix: ``bumpversion patch``
2. small feature: ``bumpversion minor``
3. breaking change: ``bumpversion major``
Contributing
------------
For a development install, clone the repository and then install from
source:
::
git clone https://github.com/aliyun/aliyun-datahub-sdk-python.git
License
-------
Licensed under the `Apache License
2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__
.. |PyPI version| image:: https://img.shields.io/pypi/v/pydatahub.svg?style=flat-square
:target: https://pypi.python.org/pypi/pydatahub
.. |Docs| image:: https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat-square
:target: http://pydatahub.readthedocs.io/zh_CN/latest/
.. |License| image:: https://img.shields.io/pypi/l/pydatahub.svg?style=flat-square
:target: https://github.com/aliyun/aliyun-datahub-sdk-python/blob/master/LICENSE
.. |Implementation| image:: https://img.shields.io/pypi/implementation/pydatahub.svg?style=flat-square
Raw data
{
"_id": null,
"home_page": "https://github.com/aliyun/aliyun-datahub-sdk-python",
"name": "pydatahub",
"maintainer": null,
"docs_url": null,
"requires_python": null,
"maintainer_email": null,
"keywords": "pydatahub, python, aliyun, datahub, sdk",
"author": "panjinxing.pjx",
"author_email": "panjinxing.pjx@alibaba-inc.com",
"download_url": "https://files.pythonhosted.org/packages/a6/c6/ae0fd940e62e65092b51bf96de40bd1a9a56aee0d19b4d06a7fe1d4ef944/pydatahub-2.25.4.tar.gz",
"platform": null,
"description": "Datahub Python SDK\n==================\n\n|PyPI version| |Docs| |License| |Implementation|\n\nElegant way to access Datahub Python SDK API.\n`Documentation <https://aliyun-datahub-sdk-python.readthedocs.io/en/latest/>`__\n\nInstallation\n------------\n\nThe quick way:\n\n.. code:: shell\n\n $ sudo pip install pydatahub\n\nThe dependencies will be installed automatically.\n\nOr from source code:\n\n.. code:: shell\n\n $ virtualenv pydatahub_env\n $ source pydatahub_env/bin/activate\n $ git clone <git clone URL> pydatahub\n $ cd pydatahub\n $ python setup.py install\n\nIf python-dev was not installed, error message like 'Python.h: No such file or directory' will be printed. `See this <https://stackoverflow.com/questions/21530577/fatal-error-python-h-no-such-file-or-directory>`__\n\nIf install in windows, error message like 'Microsoft Visual C++ XX.0 is required', download and install dependency `here <https://wiki.python.org/moin/WindowsCompilers>`__\n\n\nIf network is not available, requirements are in dependency folder:\n\n.. code:: shell\n\n $ cd dependency\n $ pip install -r first.txt\n $ pip install -r second.txt\n\nPython Version\n-------------------\n\nTested on Python 2.7, 3.3, 3.4, 3.5, 3.6 and pypy, Python 3.6 recommended\n\nDependencies\n---------------\n\n- setuptools (>=39.2.0)\n- requests (>=2.4.0)\n- simplejson (>=3.3.0)\n- six (>=1.1.0)\n- enum34 (>=1.1.5 for python_version < '3.4')\n- crcmod (>=1.7)\n- lz4 (>=2.0.0)\n- cprotobuf (>=0.1.9)\n- funcsigs (>=1.0.2)\n- atomic (>=0.7.0)\n- rwlock (>=0.0.6)\n- urllib3 (>=1.26.10)\n\nRun Tests\n---------\n\n- install tox:\n\n.. code:: shell\n\n $ pip install -U tox\n\n- fill datahub/tests/datahub.ini with your configuration\n- run shell\n\n.. code:: shell\n\n $ tox\n\nUsage\n-----\n\n.. 
code:: python\n\n from datahub import DataHub\n dh = DataHub('**your-access-id**', '**your-secret-access-key**', endpoint='**your-end-point**')\n\n # with security token\n # dh = DataHub('**your-access-id**', '**your-secret-access-key**', endpoint='**your-end-point**', security_token='**your-security-token**')\n\n # ============================= create project =============================\n\n project_name = 'my_project_name'\n comment = 'my project'\n dh.create_project(project_name, comment)\n\n # ============================= get project =============================\n\n project_result = dh.get_project('pydatahub_test')\n print(project_result)\n\n # ============================= create tuple topic =============================\n\n from datahub.models import RecordSchema, FieldType\n topic_name='tuple_topic_test'\n shard_count = 3\n life_cycle = 7\n comment = 'tuple topic'\n record_schema = RecordSchema.from_lists(['bigint_field', 'string_field', 'double_field', 'bool_field', 'time_field'],\n [FieldType.BIGINT, FieldType.STRING, FieldType.DOUBLE, FieldType.BOOLEAN, FieldType.TIMESTAMP])\n dh.create_tuple_topic(project_name, topic_name, shard_count, life_cycle, record_schema, comment)\n\n # ============================= create blob topic =============================\n\n topic_name='blob_topic_test'\n shard_count = 3\n life_cycle = 7\n comment = 'blob topic'\n dh.create_blob_topic(project_name, topic_name, shard_count, life_cycle, comment)\n\n # ============================= get topic =============================\n\n topic_result = dh.get_topic(project_name, topic_name)\n print(topic_result)\n print(topic_result.record_schema)\n\n # ============================= list shard =============================\n\n shards_result = dh.list_shard(project_name, topic_name)\n print(shards_result)\n\n # ============================= put tuple records =============================\n\n from datahub.models import TupleRecord\n\n # put records by shard is recommended\n records0 = 
[]\n record0 = TupleRecord(schema=record_schema, values=[1, 'yc1', 10.01, True, 1455869335000000])\n record0.put_attribute('AK', '47')\n records0.append(record0)\n put_result = dh.put_records_by_shard('pydatahub_test', 'tuple_topic_test', \"0\", records0)\n\n # records0 = []\n # record0 = TupleRecord(schema=record_schema, values=[1, 'yc1', 10.01, True, 1455869335000000])\n # record0.shard_id = '0'\n # record0.put_attribute('AK', '47')\n # records0.append(record0)\n # put_result = dh.put_records('pydatahub_test', 'tuple_topic_test', records0)\n print(put_result)\n\n # ============================= put blob records =============================\n\n from datahub.models import BlobRecord\n # put records by shard is recommended\n data = None\n with open(os.path.join(root_path, 'tests/resources/datahub.png'), 'rb') as f:\n data = f.read()\n records1 = []\n record1 = BlobRecord(blob_data=data)\n record1.put_attribute('a', 'b')\n records1.append(record1)\n put_result = dh.put_records_by_shard('pydatahub_test', 'blob_topic_test', \"0\" records1)\n\n # records1 = []\n # record1 = BlobRecord(blob_data=data)\n # record1.shard_id = '0'\n # record1.put_attribute('a', 'b')\n # records1.append(record1)\n # put_result = dh.put_records('pydatahub_test', 'blob_topic_test', records1)\n print(put_result)\n\n # ============================= get cursor =============================\n\n from datahub.models import CursorType\n cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)\n print(cursor_result)\n\n # ============================= get blob records =============================\n\n limit = 10\n blob_cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)\n get_result = dh.get_blob_records(project_name, topic_name, '0', blob_cursor_result.cursor, limit)\n print(get_result)\n print(get_result.records)\n print(get_result.records[0])\n\n # ============================= get tuple records =============================\n\n limit = 10\n 
tuple_cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)\n get_result = dh.get_tuple_records(project_name, topic_name, '0', record_schema, tuple_cursor_result.cursor, limit)\n print(get_result)\n print(get_result.records)\n print(get_result.records[0].values)\n\nExamples\n-----------\n\nsee more examples in `examples <https://github.com/aliyun/aliyun-datahub-sdk-python/tree/master/examples>`__\n\nRelease\n--------\n\nUpdate `changelog <https://github.com/aliyun/aliyun-datahub-sdk-python/tree/master/changelog.rst>`__, then use `bumpversion <https://github.com/peritus/bumpversion>`__ to update version:\n\n1. bugfix: ``bumpversion patch``\n2. small feature: ``bumpversion minor``\n3. breaking change: ``bumpversion major``\n\nContributing\n------------\n\nFor a development install, clone the repository and then install from\nsource:\n\n::\n\n git clone https://github.com/aliyun/aliyun-datahub-sdk-python.git\n\nLicense\n-------\n\nLicensed under the `Apache License\n2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__\n\n.. |PyPI version| image:: https://img.shields.io/pypi/v/pydatahub.svg?style=flat-square\n :target: https://pypi.python.org/pypi/pydatahub\n.. |Docs| image:: https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat-square\n :target: http://pydatahub.readthedocs.io/zh_CN/latest/\n.. |License| image:: https://img.shields.io/pypi/l/pydatahub.svg?style=flat-square\n :target: https://github.com/aliyun/aliyun-datahub-sdk-python/blob/master/LICENSE\n.. |Implementation| image:: https://img.shields.io/pypi/implementation/pydatahub.svg?style=flat-square\n\n",
"bugtrack_url": null,
"license": "Apache License 2.0",
"summary": "Datahub Python SDK",
"version": "2.25.4",
"project_urls": {
"Homepage": "https://github.com/aliyun/aliyun-datahub-sdk-python"
},
"split_keywords": [
"pydatahub",
" python",
" aliyun",
" datahub",
" sdk"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "a6c6ae0fd940e62e65092b51bf96de40bd1a9a56aee0d19b4d06a7fe1d4ef944",
"md5": "25e2bb3b41f54995524aae537ff3b261",
"sha256": "962ea601a3767a8ea6cea8b5d1b5e99dca9372f3b8804b532858d643f1dd2a21"
},
"downloads": -1,
"filename": "pydatahub-2.25.4.tar.gz",
"has_sig": false,
"md5_digest": "25e2bb3b41f54995524aae537ff3b261",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 80929,
"upload_time": "2024-11-08T03:13:27",
"upload_time_iso_8601": "2024-11-08T03:13:27.254774Z",
"url": "https://files.pythonhosted.org/packages/a6/c6/ae0fd940e62e65092b51bf96de40bd1a9a56aee0d19b4d06a7fe1d4ef944/pydatahub-2.25.4.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-11-08 03:13:27",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "aliyun",
"github_project": "aliyun-datahub-sdk-python",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "funcsigs",
"specs": [
[
">=",
"1.0.2"
]
]
},
{
"name": "requests",
"specs": [
[
">=",
"2.4.0"
]
]
},
{
"name": "simplejson",
"specs": [
[
">=",
"3.3.0"
]
]
},
{
"name": "six",
"specs": [
[
">=",
"1.1.0"
]
]
},
{
"name": "enum34",
"specs": [
[
">=",
"1.1.5"
]
]
},
{
"name": "crcmod",
"specs": [
[
">=",
"1.7"
]
]
},
{
"name": "lz4",
"specs": [
[
">=",
"2.0.0"
]
]
},
{
"name": "cprotobuf",
"specs": [
[
">=",
"0.1.9"
]
]
},
{
"name": "atomic",
"specs": [
[
">=",
"0.7.0"
]
]
},
{
"name": "rwlock",
"specs": [
[
">=",
"0.0.6"
]
]
},
{
"name": "urllib3",
"specs": [
[
">=",
"1.26.10"
]
]
}
],
"tox": true,
"lcname": "pydatahub"
}