pydatahub


Namepydatahub JSON
Version 2.25.1 PyPI version JSON
download
home_pagehttps://github.com/aliyun/aliyun-datahub-sdk-python
SummaryDatahub Python SDK
upload_time2024-02-06 09:45:37
maintainer
docs_urlNone
authorpanjinxing.pjx
requires_python
licenseApache License 2.0
keywords pydatahub python aliyun datahub sdk
VCS
bugtrack_url
requirements funcsigs requests simplejson six enum34 crcmod lz4 cprotobuf atomic rwlock urllib3
Travis-CI No Travis.
coveralls test coverage No coveralls.
            Datahub Python SDK
==================

|PyPI version| |Docs| |License| |Implementation|

Elegant way to access Datahub Python SDK API.
`Documentation <https://aliyun-datahub-sdk-python.readthedocs.io/en/latest/>`__

Installation
------------

The quick way:

.. code:: shell

    $ sudo pip install pydatahub

The dependencies will be installed automatically.

Or from source code:

.. code:: shell

    $ virtualenv pydatahub_env
    $ source pydatahub_env/bin/activate
    $ git clone <git clone URL> pydatahub
    $ cd pydatahub
    $ python setup.py install

If python-dev is not installed, an error message like 'Python.h: No such file or directory' will be printed. `See this <https://stackoverflow.com/questions/21530577/fatal-error-python-h-no-such-file-or-directory>`__

If installing on Windows fails with an error message like 'Microsoft Visual C++ XX.0 is required', download and install the required compiler from `here <https://wiki.python.org/moin/WindowsCompilers>`__


If the network is not available, the requirements are in the dependency folder:

.. code:: shell

    $ cd dependency
    $ pip install -r first.txt
    $ pip install -r second.txt

Python Version
-------------------

Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6 and PyPy; Python 3.6 is recommended.

Dependencies
---------------

-  setuptools (>=39.2.0)
-  requests (>=2.4.0)
-  simplejson (>=3.3.0)
-  six (>=1.1.0)
-  enum34 (>=1.1.5 for python_version < '3.4')
-  crcmod (>=1.7)
-  lz4 (>=2.0.0)
-  cprotobuf (>=0.1.9)
-  funcsigs (>=1.0.2)
-  atomic (>=0.7.0)
-  rwlock (>=0.0.6)
-  urllib3 (>=1.26.10)

Run Tests
---------

-  install tox:

.. code:: shell

    $ pip install -U tox

-  fill datahub/tests/datahub.ini with your configuration
-  run shell

.. code:: shell

    $ tox

Usage
-----

.. code:: python

    from datahub import DataHub
    dh = DataHub('**your-access-id**', '**your-secret-access-key**', endpoint='**your-end-point**')

    # with security token
    # dh = DataHub('**your-access-id**', '**your-secret-access-key**', endpoint='**your-end-point**', security_token='**your-security-token**')

    # ============================= create project =============================

    project_name = 'my_project_name'
    comment = 'my project'
    dh.create_project(project_name, comment)

    # ============================= get project =============================

    project_result = dh.get_project('pydatahub_test')
    print(project_result)

    # ============================= create tuple topic =============================

    from datahub.models import RecordSchema, FieldType
    topic_name='tuple_topic_test'
    shard_count = 3
    life_cycle = 7
    comment = 'tuple topic'
    record_schema = RecordSchema.from_lists(['bigint_field', 'string_field', 'double_field', 'bool_field', 'time_field'],
                                            [FieldType.BIGINT, FieldType.STRING, FieldType.DOUBLE, FieldType.BOOLEAN, FieldType.TIMESTAMP])
    dh.create_tuple_topic(project_name, topic_name, shard_count, life_cycle, record_schema, comment)

    # ============================= create blob topic =============================

    topic_name='blob_topic_test'
    shard_count = 3
    life_cycle = 7
    comment = 'blob topic'
    dh.create_blob_topic(project_name, topic_name, shard_count, life_cycle, comment)

    # ============================= get topic =============================

    topic_result = dh.get_topic(project_name, topic_name)
    print(topic_result)
    print(topic_result.record_schema)

    # ============================= list shard =============================

    shards_result = dh.list_shard(project_name, topic_name)
    print(shards_result)

    # ============================= put tuple records =============================

    from datahub.models import TupleRecord

    # put records by shard is recommended
    records0 = []
    record0 = TupleRecord(schema=record_schema, values=[1, 'yc1', 10.01, True, 1455869335000000])
    record0.put_attribute('AK', '47')
    records0.append(record0)
    put_result = dh.put_records_by_shard('pydatahub_test', 'tuple_topic_test', "0", records0)

    # records0 = []
    # record0 = TupleRecord(schema=record_schema, values=[1, 'yc1', 10.01, True, 1455869335000000])
    # record0.shard_id = '0'
    # record0.put_attribute('AK', '47')
    # records0.append(record0)
    # put_result = dh.put_records('pydatahub_test', 'tuple_topic_test', records0)
    print(put_result)

    # ============================= put blob records =============================

    from datahub.models import BlobRecord
    # put records by shard is recommended
    data = None
    with open(os.path.join(root_path, 'tests/resources/datahub.png'), 'rb') as f:
        data = f.read()
    records1 = []
    record1 = BlobRecord(blob_data=data)
    record1.put_attribute('a', 'b')
    records1.append(record1)
    put_result = dh.put_records_by_shard('pydatahub_test', 'blob_topic_test', "0", records1)

    # records1 = []
    # record1 = BlobRecord(blob_data=data)
    # record1.shard_id = '0'
    # record1.put_attribute('a', 'b')
    # records1.append(record1)
    # put_result = dh.put_records('pydatahub_test', 'blob_topic_test', records1)
    print(put_result)

    # ============================= get cursor =============================

    from datahub.models import CursorType
    cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)
    print(cursor_result)

    # ============================= get blob records =============================

    limit = 10
    blob_cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)
    get_result = dh.get_blob_records(project_name, topic_name, '0', blob_cursor_result.cursor, limit)
    print(get_result)
    print(get_result.records)
    print(get_result.records[0])

    # ============================= get tuple records =============================

    limit = 10
    tuple_cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)
    get_result = dh.get_tuple_records(project_name, topic_name, '0', record_schema, tuple_cursor_result.cursor, limit)
    print(get_result)
    print(get_result.records)
    print(get_result.records[0].values)

Examples
-----------

See more examples in `examples <https://github.com/aliyun/aliyun-datahub-sdk-python/tree/master/examples>`__

Release
--------

Update `changelog <https://github.com/aliyun/aliyun-datahub-sdk-python/tree/master/changelog.rst>`__, then use `bumpversion <https://github.com/peritus/bumpversion>`__ to update version:

1. bugfix: ``bumpversion patch``
2. small feature: ``bumpversion minor``
3. breaking change: ``bumpversion major``

Contributing
------------

For a development install, clone the repository and then install from
source:

::

    git clone https://github.com/aliyun/aliyun-datahub-sdk-python.git

License
-------

Licensed under the `Apache License
2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__

.. |PyPI version| image:: https://img.shields.io/pypi/v/pydatahub.svg?style=flat-square
   :target: https://pypi.python.org/pypi/pydatahub
.. |Docs| image:: https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat-square
   :target: http://pydatahub.readthedocs.io/zh_CN/latest/
.. |License| image:: https://img.shields.io/pypi/l/pydatahub.svg?style=flat-square
   :target: https://github.com/aliyun/aliyun-datahub-sdk-python/blob/master/LICENSE
.. |Implementation| image:: https://img.shields.io/pypi/implementation/pydatahub.svg?style=flat-square


            

Raw data

            {
    "_id": null,
    "home_page": "https://github.com/aliyun/aliyun-datahub-sdk-python",
    "name": "pydatahub",
    "maintainer": "",
    "docs_url": null,
    "requires_python": "",
    "maintainer_email": "",
    "keywords": "pydatahub,python,aliyun,datahub,sdk",
    "author": "panjinxing.pjx",
    "author_email": "panjinxing.pjx@alibaba-inc.com",
    "download_url": "https://files.pythonhosted.org/packages/ca/14/48cdb5d7c1f2ccde5292480d5530935ffcab20e92432d408a487b809cfdf/pydatahub-2.25.1.tar.gz",
    "platform": null,
    "description": "Datahub Python SDK\n==================\n\n|PyPI version| |Docs| |License| |Implementation|\n\nElegant way to access Datahub Python SDK API.\n`Documentation <https://aliyun-datahub-sdk-python.readthedocs.io/en/latest/>`__\n\nInstallation\n------------\n\nThe quick way:\n\n.. code:: shell\n\n    $ sudo pip install pydatahub\n\nThe dependencies will be installed automatically.\n\nOr from source code:\n\n.. code:: shell\n\n    $ virtualenv pydatahub_env\n    $ source pydatahub_env/bin/activate\n    $ git clone <git clone URL> pydatahub\n    $ cd pydatahub\n    $ python setup.py install\n\nIf python-dev was not installed, error message like 'Python.h: No such file or directory' will be printed. `See this <https://stackoverflow.com/questions/21530577/fatal-error-python-h-no-such-file-or-directory>`__\n\nIf install in windows, error message like 'Microsoft Visual C++ XX.0 is required', download and install dependency `here <https://wiki.python.org/moin/WindowsCompilers>`__\n\n\nIf network is not available, requirements are in dependency folder:\n\n.. code:: shell\n\n    $ cd dependency\n    $ pip install -r first.txt\n    $ pip install -r second.txt\n\nPython Version\n-------------------\n\nTested on Python 2.7, 3.3, 3.4, 3.5, 3.6 and pypy, Python 3.6 recommended\n\nDependencies\n---------------\n\n-  setuptools (>=39.2.0)\n-  requests (>=2.4.0)\n-  simplejson (>=3.3.0)\n-  six (>=1.1.0)\n-  enum34 (>=1.1.5 for python_version < '3.4')\n-  crcmod (>=1.7)\n-  lz4 (>=2.0.0)\n-  cprotobuf (>=0.1.9)\n-  funcsigs (>=1.0.2)\n-  atomic (>=0.7.0)\n-  rwlock (>=0.0.6)\n-  urllib3 (>=1.26.10)\n\nRun Tests\n---------\n\n-  install tox:\n\n.. code:: shell\n\n    $ pip install -U tox\n\n-  fill datahub/tests/datahub.ini with your configuration\n-  run shell\n\n.. code:: shell\n\n    $ tox\n\nUsage\n-----\n\n.. 
code:: python\n\n    from datahub import DataHub\n    dh = DataHub('**your-access-id**', '**your-secret-access-key**', endpoint='**your-end-point**')\n\n    # with security token\n    # dh = DataHub('**your-access-id**', '**your-secret-access-key**', endpoint='**your-end-point**', security_token='**your-security-token**')\n\n    # ============================= create project =============================\n\n    project_name = 'my_project_name'\n    comment = 'my project'\n    dh.create_project(project_name, comment)\n\n    # ============================= get project =============================\n\n    project_result = dh.get_project('pydatahub_test')\n    print(project_result)\n\n    # ============================= create tuple topic =============================\n\n    from datahub.models import RecordSchema, FieldType\n    topic_name='tuple_topic_test'\n    shard_count = 3\n    life_cycle = 7\n    comment = 'tuple topic'\n    record_schema = RecordSchema.from_lists(['bigint_field', 'string_field', 'double_field', 'bool_field', 'time_field'],\n                                            [FieldType.BIGINT, FieldType.STRING, FieldType.DOUBLE, FieldType.BOOLEAN, FieldType.TIMESTAMP])\n    dh.create_tuple_topic(project_name, topic_name, shard_count, life_cycle, record_schema, comment)\n\n    # ============================= create blob topic =============================\n\n    topic_name='blob_topic_test'\n    shard_count = 3\n    life_cycle = 7\n    comment = 'blob topic'\n    dh.create_blob_topic(project_name, topic_name, shard_count, life_cycle, comment)\n\n    # ============================= get topic =============================\n\n    topic_result = dh.get_topic(project_name, topic_name)\n    print(topic_result)\n    print(topic_result.record_schema)\n\n    # ============================= list shard =============================\n\n    shards_result = dh.list_shard(project_name, topic_name)\n    print(shards_result)\n\n    # ============================= put 
tuple records =============================\n\n    from datahub.models import TupleRecord\n\n    # put records by shard is recommended\n    records0 = []\n    record0 = TupleRecord(schema=record_schema, values=[1, 'yc1', 10.01, True, 1455869335000000])\n    record0.put_attribute('AK', '47')\n    records0.append(record0)\n    put_result = dh.put_records_by_shard('pydatahub_test', 'tuple_topic_test', \"0\", records0)\n\n    # records0 = []\n    # record0 = TupleRecord(schema=record_schema, values=[1, 'yc1', 10.01, True, 1455869335000000])\n    # record0.shard_id = '0'\n    # record0.put_attribute('AK', '47')\n    # records0.append(record0)\n    # put_result = dh.put_records('pydatahub_test', 'tuple_topic_test', records0)\n    print(put_result)\n\n    # ============================= put blob records =============================\n\n    from datahub.models import BlobRecord\n    # put records by shard is recommended\n    data = None\n    with open(os.path.join(root_path, 'tests/resources/datahub.png'), 'rb') as f:\n        data = f.read()\n    records1 = []\n    record1 = BlobRecord(blob_data=data)\n    record1.put_attribute('a', 'b')\n    records1.append(record1)\n    put_result = dh.put_records_by_shard('pydatahub_test', 'blob_topic_test', \"0\" records1)\n\n    # records1 = []\n    # record1 = BlobRecord(blob_data=data)\n    # record1.shard_id = '0'\n    # record1.put_attribute('a', 'b')\n    # records1.append(record1)\n    # put_result = dh.put_records('pydatahub_test', 'blob_topic_test', records1)\n    print(put_result)\n\n    # ============================= get cursor =============================\n\n    from datahub.models import CursorType\n    cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)\n    print(cursor_result)\n\n    # ============================= get blob records =============================\n\n    limit = 10\n    blob_cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)\n    get_result = 
dh.get_blob_records(project_name, topic_name, '0', blob_cursor_result.cursor, limit)\n    print(get_result)\n    print(get_result.records)\n    print(get_result.records[0])\n\n    # ============================= get tuple records =============================\n\n    limit = 10\n    tuple_cursor_result = dh.get_cursor(project_name, topic_name, '0', CursorType.OLDEST)\n    get_result = dh.get_tuple_records(project_name, topic_name, '0', record_schema, tuple_cursor_result.cursor, limit)\n    print(get_result)\n    print(get_result.records)\n    print(get_result.records[0].values)\n\nExamples\n-----------\n\nsee more examples in `examples <https://github.com/aliyun/aliyun-datahub-sdk-python/tree/master/examples>`__\n\nRelease\n--------\n\nUpdate `changelog <https://github.com/aliyun/aliyun-datahub-sdk-python/tree/master/changelog.rst>`__, then use `bumpversion <https://github.com/peritus/bumpversion>`__ to update version:\n\n1. bugfix: ``bumpversion patch``\n2. small feature: ``bumpversion minor``\n3. breaking change: ``bumpversion major``\n\nContributing\n------------\n\nFor a development install, clone the repository and then install from\nsource:\n\n::\n\n    git clone https://github.com/aliyun/aliyun-datahub-sdk-python.git\n\nLicense\n-------\n\nLicensed under the `Apache License\n2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__\n\n.. |PyPI version| image:: https://img.shields.io/pypi/v/pydatahub.svg?style=flat-square\n   :target: https://pypi.python.org/pypi/pydatahub\n.. |Docs| image:: https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat-square\n   :target: http://pydatahub.readthedocs.io/zh_CN/latest/\n.. |License| image:: https://img.shields.io/pypi/l/pydatahub.svg?style=flat-square\n   :target: https://github.com/aliyun/aliyun-datahub-sdk-python/blob/master/LICENSE\n.. |Implementation| image:: https://img.shields.io/pypi/implementation/pydatahub.svg?style=flat-square\n\n",
    "bugtrack_url": null,
    "license": "Apache License 2.0",
    "summary": "Datahub Python SDK",
    "version": "2.25.1",
    "project_urls": {
        "Homepage": "https://github.com/aliyun/aliyun-datahub-sdk-python"
    },
    "split_keywords": [
        "pydatahub",
        "python",
        "aliyun",
        "datahub",
        "sdk"
    ],
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "blake2b_256": "ca1448cdb5d7c1f2ccde5292480d5530935ffcab20e92432d408a487b809cfdf",
                "md5": "0c0e36d8a16154de8b063fa32243dc48",
                "sha256": "5a2b1c619a033d199250f9148538cb4d281893cb7bb86c0fcd580d50bfff9ad7"
            },
            "downloads": -1,
            "filename": "pydatahub-2.25.1.tar.gz",
            "has_sig": false,
            "md5_digest": "0c0e36d8a16154de8b063fa32243dc48",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 80880,
            "upload_time": "2024-02-06T09:45:37",
            "upload_time_iso_8601": "2024-02-06T09:45:37.032357Z",
            "url": "https://files.pythonhosted.org/packages/ca/14/48cdb5d7c1f2ccde5292480d5530935ffcab20e92432d408a487b809cfdf/pydatahub-2.25.1.tar.gz",
            "yanked": false,
            "yanked_reason": null
        }
    ],
    "upload_time": "2024-02-06 09:45:37",
    "github": true,
    "gitlab": false,
    "bitbucket": false,
    "codeberg": false,
    "github_user": "aliyun",
    "github_project": "aliyun-datahub-sdk-python",
    "travis_ci": false,
    "coveralls": false,
    "github_actions": false,
    "requirements": [
        {
            "name": "funcsigs",
            "specs": [
                [
                    ">=",
                    "1.0.2"
                ]
            ]
        },
        {
            "name": "requests",
            "specs": [
                [
                    ">=",
                    "2.4.0"
                ]
            ]
        },
        {
            "name": "simplejson",
            "specs": [
                [
                    ">=",
                    "3.3.0"
                ]
            ]
        },
        {
            "name": "six",
            "specs": [
                [
                    ">=",
                    "1.1.0"
                ]
            ]
        },
        {
            "name": "enum34",
            "specs": [
                [
                    ">=",
                    "1.1.5"
                ]
            ]
        },
        {
            "name": "crcmod",
            "specs": [
                [
                    ">=",
                    "1.7"
                ]
            ]
        },
        {
            "name": "lz4",
            "specs": [
                [
                    ">=",
                    "2.0.0"
                ]
            ]
        },
        {
            "name": "cprotobuf",
            "specs": [
                [
                    ">=",
                    "0.1.9"
                ]
            ]
        },
        {
            "name": "atomic",
            "specs": [
                [
                    ">=",
                    "0.7.0"
                ]
            ]
        },
        {
            "name": "rwlock",
            "specs": [
                [
                    ">=",
                    "0.0.6"
                ]
            ]
        },
        {
            "name": "urllib3",
            "specs": [
                [
                    ">=",
                    "1.26.10"
                ]
            ]
        }
    ],
    "tox": true,
    "lcname": "pydatahub"
}
        
Elapsed time: 0.18234s