sparrow-python

Name: sparrow-python
Version: 0.6.2
Homepage: https://github.com/beidongjiedeguang/sparrow
Upload time: 2025-08-21 01:32:09
Requires Python: >=3.10
Keywords: machine learning, cli, cv, nlp
# sparrow-python

[![image](https://img.shields.io/badge/Pypi-0.6.2-green.svg)](https://pypi.org/project/sparrow-python)
[![image](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/)
[![image](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)

---

## Quick Command Index

### 🎯 Common Commands at a Glance
```bash
# View tabular data
spr table_viewer data.csv

# Batch image processing
spr mllm_call_images ./photos
spr download_images "keyword" --num_images=100

# Video processing
spr video_dedup video.mp4
spr frames_to_video frames_dir

# File operations
spr pack folder_name        # Compress
spr split large_file.dat    # Split a large file
spr kill 8080               # Kill the process on a port

# Project tools
spr create my_project      # Create a project
spr clone repo_url         # Clone a repository
spr gen_key project_name   # Generate an SSH key

# Service startup
spr start_server           # Multiprocessing server
spr reminder               # Reminder service
```

### 📖 Detailed Command Reference
Every command can be invoked as `sp`, `spr`, or `sparrow`.
Use `spr <command> --help` to see the parameters of a specific command.

---

## TODO
- [ ] Multimodal image preprocessing; consider multiprocessing
- [ ] Find a tool for elegantly drawing flowcharts and diagrams, e.g. PPT?
- [ ] Implement an elegant TextSplitter

- [ ] Prompt debugging page
- [ ] Configuration support: prompt backend address; model parameter settings
- [ ] Add a test button, model options, and model configuration
- [ ] Native git download support
- [X] streamlit multimodal chat input: https://github.com/streamlit/streamlit/issues/7409
- [ ] https://github.com/hiyouga/LLaMA-Factory/blob/main/src/llamafactory/chat/vllm_engine.py#L99

Recognize scrolling screenshots of the page linked below:
https://sjh.baidu.com/site/dzfmws.cn/da721a31-476d-42ed-aad1-81c2dc3a66a3



vLLM async inference example:

New instance (from deepwiki):
```python
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.sampling_params import SamplingParams
from vllm.utils import random_uuid
import json

app = FastAPI()
engine = None

async def init_engine():
    """Initialize the vLLM engine."""
    global engine
    # Configure engine arguments
    engine_args = AsyncEngineArgs(
        model="your-model-name",  # Replace with your model
        tensor_parallel_size=1,   # Adjust to the number of GPUs available
        dtype="auto",
        max_model_len=2048,
    )
    engine = AsyncLLMEngine.from_engine_args(engine_args)

@app.on_event("startup")
async def startup_event():
    await init_engine()

@app.post("/generate")
async def generate(request: Request):
    """Text-generation endpoint."""
    request_dict = await request.json()
    prompt = request_dict.get("prompt")
    stream = request_dict.get("stream", False)

    # Build sampling parameters
    sampling_params = SamplingParams(
        temperature=request_dict.get("temperature", 0.7),
        max_tokens=request_dict.get("max_tokens", 100),
        top_p=request_dict.get("top_p", 1.0),
    )

    request_id = random_uuid()
    results_generator = engine.generate(prompt, sampling_params, request_id)

    if stream:
        # Streaming response
        async def stream_results():
            async for request_output in results_generator:
                text_outputs = [output.text for output in request_output.outputs]
                ret = {"text": text_outputs}
                yield f"data: {json.dumps(ret)}\n\n"

        return StreamingResponse(stream_results(), media_type="text/plain")
    else:
        # Non-streaming response
        final_output = None
        async for request_output in results_generator:
            final_output = request_output

        text_outputs = [output.text for output in final_output.outputs]
        return JSONResponse({"text": text_outputs})

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
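For quick testing, here is a minimal client sketch for the `/generate` endpoint above. It assumes the example server is running locally on port 8000; the third-party `requests` library is an illustrative choice, not part of sparrow-python.

```python
import json
import requests

BASE_URL = "http://localhost:8000"  # assumes the example server above is running

# Non-streaming request: the server returns {"text": [...]} as JSON.
resp = requests.post(
    f"{BASE_URL}/generate",
    json={"prompt": "Hello, vLLM!", "max_tokens": 64, "temperature": 0.7},
)
print(resp.json()["text"])

# Streaming request: the server yields "data: {...}\n\n" chunks.
with requests.post(
    f"{BASE_URL}/generate",
    json={"prompt": "Hello, vLLM!", "stream": True},
    stream=True,
) as resp:
    for line in resp.iter_lines():
        if line.startswith(b"data: "):
            chunk = json.loads(line[len(b"data: "):])
            print(chunk["text"])
```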

```python
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import uvicorn
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.sampling_params import SamplingParams
from vllm.utils import random_uuid

# Define the request data model
class RequestData(BaseModel):
    prompts: List[str]
    max_tokens: int = 2048
    temperature: float = 0.7

# Initialize the FastAPI app
app = FastAPI()

# Initialize the AsyncLLMEngine
engine_args = AsyncEngineArgs(
    model="your-model-name",  # Replace with your model name
    dtype="bfloat16",
    gpu_memory_utilization=0.8,
    max_model_len=4096,
    trust_remote_code=True
)
llm_engine = AsyncLLMEngine.from_engine_args(engine_args)

# Define the inference endpoint
@app.post("/predict")
async def generate_text(data: RequestData):
    sampling_params = SamplingParams(
        max_tokens=data.max_tokens,
        temperature=data.temperature
    )

    # The engine generates one prompt per call, so submit each prompt
    # under its own unique request ID and collect the final outputs.
    text_outputs = []
    for prompt in data.prompts:
        request_id = random_uuid()
        results_generator = llm_engine.generate(prompt, sampling_params, request_id)

        final_output = None
        async for request_output in results_generator:
            final_output = request_output

        assert final_output is not None
        text_outputs.extend(output.text for output in final_output.outputs)

    return {"responses": text_outputs}

# Run the server
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
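And a matching client sketch for the `/predict` endpoint, under the same assumption that the server runs locally on port 8000:

```python
import requests

# Batch request against the /predict endpoint defined above.
resp = requests.post(
    "http://localhost:8000/predict",
    json={"prompts": ["Hello!", "What is vLLM?"], "max_tokens": 128},
)
print(resp.json()["responses"])
```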



## Scripts to Be Added

## Install

```bash
pip install sparrow-python
# Or with dev dependencies
pip install "sparrow-python[dev]"
# Or from source
pip install -e .
# Or
pip install -e ".[dev]"
```

## Usage

### Multiprocessing SyncManager

Start the server first:

```bash
$ spr start_server
```

The default port is `50001`.

(Process 1) producer:

```python
from sparrow.multiprocess.client import Client

client = Client(port=50001)
client.update_dict({'a': 1, 'b': 2})
```

(Process 2) consumer:

```python
from sparrow.multiprocess.client import Client

client = Client(port=50001)
print(client.get_dict_data())

# >>> {'a': 1, 'b': 2}
```

### Common Tools

#### Data Processing and Viewing
- **Table viewer**
```bash
# Basic usage
spr table_viewer sample_products.csv --port 8081

# Specify image columns and set the port
spr table_viewer "products.xlsx" --image_columns="product_image,thumbnail" --port=9090

# Specify the worksheet
spr table_viewer "report.xlsx" --sheet_name="Sheet2"
```

- **Text deduplication**
```bash
# Deduplicate by edit distance
spr deduplicate input.txt output.txt --method=edit --threshold=0.8

# Deduplicate by ROUGE similarity
spr deduplicate data.csv clean.csv --method=rouge --target_col=content
```

- **File compression and extraction**
Supported formats: "zip", "tar", "gztar", "bztar", "xztar"
```bash
# Compress a file/folder
spr pack pack_dir

# Extract an archive
spr unpack filename extract_dir
```

- **Large-file splitting and merging**
```bash
# Split a large file (1GB chunks by default)
spr split large_file.dat

# Merge the split parts
spr merge large_file.dat
```

#### Project Management
- **Project scaffolding**
```bash
spr create awesome-project
```

- **Git repository cloning**
```bash
# Basic clone
spr clone https://github.com/user/repo.git

# Specify a branch and save path
spr clone https://github.com/user/repo.git --branch=dev --save_path=./my_project
```

- **Automatic Git commit monitoring**
```bash
spr auto_commit --interval=60
```

- **SSH key generation**
```bash
spr gen_key project_name --email=your@email.com
```

- **Configuration management**
```bash
# Initialize the config file
spr init_config

# Show the current config
spr get_config

# Show a specific config entry
spr get_config mllm.model
```

#### System Tools
- **Port process management**
```bash
# Kill the process listening on a port
spr kill 8080

# Get the local IP
spr get_ip
spr get_ip --env=outer  # Get the public IP
```

- **Docker management**
```bash
# Save all Docker images
spr save_docker_images

# Load Docker images
spr load_docker_images

# Monitor Docker GPU status
spr docker_gpu_stat
```

#### Multimedia Processing
- **Video frame deduplication**
```bash
# Basic deduplication (phash algorithm by default)
spr video_dedup video.mp4

# Custom parameters
spr video_dedup video.mp4 --method=dhash --threshold=5 --step=2 --workers=4
```

- **Frames to video**
```bash
# Convert a directory of frames into a video
spr frames_to_video frames_dir --fps=24

# One step: deduplicate + build the video
spr dedup_and_create_video video.mp4 --video_fps=15
```

- **Video subtitle processing**
```bash
# Auto-generate subtitles (transcribe + translate)
spr subtitles video.mp4

# Translate existing subtitles
spr translate_subt subtitles.srt

# Merge bilingual subtitles
spr merge_subtitles en.srt zh.srt
```

#### Image Download and Processing
- **Batch image download**
```bash
# Download for a single keyword
spr download_images "cats" --num_images=100

# Multiple keywords, multiple search engines
spr download_images "cats,dogs" --engines="bing,google,baidu" --save_dir="animals"
```

#### Large Models and AI
- **Batch image recognition (table)**
```bash
# Basic usage
spr mllm_call_table images.xlsx --image_col=image_path

# Custom model and prompt
spr mllm_call_table data.csv \
    --model="gpt-4o-mini" \
    --text_prompt="Describe this image in detail" \
    --output_file="results.csv"
```

- **Batch image recognition (folder)**
```bash
# Process all images in a folder
spr mllm_call_images ./photos --recursive=True

# Restrict file types and image count
spr mllm_call_images ./images \
    --extensions=".jpg,.png" \
    --max_num=50 \
    --output_file="analysis.csv"
```

#### Network and API
- **Async HTTP requests**
```bash
# POST request
spr post "https://api.example.com" '{"key": "value"}' --concurrent=10

# GET request
spr get_url "https://api.example.com" --concurrent=5
```

- **File transfer**
```bash
# P2P file transfer (based on croc)
spr send file.txt
spr recv  # Receive on another machine

# Cloud storage transfer
spr send2 file.txt workspace_name
spr recv2 file.txt workspace_name
```

#### Databases and Services
- **Start the multiprocessing sync server**
```bash
spr start_server --port=50001
```

- **Milvus vector database**
```bash
# Start the Milvus service
spr milvus start

# Stop the Milvus service
spr milvus stop
```

- **Data storage (FlaxKV)**
```bash
# Store a file under a key
spr set mykey /path/to/file.txt

# Retrieve stored data
spr get mykey

# List all stored keys
spr keys

# Clean up expired data
spr clean
```

#### Development Tools
- **Software installation**
```bash
# Install Node.js (via NVM)
spr install_node --version=18

# Install/uninstall Neovim
spr install_nvim --version=0.9.2
spr uninstall_nvim
```

- **Timer utility**
```bash
spr timer --dt=0.5  # Timer with a 0.5-second interval
```

- **Performance testing**
```bash
# Test the PyTorch environment
spr test_torch
```

#### Advanced Features
- **Reminder service**
```bash
# Start the web reminder service
spr reminder --port=8000
```

### Some useful functions

> `sparrow.relp`
> Resolve a relative path, making it easier to read or save files.

> `sparrow.performance.MeasureTime`
> Measure elapsed time (including GPU time).

> `sparrow.performance.get_process_memory`
> Get the memory occupied by the process.

> `sparrow.performance.get_virtual_memory`
> Get virtual memory information.

> `sparrow.add_env_path`
> Add a Python environment path (using a relative file path).
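A hypothetical usage sketch of these helpers. The exact signatures are not documented here, so the call patterns below (`relp` resolving against the calling file, `MeasureTime` used as a context manager) are assumptions to verify against the sparrow source.

```python
from sparrow import relp
from sparrow.performance import MeasureTime, get_process_memory

# Assumption: relp resolves a path relative to the calling file.
data_path = relp("./data/sample.txt")

# Assumption: MeasureTime can wrap a code block as a context manager.
with MeasureTime():
    text = open(data_path).read()

# Reports the memory occupied by the current process.
print(get_process_memory())
```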

            
