# Lingua SQL
一个基于 DeepSeek 和 ChromaDB 的文本转 SQL(Text-to-SQL)流水线工具。
## 项目简介
Lingua SQL 旨在帮助用户通过自然语言问题自动生成 SQL 查询,支持自定义训练、数据库结构导入、示例问答训练等,适用于数据分析、智能问答等场景。
## 主要功能
- 支持 DeepSeek、OpenAI 等大模型 API
- 支持 ChromaDB 作为向量数据库
- 支持 MySQL 数据库结构自动导入
- 支持自定义 DDL、示例问答、文档训练
- 支持持久化和内存两种存储方式
- 提供丰富的训练与推理接口
## 安装方法
建议使用 Python 3.8 及以上版本。
```bash
pip install -r requirements.txt
```
也可以基于 `pyproject.toml` 安装依赖,例如在项目根目录执行 `pip install .`。
## 快速开始
### 1. 基本用法
```python
import os
from dotenv import load_dotenv
from lingua_sql import LinguaSQL
# 加载环境变量
load_dotenv()
# 初始化 lingua_sql
nl = LinguaSQL(config={
    "api_key": os.getenv("DEEPSEEK_API_KEY"),
    "model": "deepseek-chat",
    "client": "in-memory" # 可选 "persistent"
})
# 添加 DDL
nl.train(ddl="""
CREATE TABLE customers (
    id INT PRIMARY KEY,
    name VARCHAR(100),
    email VARCHAR(100),
    created_at TIMESTAMP
);
""")
# 添加示例问答
nl.train(
    question="查询最近注册的5个客户",
    sql="SELECT name, email, created_at FROM customers ORDER BY created_at DESC LIMIT 5;"
)
# 生成 SQL
question = "查询订单金额最高的前3个客户"
sql = nl.ask(question)
print(f"问题: {question}")
print(f"生成的 SQL: {sql}")
```
### 2. 数据库结构自动导入
```python
from lingua_sql.database.mysql_connector import MySQLConnector
# 初始化数据库连接
conn = MySQLConnector(
    host="localhost",
    user="root",
    password="your_password",
    database="your_db"
)
conn.connect()
# 获取所有表结构并导入
for table in conn.get_all_tables():
    ddl = ... # 参见 examples/database_usage.py
    nl.train(ddl=ddl)
conn.disconnect()
```
更多用法请参考 `examples/` 目录。
## 联系方式
作者:殷旭
邮箱:2337302325@qq.com
## 许可证
MIT License

发布流程(维护者参考):

```bash
cd lingua_sql
git init
git remote add origin git@github.com:4869-yinxu/lingua_sql.git
git add .
git commit -m "Release version 0.1.3"
git tag v0.1.3
git push origin main
git push origin v0.1.3
```
Raw data
{
"_id": null,
"home_page": null,
"name": "lingua-sql",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.8",
"maintainer_email": null,
"keywords": "ai, chromadb, database, deepseek, nlp, text-to-sql",
"author": null,
"author_email": "\u6bb7\u65ed <2337302325@qq.com>",
"download_url": "https://files.pythonhosted.org/packages/80/d3/9cfdc19cca4b90c6c2031ac2779614f21dd927a5ad0d2da4541215c42276/lingua_sql-0.1.4.tar.gz",
"platform": null,
"description": "# Lingua SQL\n\n\u4e00\u4e2a\u57fa\u4e8e DeepSeek \u548c ChromaDB \u7684\u6587\u672c\u8f6c SQL\uff08Text-to-SQL\uff09\u6d41\u6c34\u7ebf\u5de5\u5177\u3002\n\n## \u9879\u76ee\u7b80\u4ecb\nLingua SQL \u65e8\u5728\u5e2e\u52a9\u7528\u6237\u901a\u8fc7\u81ea\u7136\u8bed\u8a00\u95ee\u9898\u81ea\u52a8\u751f\u6210 SQL \u67e5\u8be2\uff0c\u652f\u6301\u81ea\u5b9a\u4e49\u8bad\u7ec3\u3001\u6570\u636e\u5e93\u7ed3\u6784\u5bfc\u5165\u3001\u793a\u4f8b\u95ee\u7b54\u8bad\u7ec3\u7b49\uff0c\u9002\u7528\u4e8e\u6570\u636e\u5206\u6790\u3001\u667a\u80fd\u95ee\u7b54\u7b49\u573a\u666f\u3002\n\n## \u4e3b\u8981\u529f\u80fd\n- \u652f\u6301 DeepSeek\u3001OpenAI \u7b49\u5927\u6a21\u578b API\n- \u652f\u6301 ChromaDB \u4f5c\u4e3a\u5411\u91cf\u6570\u636e\u5e93\n- \u652f\u6301 MySQL \u6570\u636e\u5e93\u7ed3\u6784\u81ea\u52a8\u5bfc\u5165\n- \u652f\u6301\u81ea\u5b9a\u4e49 DDL\u3001\u793a\u4f8b\u95ee\u7b54\u3001\u6587\u6863\u8bad\u7ec3\n- \u652f\u6301\u6301\u4e45\u5316\u548c\u5185\u5b58\u4e24\u79cd\u5b58\u50a8\u65b9\u5f0f\n- \u63d0\u4f9b\u4e30\u5bcc\u7684\u8bad\u7ec3\u4e0e\u63a8\u7406\u63a5\u53e3\n\n## \u5b89\u88c5\u65b9\u6cd5\n\n\u5efa\u8bae\u4f7f\u7528 Python 3.8 \u53ca\u4ee5\u4e0a\u7248\u672c\u3002\n\n```bash\npip install -r requirements.txt\n```\n\u6216\u4f7f\u7528 `pyproject.toml` \u8fdb\u884c\u4f9d\u8d56\u7ba1\u7406\u3002\n\n## \u5feb\u901f\u5f00\u59cb\n\n### 1. 
\u57fa\u672c\u7528\u6cd5\n```python\nimport os\nfrom dotenv import load_dotenv\nfrom lingua_sql import LinguaSQL\n\n# \u52a0\u8f7d\u73af\u5883\u53d8\u91cf\nload_dotenv()\n\n# \u521d\u59cb\u5316 lingua_sql\nnl = LinguaSQL(config={\n \"api_key\": os.getenv(\"DEEPSEEK_API_KEY\"),\n \"model\": \"deepseek-chat\",\n \"client\": \"in-memory\" # \u53ef\u9009 \"persistent\"\n})\n\n# \u6dfb\u52a0 DDL\nnl.train(ddl=\"\"\"\nCREATE TABLE customers (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n email VARCHAR(100),\n created_at TIMESTAMP\n);\n\"\"\")\n\n# \u6dfb\u52a0\u793a\u4f8b\u95ee\u7b54\nnl.train(\n question=\"\u67e5\u8be2\u6700\u8fd1\u6ce8\u518c\u76845\u4e2a\u5ba2\u6237\",\n sql=\"SELECT name, email, created_at FROM customers ORDER BY created_at DESC LIMIT 5;\"\n)\n\n# \u751f\u6210 SQL\nquestion = \"\u67e5\u8be2\u8ba2\u5355\u91d1\u989d\u6700\u9ad8\u7684\u524d3\u4e2a\u5ba2\u6237\"\nsql = nl.ask(question)\nprint(f\"\u95ee\u9898: {question}\")\nprint(f\"\u751f\u6210\u7684 SQL: {sql}\")\n```\n\n### 2. \u6570\u636e\u5e93\u7ed3\u6784\u81ea\u52a8\u5bfc\u5165\n```python\nfrom lingua_sql.database.mysql_connector import MySQLConnector\n\n# \u521d\u59cb\u5316\u6570\u636e\u5e93\u8fde\u63a5\nconn = MySQLConnector(\n host=\"localhost\",\n user=\"root\",\n password=\"your_password\",\n database=\"your_db\"\n)\nconn.connect()\n\n# \u83b7\u53d6\u6240\u6709\u8868\u7ed3\u6784\u5e76\u5bfc\u5165\nfor table in conn.get_all_tables():\n ddl = ... # \u53c2\u89c1 examples/database_usage.py\n nl.train(ddl=ddl)\nconn.disconnect()\n```\n\n\u66f4\u591a\u7528\u6cd5\u8bf7\u53c2\u8003 `examples/` \u76ee\u5f55\u3002\n\n## \u8054\u7cfb\u65b9\u5f0f\n\u4f5c\u8005\uff1a\u6bb7\u65ed \n\u90ae\u7bb1\uff1a2337302325@qq.com\n\n## \u8bb8\u53ef\u8bc1\nMIT License \n\ncd lingua_sql\ngit init\ngit remote add origin git@github.com:4869-yinxu/lingua_sql.git\n\ngit add .\ngit commit -m \"Release version 0.1.3\"\ngit tag v0.1.3\ngit push origin main\ngit push origin v0.1.3",
"bugtrack_url": null,
"license": "MIT",
"summary": "A text-to-SQL pipeline using DeepSeek and ChromaDB",
"version": "0.1.4",
"project_urls": {
"Bug Tracker": "https://github.com/yourusername/nlpipe/issues",
"Changelog": "https://github.com/yourusername/nlpipe/blob/main/CHANGELOG.md",
"Documentation": "https://nlpipe.readthedocs.io/",
"Homepage": "https://github.com/yourusername/nlpipe",
"Repository": "https://github.com/yourusername/nlpipe"
},
"split_keywords": [
"ai",
" chromadb",
" database",
" deepseek",
" nlp",
" text-to-sql"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "7a8b98b19c7dcd3d7faacca9338976ed933a83a99adb01e28089c92e45510ad6",
"md5": "c11af8002fd03204df9605dbc4f47d46",
"sha256": "8cc8c2181f84641479203b70176a136c6bc48363c88cc224fe877ac16acdeabc"
},
"downloads": -1,
"filename": "lingua_sql-0.1.4-py3-none-any.whl",
"has_sig": false,
"md5_digest": "c11af8002fd03204df9605dbc4f47d46",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.8",
"size": 17071,
"upload_time": "2025-07-15T08:21:31",
"upload_time_iso_8601": "2025-07-15T08:21:31.495172Z",
"url": "https://files.pythonhosted.org/packages/7a/8b/98b19c7dcd3d7faacca9338976ed933a83a99adb01e28089c92e45510ad6/lingua_sql-0.1.4-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "80d39cfdc19cca4b90c6c2031ac2779614f21dd927a5ad0d2da4541215c42276",
"md5": "ac83802867029290d865b43c7d823b67",
"sha256": "3bd31c2ba73bbfaa7eef3c0bb3e69bee61d9306ac2f1d27f5dc3ab35ea791331"
},
"downloads": -1,
"filename": "lingua_sql-0.1.4.tar.gz",
"has_sig": false,
"md5_digest": "ac83802867029290d865b43c7d823b67",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.8",
"size": 14822316,
"upload_time": "2025-07-15T08:22:04",
"upload_time_iso_8601": "2025-07-15T08:22:04.085332Z",
"url": "https://files.pythonhosted.org/packages/80/d3/9cfdc19cca4b90c6c2031ac2779614f21dd927a5ad0d2da4541215c42276/lingua_sql-0.1.4.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-07-15 08:22:04",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "yourusername",
"github_project": "nlpipe",
"github_not_found": true,
"lcname": "lingua-sql"
}