Name | alawymdb |
Version | 0.2.4 |
download |
home_page | None |
Summary | Linear-scaling in-memory database optimized for ML workloads |
upload_time | 2025-08-28 09:27:09 |
maintainer | None |
docs_url | None |
author | None |
requires_python | >=3.7 |
license | MIT |
keywords |
database
columnar
in-memory
performance
|
VCS |
|
bugtrack_url |
|
requirements |
No requirements were recorded.
|
Travis-CI |
No Travis.
|
coveralls test coverage |
No coveralls.
|
# AlawymDB
[](https://badge.fury.io/py/alawymdb)
[](https://opensource.org/licenses/MIT)
**A**lmost **L**inear **A**ny **W**ay **Y**ou **M**easure - A high-performance in-memory database that achieves near-linear O(n) scaling for operations that traditionally suffer from O(n log n) complexity.
## 🚀 Breakthrough Performance
AlawymDB (pronounced "ah-LAY-wim") lives up to its name - delivering almost linear performance any way you measure it:
### Column Scaling Performance
```
Columns: 10 → 100 → 1000 → 2000
Cells/sec: 18.5M → 7.5M → 5.8M → 5.2M
Scaling: 1× → 10× → 100× → 200× (columns)
1× → 4.1× → 5.3× → 6.1× (time)
```
**Result: O(n) with minimal logarithmic factor - effectively linear!** 🎯
## 🔧 Installation
```bash
pip install alawymdb
```
## 💡 Quick Start
```python
import alawymdb as db

# Initialize the in-memory database.
db.create_database()

# Create a schema and a table; each column is (name, type, nullable).
db.create_schema("main")
db.create_table(
    "main",
    "users",
    [
        ("id", "UINT64", False),
        ("name", "STRING", False),
        ("age", "INT64", True),
        ("email", "STRING", True),
        ("score", "FLOAT64", True),
    ],
)

# Insert a few sample rows.
users = [
    (1, "Alice", 30, "alice@example.com", 95.5),
    (2, "Bob", 25, "bob@example.com", 87.3),
    (3, "Charlie", 35, "charlie@example.com", 92.1),
]
for user_id, name, age, email, score in users:
    db.insert_row("main", "users", [
        ("id", user_id),
        ("name", name),
        ("age", age),
        ("email", email),
        ("score", score),
    ])

# Query with SQL.
result = db.execute_sql("SELECT * FROM main.users")
print(result)

# SQL with a WHERE clause.
young_users = db.execute_sql("SELECT * FROM main.users WHERE age = 25")
print(f"Young users:\n{young_users}")

# Direct API queries.
all_users = db.select_all("main", "users")
print(f"Total users: {db.count_rows('main', 'users')}")
```
## 📊 E-Commerce Analytics Example
A comprehensive example demonstrating SQL aggregations, JOINs, and set operations:
```python
import alawymdb as db
import time
def setup_ecommerce_database():
    """Create an e-commerce schema with products, customers, and sales data."""
    print("🏪 Setting up E-commerce Database...")

    # Initialize database and schema.
    db.create_database()
    db.create_schema("ecommerce")

    # Table definitions; each column is (name, type, nullable).
    db.create_table(
        "ecommerce",
        "products",
        [
            ("product_id", "UINT64", False),
            ("name", "STRING", False),
            ("category", "STRING", False),
            ("price", "FLOAT64", False),
            ("stock", "INT64", False),
        ],
    )
    db.create_table(
        "ecommerce",
        "customers",
        [
            ("customer_id", "UINT64", False),
            ("name", "STRING", False),
            ("email", "STRING", False),
            ("city", "STRING", False),
            ("loyalty_points", "INT64", False),
        ],
    )
    db.create_table(
        "ecommerce",
        "sales",
        [
            ("sale_id", "UINT64", False),
            ("customer_id", "UINT64", False),
            ("product_id", "UINT64", False),
            ("quantity", "INT64", False),
            ("sale_amount", "FLOAT64", False),
        ],
    )

    # Sample products.
    products = [
        (1, "Laptop Pro", "Electronics", 1299.99, 50),
        (2, "Wireless Mouse", "Electronics", 29.99, 200),
        (3, "USB-C Hub", "Electronics", 49.99, 150),
        (4, "Coffee Maker", "Appliances", 89.99, 75),
        (5, "Desk Lamp", "Furniture", 39.99, 120),
    ]
    for product_id, name, category, price, stock in products:
        db.insert_row("ecommerce", "products", [
            ("product_id", product_id),
            ("name", name),
            ("category", category),
            ("price", price),
            ("stock", stock),
        ])

    # Sample customers.
    customers = [
        (1, "Alice Johnson", "alice@email.com", "New York", 1500),
        (2, "Bob Smith", "bob@email.com", "Los Angeles", 800),
        (3, "Charlie Brown", "charlie@email.com", "Chicago", 2000),
    ]
    for customer_id, name, email, city, loyalty_points in customers:
        db.insert_row("ecommerce", "customers", [
            ("customer_id", customer_id),
            ("name", name),
            ("email", email),
            ("city", city),
            ("loyalty_points", loyalty_points),
        ])

    # Sample sales: (sale_id, customer_id, product_id, quantity, sale_amount).
    sales = [
        (1, 1, 1, 1, 1299.99),  # Alice bought a laptop
        (2, 1, 2, 2, 59.98),    # Alice bought 2 mice
        (3, 2, 3, 1, 49.99),    # Bob bought a USB hub
        (4, 3, 4, 1, 89.99),    # Charlie bought a coffee maker
    ]
    for sale_id, customer_id, product_id, quantity, sale_amount in sales:
        db.insert_row("ecommerce", "sales", [
            ("sale_id", sale_id),
            ("customer_id", customer_id),
            ("product_id", product_id),
            ("quantity", quantity),
            ("sale_amount", sale_amount),
        ])

    print(f"✅ Database created with {db.count_rows('ecommerce', 'products')} products, "
          f"{db.count_rows('ecommerce', 'customers')} customers, "
          f"{db.count_rows('ecommerce', 'sales')} sales")
def run_analytics():
    """Demonstrate analytics queries with aggregations, JOINs, GROUP BY, and UNION.

    Assumes setup_ecommerce_database() has already populated the
    ``ecommerce`` schema.
    """
    # 1. Aggregation Functions
    print("\n📊 Aggregation Examples:")
    print("-" * 40)

    # COUNT
    result = db.execute_sql("SELECT COUNT(*) FROM ecommerce.sales")
    print(f"Total sales: {result}")

    # SUM
    result = db.execute_sql("SELECT SUM(sale_amount) FROM ecommerce.sales")
    print(f"Total revenue: {result}")

    # AVG
    result = db.execute_sql("SELECT AVG(price) FROM ecommerce.products")
    print(f"Average product price: {result}")

    # MIN/MAX
    result = db.execute_sql("SELECT MIN(price), MAX(price) FROM ecommerce.products")
    print(f"Price range: {result}")

    # 2. JOIN Operations
    print("\n🔗 JOIN Examples:")
    print("-" * 40)

    # Customer purchases with product details.
    sql = """
        SELECT
            c.name,
            p.name,
            s.quantity,
            s.sale_amount
        FROM ecommerce.sales s
        INNER JOIN ecommerce.customers c ON s.customer_id = c.customer_id
        INNER JOIN ecommerce.products p ON s.product_id = p.product_id
    """
    result = db.execute_sql(sql)
    print("Customer Purchase Details:")
    print(result)

    # 3. GROUP BY with aggregations
    print("\n📈 GROUP BY Examples:")
    print("-" * 40)
    # FIX: the join predicate was `c.customer_id = c.customer_id` (always
    # true, producing a cross join); it must compare sales to customers.
    sql = """
        SELECT
            c.name,
            COUNT(s.sale_id),
            SUM(s.sale_amount)
        FROM ecommerce.customers c
        INNER JOIN ecommerce.sales s ON s.customer_id = c.customer_id
        GROUP BY c.customer_id, c.name
    """
    try:
        result = db.execute_sql(sql)
        print("Sales by Customer:")
        print(result)
    except Exception:  # narrow enough for a demo; bare `except:` also caught KeyboardInterrupt
        # Fallback if GROUP BY not fully supported
        print("GROUP BY example - showing individual sales instead")
        sql_alt = """
            SELECT c.name, s.sale_amount
            FROM ecommerce.customers c
            INNER JOIN ecommerce.sales s ON s.customer_id = c.customer_id
        """
        print(db.execute_sql(sql_alt))

    # 4. Set Operations
    print("\n🔄 Set Operation Examples:")
    print("-" * 40)

    # UNION - High-value items (products over $50 OR customers with 1000+ points)
    sql = """
        SELECT name FROM ecommerce.products WHERE price > 50
        UNION
        SELECT name FROM ecommerce.customers WHERE loyalty_points > 1000
    """
    try:
        result = db.execute_sql(sql)
        print("High-value items and VIP customers:")
        print(result)
    except Exception:
        print("UNION example - showing separately:")
        print("Premium products:", db.execute_sql("SELECT name FROM ecommerce.products WHERE price > 50"))
        print("VIP customers:", db.execute_sql("SELECT name FROM ecommerce.customers WHERE loyalty_points > 1000"))
# Run the example: build the sample dataset, then execute the analytics queries.
setup_ecommerce_database()
run_analytics()
```
## 🎯 Working Example: Toy JOIN Operations
```python
import alawymdb as db
import time
def setup_toy_database():
    """Create toy database with customers and orders tables."""
    print("🔧 Setting up toy database...")
    db.create_database()
    db.create_schema("toy")

    # Customers table; each column is (name, type, nullable).
    db.create_table(
        "toy",
        "customers",
        [
            ("customer_id", "UINT64", False),
            ("name", "STRING", False),
            ("country", "STRING", False),
            ("join_date", "STRING", False),
        ],
    )

    # Orders table.
    db.create_table(
        "toy",
        "orders",
        [
            ("order_id", "UINT64", False),
            ("customer_id", "UINT64", False),
            ("product", "STRING", False),
            ("amount", "FLOAT64", False),
            ("order_date", "STRING", False),
        ],
    )

    # Insert customers.
    customers = [
        (1, "Alice", "USA", "2023-01-15"),
        (2, "Bob", "Canada", "2023-02-20"),
        (3, "Charlie", "UK", "2023-03-10"),
        (4, "Diana", "Germany", "2023-04-05"),
        (5, "Eve", "France", "2023-05-12"),
    ]
    for customer_id, name, country, join_date in customers:
        db.insert_row("toy", "customers", [
            ("customer_id", customer_id),
            ("name", name),
            ("country", country),
            ("join_date", join_date),
        ])

    # Insert orders (some customers have multiple orders, some have none).
    orders = [
        (101, 1, "Laptop", 1200.0, "2024-01-10"),
        (102, 1, "Mouse", 25.0, "2024-01-15"),
        (103, 2, "Keyboard", 75.0, "2024-01-20"),
        (104, 3, "Monitor", 350.0, "2024-02-01"),
        (105, 1, "Headphones", 150.0, "2024-02-15"),
        (106, 3, "Webcam", 80.0, "2024-03-01"),
        (107, 2, "USB Drive", 30.0, "2024-03-10"),
        # Note: Diana (4) and Eve (5) have no orders
    ]
    for order_id, customer_id, product, amount, order_date in orders:
        db.insert_row("toy", "orders", [
            ("order_id", order_id),
            ("customer_id", customer_id),
            ("product", product),
            ("amount", amount),
            ("order_date", order_date),
        ])

    print("✅ Toy database created successfully!")
    print(f"   - Customers: {db.count_rows('toy', 'customers')}")
    print(f"   - Orders: {db.count_rows('toy', 'orders')}")
def demonstrate_joins():
    """Demonstrate INNER, LEFT, RIGHT, and CROSS JOIN operations on the toy schema."""
    print("\n" + "=" * 80)
    print("JOIN DEMONSTRATIONS")
    print("=" * 80)

    # 1. INNER JOIN
    print("\n1️⃣ INNER JOIN - Customers with their orders")
    print("-" * 60)
    sql = """
        SELECT
            c.name,
            c.country,
            o.product,
            o.amount
        FROM toy.customers c
        INNER JOIN toy.orders o ON c.customer_id = o.customer_id
        ORDER BY c.name, o.amount DESC
    """
    result = db.execute_sql(sql)
    print(result)

    # 2. LEFT JOIN
    print("\n2️⃣ LEFT JOIN - All customers, including those without orders")
    print("-" * 60)
    sql = """
        SELECT
            c.name,
            c.country,
            o.order_id,
            o.product,
            o.amount
        FROM toy.customers c
        LEFT JOIN toy.orders o ON c.customer_id = o.customer_id
        ORDER BY c.name
    """
    result = db.execute_sql(sql)
    print(result)

    # 3. RIGHT JOIN
    print("\n3️⃣ RIGHT JOIN - All orders with customer details")
    print("-" * 60)
    sql = """
        SELECT
            c.name,
            c.country,
            o.order_id,
            o.product,
            o.amount
        FROM toy.customers c
        RIGHT JOIN toy.orders o ON c.customer_id = o.customer_id
        ORDER BY o.order_id
    """
    result = db.execute_sql(sql)
    print(result)

    # 4. CROSS JOIN (Cartesian product)
    print("\n4️⃣ CROSS JOIN - Every customer with every order (Cartesian product)")
    print("-" * 60)
    sql = """
        SELECT
            c.name,
            o.product
        FROM toy.customers c
        CROSS JOIN toy.orders o
    """
    result = db.execute_sql(sql)
    # FIX: the original counted occurrences of 'Product_' in the result text,
    # but toy products are named "Laptop", "Mouse", etc., so that count was
    # always 0. Report the number of result lines instead (5 customers x
    # 7 orders = 35 data rows; the exact line count may include header lines).
    lines = result.strip().split('\n')
    print(f"Total result lines: {len(lines)} (expected 5 × 7 = 35 combinations)")
    print("(Showing first few rows only...)")
    # Show just the first few lines.
    print('\n'.join(lines[:10]))
def main():
    """Run the toy JOIN example end to end."""
    print("🚀 AlawymDB Toy JOIN Example")
    print("=" * 80)
    setup_toy_database()
    demonstrate_joins()
    print("\n" + "=" * 80)
    print("✅ Toy JOIN demonstration complete!")


if __name__ == "__main__":
    main()
```
## 🎯 Working Example: Analytics with Pandas Integration
```python
import alawymdb as db
import numpy as np
import pandas as pd

# Set up a database with one table of employees.
db.create_database()
db.create_schema("test_schema")
db.create_table(
    "test_schema",
    "employees",
    [
        ("id", "UINT64", False),
        ("name", "STRING", False),
        ("age", "UINT64", True),
        ("salary", "FLOAT64", True),
        ("department", "STRING", True),
    ],
)

# Insert test data.
employees = [
    (1, "Alice Johnson", 28, 75000.0, "Engineering"),
    (2, "Bob Smith", 35, 85000.0, "Sales"),
    (3, "Charlie Brown", 42, 95000.0, "Engineering"),
    (4, "Diana Prince", 31, 78000.0, "Marketing"),
    (5, "Eve Adams", 26, 72000.0, "Sales"),
]
for emp_id, name, age, salary, department in employees:
    db.insert_row("test_schema", "employees", [
        ("id", emp_id),
        ("name", name),
        ("age", age),
        ("salary", salary),
        ("department", department),
    ])

# Convert the table to a Pandas DataFrame.
df = db.to_pandas("test_schema", "employees")
print("DataFrame shape:", df.shape)
print("\nDataFrame head:")
print(df.head())

# Ordinary Pandas operations work on the exported frame.
print(f"\nAverage salary: ${df['salary'].mean():,.2f}")
print("\nSalary by department:")
print(df.groupby('department')['salary'].agg(['mean', 'count']))

# Single column as a NumPy array.
ages = db.to_numpy("test_schema", "employees", "age")
print(f"\nAges array: {ages}")
print(f"Mean age: {np.mean(ages):.1f}")

# Whole table as a dict of columns.
data_dict = db.select_as_dict("test_schema", "employees")
print(f"\nColumns available: {list(data_dict.keys())}")
```
## 📊 Create Table from Pandas DataFrame
```python
import alawymdb as db
import pandas as pd
import numpy as np

db.create_database()
db.create_schema("data")

# Build a DataFrame of 100 synthetic products.
df = pd.DataFrame({
    'product_id': np.arange(1, 101),
    'product_name': [f'Product_{i}' for i in range(1, 101)],
    'price': np.random.uniform(10, 100, 100).round(2),
    'quantity': np.random.randint(1, 100, 100),
    'in_stock': np.random.choice([0, 1], 100)  # Use 0/1 instead of True/False
})

# Import the DataFrame into AlawymDB.
result = db.from_pandas(df, "data", "products")
print(result)

# Verify the round trip by reading the table back.
df_verify = db.to_pandas("data", "products")
print(f"Imported {len(df_verify)} rows with {len(df_verify.columns)} columns")
print(df_verify.head())

# Query the imported data with SQL.
result = db.execute_sql("SELECT * FROM data.products WHERE price > 50.0")
print(f"Products with price > 50: {result}")
```
## 📈 Wide Table Example (Working Version)
```python
import alawymdb as db

db.create_database()
db.create_schema("wide")

# Schema: one id column plus `num_columns` nullable float metrics.
num_columns = 100
columns = [("id", "UINT64", False)] + [
    (f"metric_{i}", "FLOAT64", True) for i in range(num_columns)
]
db.create_table("wide", "metrics", columns)

# Insert 100 rows of synthetic metric values.
for row_id in range(100):
    row = [("id", row_id)] + [
        (f"metric_{i}", float(row_id * 0.1 + i)) for i in range(num_columns)
    ]
    db.insert_row("wide", "metrics", row)

# Query using the direct API (more reliable for wide tables).
all_data = db.select_all("wide", "metrics")
print(f"Inserted {len(all_data)} rows")

# Convert to Pandas for analysis.
df = db.to_pandas("wide", "metrics")
print(f"DataFrame shape: {df.shape}")
print(f"Columns: {df.columns[:5].tolist()} ... {df.columns[-5:].tolist()}")

# Fetch one column as a NumPy array.
metric_0 = db.to_numpy("wide", "metrics", "metric_0")
print(f"Metric_0 stats: mean={metric_0.mean():.2f}, std={metric_0.std():.2f}")
```
## 🚀 Performance Test
```python
import alawymdb as db
import pandas as pd
import numpy as np
import time

db.create_database()
db.create_schema("perf")

# Build a large synthetic DataFrame.
n_rows = 10000
df_large = pd.DataFrame({
    'id': np.arange(n_rows),
    'value1': np.random.randn(n_rows),
    'value2': np.random.randn(n_rows) * 100,
    'category': np.random.choice(['A', 'B', 'C', 'D', 'E'], n_rows),
    'flag': np.random.choice([0, 1], n_rows)
})

# Time the import into AlawymDB.
start = time.time()
db.from_pandas(df_large, "perf", "large_table")
import_time = time.time() - start
print(f"Import {n_rows} rows: {import_time:.3f}s ({n_rows/import_time:.0f} rows/sec)")

# Time the export back to Pandas.
start = time.time()
df_export = db.to_pandas("perf", "large_table")
export_time = time.time() - start
print(f"Export to Pandas: {export_time:.3f}s ({n_rows/export_time:.0f} rows/sec)")

# Verify the round trip preserved the shape.
print(f"Shape verification: {df_export.shape}")
```
## 🏗️ Why "Almost Linear Any Way You Measure"?
The name AlawymDB reflects our core achievement:
- **Column scaling**: O(n) with tiny logarithmic factor (log₂₅₆)
- **Row scaling**: Pure O(n) for scans
- **Memory usage**: Linear with data size
- **Wide tables**: Tested up to 5000 columns with maintained performance
## 📊 Current SQL Support
### ✅ Working SQL Features
- `SELECT * FROM table`
- `SELECT column1, column2 FROM table`
- `SELECT * FROM table WHERE column = value`
- `SELECT * FROM table WHERE column > value`
- **Aggregation Functions**: `COUNT()`, `SUM()`, `AVG()`, `MIN()`, `MAX()`
- **JOIN Operations**: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `CROSS JOIN`
- **Set Operations**: `UNION`, `INTERSECT`, `EXCEPT`
- **GROUP BY** with aggregations
- **ORDER BY** with ASC/DESC
- **LIMIT** clause
- **Subqueries** with IN operator
### ⚠️ SQL Limitations
- Type matching is strict (use 50.0 for FLOAT64, 50 for INT64)
- Column aliases (AS) not yet supported
- ORDER BY is intentionally not supported at the moment
- CASE statements coming soon
- String matching does not guarantee 100% precision (strings are internally represented as u64 values)
## 🎨 API Reference
```python
# Core operations
db.create_database()
db.create_schema(schema_name)
db.create_table(schema, table, columns)
db.insert_row(schema, table, values)
# Query operations
db.select_all(schema, table)
db.select_where(schema, table, columns, where_col, where_val)
db.count_rows(schema, table)
db.execute_sql(sql_query) # Full SQL support including JOINs, aggregations, set operations
# Data science integrations
db.to_pandas(schema, table) # Export to DataFrame
db.to_numpy(schema, table, column) # Export column to NumPy
db.from_pandas(df, schema, table) # Import from DataFrame
db.select_as_dict(schema, table) # Get as Python dict
```
## 🚦 Performance Characteristics
| Operation | Complexity | Verified Scale |
|-----------|------------|----------------|
| INSERT | O(1) | 2M rows × 2K columns |
| SELECT * | O(n) | 10K rows × 5K columns |
| WHERE clause | O(n) | 1M rows tested |
| JOIN | O(n×m) | Tables up to 100K rows |
| GROUP BY | O(n) | 100K groups tested |
| Aggregations | O(n) | 1M rows tested |
| to_pandas() | O(n) | 100K rows tested |
| from_pandas() | O(n) | 100K rows tested |
| Column scaling | ~O(n) | Up to 5000 columns |
## 📜 License
MIT License
---
**AlawymDB**: Almost Linear Any Way You Measure - because performance should scale with your data, not against it.
Raw data
{
"_id": null,
"home_page": null,
"name": "alawymdb",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.7",
"maintainer_email": null,
"keywords": "database, columnar, in-memory, performance",
"author": null,
"author_email": "Tomio Kobayashi <tomkob99@yahoo.co.jp>",
"download_url": null,
"platform": null,
"description": "# AlawymDB\n\n[](https://badge.fury.io/py/alawymdb)\n[](https://opensource.org/licenses/MIT)\n\n**A**lmost **L**inear **A**ny **W**ay **Y**ou **M**easure - A high-performance in-memory database that achieves near-linear O(n) scaling for operations that traditionally suffer from O(n log n) complexity.\n\n## \ud83d\ude80 Breakthrough Performance\n\nAlawymDB (pronounced \"ah-LAY-wim\") lives up to its name - delivering almost linear performance any way you measure it:\n\n### Column Scaling Performance\n```\nColumns: 10 \u2192 100 \u2192 1000 \u2192 2000\nCells/sec: 18.5M \u2192 7.5M \u2192 5.8M \u2192 5.2M\nScaling: 1\u00d7 \u2192 10\u00d7 \u2192 100\u00d7 \u2192 200\u00d7 (columns)\n 1\u00d7 \u2192 4.1\u00d7 \u2192 5.3\u00d7 \u2192 6.1\u00d7 (time)\n```\n\n**Result: O(n) with minimal logarithmic factor - effectively linear!** \ud83c\udfaf\n\n## \ud83d\udd27 Installation\n\n```bash\npip install alawymdb\n```\n\n## \ud83d\udca1 Quick Start\n\n```python\nimport alawymdb as db\n\n# Initialize database\ndb.create_database()\n\n# Create schema and table\ndb.create_schema(\"main\")\ndb.create_table(\n \"main\", \n \"users\",\n [\n (\"id\", \"UINT64\", False),\n (\"name\", \"STRING\", False),\n (\"age\", \"INT64\", True),\n (\"email\", \"STRING\", True),\n (\"score\", \"FLOAT64\", True)\n ]\n)\n\n# Insert data\nusers = [\n (1, \"Alice\", 30, \"alice@example.com\", 95.5),\n (2, \"Bob\", 25, \"bob@example.com\", 87.3),\n (3, \"Charlie\", 35, \"charlie@example.com\", 92.1),\n]\n\nfor user_id, name, age, email, score in users:\n db.insert_row(\"main\", \"users\", [\n (\"id\", user_id),\n (\"name\", name),\n (\"age\", age),\n (\"email\", email),\n (\"score\", score)\n ])\n\n# Query with SQL\nresult = db.execute_sql(\"SELECT * FROM main.users\")\nprint(result)\n\n# SQL with WHERE clause\nyoung_users = db.execute_sql(\"SELECT * FROM main.users WHERE age = 25\")\nprint(f\"Young users:\\n{young_users}\")\n\n# Direct API queries\nall_users = db.select_all(\"main\", 
\"users\")\nprint(f\"Total users: {db.count_rows('main', 'users')}\")\n```\n\n## \ud83d\udcca E-Commerce Analytics Example\n\nA comprehensive example demonstrating SQL aggregations, JOINs, and set operations:\n\n```python\nimport alawymdb as db\nimport time\n\ndef setup_ecommerce_database():\n \"\"\"Create an e-commerce database with products, customers, and sales data\"\"\"\n print(\"\ud83c\udfea Setting up E-commerce Database...\")\n \n # Initialize database\n db.create_database()\n db.create_schema(\"ecommerce\")\n \n # Create products table\n db.create_table(\n \"ecommerce\", \n \"products\",\n [\n (\"product_id\", \"UINT64\", False),\n (\"name\", \"STRING\", False),\n (\"category\", \"STRING\", False),\n (\"price\", \"FLOAT64\", False),\n (\"stock\", \"INT64\", False),\n ]\n )\n \n # Create customers table\n db.create_table(\n \"ecommerce\",\n \"customers\",\n [\n (\"customer_id\", \"UINT64\", False),\n (\"name\", \"STRING\", False),\n (\"email\", \"STRING\", False),\n (\"city\", \"STRING\", False),\n (\"loyalty_points\", \"INT64\", False),\n ]\n )\n \n # Create sales table\n db.create_table(\n \"ecommerce\",\n \"sales\",\n [\n (\"sale_id\", \"UINT64\", False),\n (\"customer_id\", \"UINT64\", False),\n (\"product_id\", \"UINT64\", False),\n (\"quantity\", \"INT64\", False),\n (\"sale_amount\", \"FLOAT64\", False),\n ]\n )\n \n # Insert sample products\n products = [\n (1, \"Laptop Pro\", \"Electronics\", 1299.99, 50),\n (2, \"Wireless Mouse\", \"Electronics\", 29.99, 200),\n (3, \"USB-C Hub\", \"Electronics\", 49.99, 150),\n (4, \"Coffee Maker\", \"Appliances\", 89.99, 75),\n (5, \"Desk Lamp\", \"Furniture\", 39.99, 120),\n ]\n \n for prod in products:\n db.insert_row(\"ecommerce\", \"products\", [\n (\"product_id\", prod[0]),\n (\"name\", prod[1]),\n (\"category\", prod[2]),\n (\"price\", prod[3]),\n (\"stock\", prod[4]),\n ])\n \n # Insert sample customers\n customers = [\n (1, \"Alice Johnson\", \"alice@email.com\", \"New York\", 1500),\n (2, \"Bob Smith\", 
\"bob@email.com\", \"Los Angeles\", 800),\n (3, \"Charlie Brown\", \"charlie@email.com\", \"Chicago\", 2000),\n ]\n \n for cust in customers:\n db.insert_row(\"ecommerce\", \"customers\", [\n (\"customer_id\", cust[0]),\n (\"name\", cust[1]),\n (\"email\", cust[2]),\n (\"city\", cust[3]),\n (\"loyalty_points\", cust[4]),\n ])\n \n # Insert sample sales\n sales = [\n (1, 1, 1, 1, 1299.99), # Alice bought a laptop\n (2, 1, 2, 2, 59.98), # Alice bought 2 mice\n (3, 2, 3, 1, 49.99), # Bob bought a USB hub\n (4, 3, 4, 1, 89.99), # Charlie bought a coffee maker\n ]\n \n for sale in sales:\n db.insert_row(\"ecommerce\", \"sales\", [\n (\"sale_id\", sale[0]),\n (\"customer_id\", sale[1]),\n (\"product_id\", sale[2]),\n (\"quantity\", sale[3]),\n (\"sale_amount\", sale[4]),\n ])\n \n print(f\"\u2705 Database created with {db.count_rows('ecommerce', 'products')} products, \"\n f\"{db.count_rows('ecommerce', 'customers')} customers, \"\n f\"{db.count_rows('ecommerce', 'sales')} sales\")\n\ndef run_analytics():\n \"\"\"Demonstrate analytics queries with aggregations and JOINs\"\"\"\n \n # 1. Aggregation Functions\n print(\"\\n\ud83d\udcca Aggregation Examples:\")\n print(\"-\" * 40)\n \n # COUNT\n result = db.execute_sql(\"SELECT COUNT(*) FROM ecommerce.sales\")\n print(f\"Total sales: {result}\")\n \n # SUM\n result = db.execute_sql(\"SELECT SUM(sale_amount) FROM ecommerce.sales\")\n print(f\"Total revenue: {result}\")\n \n # AVG\n result = db.execute_sql(\"SELECT AVG(price) FROM ecommerce.products\")\n print(f\"Average product price: {result}\")\n \n # MIN/MAX\n result = db.execute_sql(\"SELECT MIN(price), MAX(price) FROM ecommerce.products\")\n print(f\"Price range: {result}\")\n \n # 2. 
JOIN Operations\n print(\"\\n\ud83d\udd17 JOIN Examples:\")\n print(\"-\" * 40)\n \n # Customer purchases with product details\n sql = \"\"\"\n SELECT \n c.name,\n p.name,\n s.quantity,\n s.sale_amount\n FROM ecommerce.sales s\n INNER JOIN ecommerce.customers c ON s.customer_id = c.customer_id\n INNER JOIN ecommerce.products p ON s.product_id = p.product_id\n \"\"\"\n result = db.execute_sql(sql)\n print(\"Customer Purchase Details:\")\n print(result)\n \n # 3. GROUP BY with aggregations\n print(\"\\n\ud83d\udcc8 GROUP BY Examples:\")\n print(\"-\" * 40)\n \n sql = \"\"\"\n SELECT \n c.name,\n COUNT(s.sale_id),\n SUM(s.sale_amount)\n FROM ecommerce.customers c\n INNER JOIN ecommerce.sales s ON c.customer_id = c.customer_id\n GROUP BY c.customer_id, c.name\n \"\"\"\n try:\n result = db.execute_sql(sql)\n print(\"Sales by Customer:\")\n print(result)\n except:\n # Fallback if GROUP BY not fully supported\n print(\"GROUP BY example - showing individual sales instead\")\n sql_alt = \"\"\"\n SELECT c.name, s.sale_amount\n FROM ecommerce.customers c\n INNER JOIN ecommerce.sales s ON c.customer_id = c.customer_id\n \"\"\"\n print(db.execute_sql(sql_alt))\n \n # 4. 
Set Operations\n print(\"\\n\ud83d\udd04 Set Operation Examples:\")\n print(\"-\" * 40)\n \n # UNION - High-value items (products over $50 OR customers with 1000+ points)\n sql = \"\"\"\n SELECT name FROM ecommerce.products WHERE price > 50\n UNION\n SELECT name FROM ecommerce.customers WHERE loyalty_points > 1000\n \"\"\"\n try:\n result = db.execute_sql(sql)\n print(\"High-value items and VIP customers:\")\n print(result)\n except:\n print(\"UNION example - showing separately:\")\n print(\"Premium products:\", db.execute_sql(\"SELECT name FROM ecommerce.products WHERE price > 50\"))\n print(\"VIP customers:\", db.execute_sql(\"SELECT name FROM ecommerce.customers WHERE loyalty_points > 1000\"))\n\n# Run the example\nsetup_ecommerce_database()\nrun_analytics()\n```\n\n## \ud83c\udfaf Working Example: Toy JOIN Operations\n\n```python\nimport alawymdb as db\nimport time\n\ndef setup_toy_database():\n \"\"\"Create toy database with customers and orders tables\"\"\"\n print(\"\ud83d\udd27 Setting up toy database...\")\n \n db.create_database()\n db.create_schema(\"toy\")\n \n # Create customers table\n db.create_table(\n \"toy\", \n \"customers\",\n [\n (\"customer_id\", \"UINT64\", False),\n (\"name\", \"STRING\", False),\n (\"country\", \"STRING\", False),\n (\"join_date\", \"STRING\", False),\n ]\n )\n \n # Create orders table\n db.create_table(\n \"toy\",\n \"orders\",\n [\n (\"order_id\", \"UINT64\", False),\n (\"customer_id\", \"UINT64\", False),\n (\"product\", \"STRING\", False),\n (\"amount\", \"FLOAT64\", False),\n (\"order_date\", \"STRING\", False),\n ]\n )\n \n # Insert customers\n customers = [\n (1, \"Alice\", \"USA\", \"2023-01-15\"),\n (2, \"Bob\", \"Canada\", \"2023-02-20\"),\n (3, \"Charlie\", \"UK\", \"2023-03-10\"),\n (4, \"Diana\", \"Germany\", \"2023-04-05\"),\n (5, \"Eve\", \"France\", \"2023-05-12\"),\n ]\n \n for cust in customers:\n db.insert_row(\"toy\", \"customers\", [\n (\"customer_id\", cust[0]),\n (\"name\", cust[1]),\n (\"country\", 
cust[2]),\n (\"join_date\", cust[3]),\n ])\n \n # Insert orders (some customers have multiple orders, some have none)\n orders = [\n (101, 1, \"Laptop\", 1200.0, \"2024-01-10\"),\n (102, 1, \"Mouse\", 25.0, \"2024-01-15\"),\n (103, 2, \"Keyboard\", 75.0, \"2024-01-20\"),\n (104, 3, \"Monitor\", 350.0, \"2024-02-01\"),\n (105, 1, \"Headphones\", 150.0, \"2024-02-15\"),\n (106, 3, \"Webcam\", 80.0, \"2024-03-01\"),\n (107, 2, \"USB Drive\", 30.0, \"2024-03-10\"),\n # Note: Diana (4) and Eve (5) have no orders\n ]\n \n for order in orders:\n db.insert_row(\"toy\", \"orders\", [\n (\"order_id\", order[0]),\n (\"customer_id\", order[1]),\n (\"product\", order[2]),\n (\"amount\", order[3]),\n (\"order_date\", order[4]),\n ])\n \n print(\"\u2705 Toy database created successfully!\")\n print(f\" - Customers: {db.count_rows('toy', 'customers')}\")\n print(f\" - Orders: {db.count_rows('toy', 'orders')}\")\n\ndef demonstrate_joins():\n \"\"\"Demonstrate various JOIN operations\"\"\"\n \n print(\"\\n\" + \"=\"*80)\n print(\"JOIN DEMONSTRATIONS\")\n print(\"=\"*80)\n \n # 1. INNER JOIN\n print(\"\\n1\ufe0f\u20e3 INNER JOIN - Customers with their orders\")\n print(\"-\" * 60)\n sql = \"\"\"\n SELECT \n c.name,\n c.country,\n o.product,\n o.amount\n FROM toy.customers c\n INNER JOIN toy.orders o ON c.customer_id = o.customer_id\n ORDER BY c.name, o.amount DESC\n \"\"\"\n result = db.execute_sql(sql)\n print(result)\n \n # 2. LEFT JOIN\n print(\"\\n2\ufe0f\u20e3 LEFT JOIN - All customers, including those without orders\")\n print(\"-\" * 60)\n sql = \"\"\"\n SELECT \n c.name,\n c.country,\n o.order_id,\n o.product,\n o.amount\n FROM toy.customers c\n LEFT JOIN toy.orders o ON c.customer_id = o.customer_id\n ORDER BY c.name\n \"\"\"\n result = db.execute_sql(sql)\n print(result)\n \n # 3. 
RIGHT JOIN\n print(\"\\n3\ufe0f\u20e3 RIGHT JOIN - All orders with customer details\")\n print(\"-\" * 60)\n sql = \"\"\"\n SELECT \n c.name,\n c.country,\n o.order_id,\n o.product,\n o.amount\n FROM toy.customers c\n RIGHT JOIN toy.orders o ON c.customer_id = o.customer_id\n ORDER BY o.order_id\n \"\"\"\n result = db.execute_sql(sql)\n print(result)\n \n # 4. CROSS JOIN (Cartesian product)\n print(\"\\n4\ufe0f\u20e3 CROSS JOIN - Every customer with every order (Cartesian product)\")\n print(\"-\" * 60)\n sql = \"\"\"\n SELECT \n c.name,\n o.product\n FROM toy.customers c\n CROSS JOIN toy.orders o\n \"\"\"\n result = db.execute_sql(sql)\n print(f\"Total combinations: {result.count('Product_')} rows\")\n print(\"(Showing first few rows only...)\")\n # Show just first few lines\n lines = result.split('\\n')[:10]\n print('\\n'.join(lines))\n\ndef main():\n \"\"\"Run the toy JOIN example\"\"\"\n print(\"\ud83d\ude80 AlawymDB Toy JOIN Example\")\n print(\"=\"*80)\n \n setup_toy_database()\n demonstrate_joins()\n \n print(\"\\n\" + \"=\"*80)\n print(\"\u2705 Toy JOIN demonstration complete!\")\n\nif __name__ == \"__main__\":\n main()\n```\n\n## \ud83c\udfaf Working Example: Analytics with Pandas Integration\n\n```python\nimport alawymdb as db\nimport numpy as np\nimport pandas as pd\n\n# Setup\ndb.create_database()\ndb.create_schema(\"test_schema\")\n\n# Create employees table\ndb.create_table(\n \"test_schema\",\n \"employees\",\n [\n (\"id\", \"UINT64\", False),\n (\"name\", \"STRING\", False),\n (\"age\", \"UINT64\", True),\n (\"salary\", \"FLOAT64\", True),\n (\"department\", \"STRING\", True)\n ]\n)\n\n# Insert test data\nemployees = [\n (1, \"Alice Johnson\", 28, 75000.0, \"Engineering\"),\n (2, \"Bob Smith\", 35, 85000.0, \"Sales\"),\n (3, \"Charlie Brown\", 42, 95000.0, \"Engineering\"),\n (4, \"Diana Prince\", 31, 78000.0, \"Marketing\"),\n (5, \"Eve Adams\", 26, 72000.0, \"Sales\"),\n]\n\nfor emp in employees:\n db.insert_row(\"test_schema\", \"employees\", [\n 
(\"id\", emp[0]),\n (\"name\", emp[1]),\n (\"age\", emp[2]),\n (\"salary\", emp[3]),\n (\"department\", emp[4])\n ])\n\n# Convert to Pandas DataFrame\ndf = db.to_pandas(\"test_schema\", \"employees\")\nprint(\"DataFrame shape:\", df.shape)\nprint(\"\\nDataFrame head:\")\nprint(df.head())\n\n# Pandas operations\nprint(f\"\\nAverage salary: ${df['salary'].mean():,.2f}\")\nprint(\"\\nSalary by department:\")\nprint(df.groupby('department')['salary'].agg(['mean', 'count']))\n\n# Get as NumPy array\nages = db.to_numpy(\"test_schema\", \"employees\", \"age\")\nprint(f\"\\nAges array: {ages}\")\nprint(f\"Mean age: {np.mean(ages):.1f}\")\n\n# Get data as dictionary\ndata_dict = db.select_as_dict(\"test_schema\", \"employees\")\nprint(f\"\\nColumns available: {list(data_dict.keys())}\")\n```\n\n## \ud83d\udcca Create Table from Pandas DataFrame\n\n```python\nimport alawymdb as db\nimport pandas as pd\nimport numpy as np\n\ndb.create_database()\ndb.create_schema(\"data\")\n\n# Create a DataFrame\ndf = pd.DataFrame({\n 'product_id': np.arange(1, 101),\n 'product_name': [f'Product_{i}' for i in range(1, 101)],\n 'price': np.random.uniform(10, 100, 100).round(2),\n 'quantity': np.random.randint(1, 100, 100),\n 'in_stock': np.random.choice([0, 1], 100) # Use 0/1 instead of True/False\n})\n\n# Import DataFrame to AlawymDB\nresult = db.from_pandas(df, \"data\", \"products\")\nprint(result)\n\n# Verify by reading back\ndf_verify = db.to_pandas(\"data\", \"products\")\nprint(f\"Imported {len(df_verify)} rows with {len(df_verify.columns)} columns\")\nprint(df_verify.head())\n\n# Query the imported data\nresult = db.execute_sql(\"SELECT * FROM data.products WHERE price > 50.0\")\nprint(f\"Products with price > 50: {result}\")\n```\n\n## \ud83d\udcc8 Wide Table Example (Working Version)\n\n```python\nimport alawymdb as db\n\ndb.create_database()\ndb.create_schema(\"wide\")\n\n# Create table with many columns\nnum_columns = 100\ncolumns = [(\"id\", \"UINT64\", False)]\ncolumns += 
[(f\"metric_{i}\", \"FLOAT64\", True) for i in range(num_columns)]\n\ndb.create_table(\"wide\", \"metrics\", columns)\n\n# Insert data\nfor row_id in range(100):\n values = [(\"id\", row_id)]\n values += [(f\"metric_{i}\", float(row_id * 0.1 + i)) for i in range(num_columns)]\n db.insert_row(\"wide\", \"metrics\", values)\n\n# Query using direct API (more reliable for wide tables)\nall_data = db.select_all(\"wide\", \"metrics\")\nprint(f\"Inserted {len(all_data)} rows\")\n\n# Convert to Pandas for analysis\ndf = db.to_pandas(\"wide\", \"metrics\")\nprint(f\"DataFrame shape: {df.shape}\")\nprint(f\"Columns: {df.columns[:5].tolist()} ... {df.columns[-5:].tolist()}\")\n\n# Get specific column as NumPy array\nmetric_0 = db.to_numpy(\"wide\", \"metrics\", \"metric_0\")\nprint(f\"Metric_0 stats: mean={metric_0.mean():.2f}, std={metric_0.std():.2f}\")\n```\n\n## \ud83d\ude80 Performance Test\n\n```python\nimport alawymdb as db\nimport pandas as pd\nimport numpy as np\nimport time\n\ndb.create_database()\ndb.create_schema(\"perf\")\n\n# Create a large DataFrame\nn_rows = 10000\ndf_large = pd.DataFrame({\n 'id': np.arange(n_rows),\n 'value1': np.random.randn(n_rows),\n 'value2': np.random.randn(n_rows) * 100,\n 'category': np.random.choice(['A', 'B', 'C', 'D', 'E'], n_rows),\n 'flag': np.random.choice([0, 1], n_rows)\n})\n\n# Time the import\nstart = time.time()\ndb.from_pandas(df_large, \"perf\", \"large_table\")\nimport_time = time.time() - start\nprint(f\"Import {n_rows} rows: {import_time:.3f}s ({n_rows/import_time:.0f} rows/sec)\")\n\n# Time the export\nstart = time.time()\ndf_export = db.to_pandas(\"perf\", \"large_table\")\nexport_time = time.time() - start\nprint(f\"Export to Pandas: {export_time:.3f}s ({n_rows/export_time:.0f} rows/sec)\")\n\n# Verify\nprint(f\"Shape verification: {df_export.shape}\")\n```\n\n## \ud83c\udfd7\ufe0f Why \"Almost Linear Any Way You Measure\"?\n\nThe name AlawymDB reflects our core achievement:\n- **Column scaling**: O(n) with tiny 
logarithmic factor (log\u2082\u2085\u2086)\n- **Row scaling**: Pure O(n) for scans\n- **Memory usage**: Linear with data size\n- **Wide tables**: Tested up to 5000 columns with maintained performance\n\n## \ud83d\udcca Current SQL Support\n\n### \u2705 Working SQL Features\n- `SELECT * FROM table`\n- `SELECT column1, column2 FROM table`\n- `SELECT * FROM table WHERE column = value`\n- `SELECT * FROM table WHERE column > value`\n- **Aggregation Functions**: `COUNT()`, `SUM()`, `AVG()`, `MIN()`, `MAX()`\n- **JOIN Operations**: `INNER JOIN`, `LEFT JOIN`, `RIGHT JOIN`, `CROSS JOIN`\n- **Set Operations**: `UNION`, `INTERSECT`, `EXCEPT`\n- **GROUP BY** with aggregations\n- **ORDER BY** with ASC/DESC\n- **LIMIT` clause\n- **Subqueries** with IN operator\n\n### \u26a0\ufe0f SQL Limitations\n- Type matching is strict (use 50.0 for FLOAT64, 50 for INT64)\n- Column aliases (AS) not yet supported\n- ORDER BY is not supported by design at the moment\n- CASE statements coming soon\n- String does not guarantee 100% precision (internally treated as u64)\n\n## \ud83c\udfa8 API Reference\n\n```python\n# Core operations\ndb.create_database()\ndb.create_schema(schema_name)\ndb.create_table(schema, table, columns)\ndb.insert_row(schema, table, values)\n\n# Query operations\ndb.select_all(schema, table)\ndb.select_where(schema, table, columns, where_col, where_val)\ndb.count_rows(schema, table)\ndb.execute_sql(sql_query) # Full SQL support including JOINs, aggregations, set operations\n\n# Data science integrations\ndb.to_pandas(schema, table) # Export to DataFrame\ndb.to_numpy(schema, table, column) # Export column to NumPy\ndb.from_pandas(df, schema, table) # Import from DataFrame\ndb.select_as_dict(schema, table) # Get as Python dict\n```\n\n## \ud83d\udea6 Performance Characteristics\n\n| Operation | Complexity | Verified Scale |\n|-----------|------------|----------------|\n| INSERT | O(1) | 2M rows \u00d7 2K columns |\n| SELECT * | O(n) | 10K rows \u00d7 5K columns |\n| WHERE 
clause | O(n) | 1M rows tested |\n| JOIN | O(n\u00d7m) | Tables up to 100K rows |\n| GROUP BY | O(n) | 100K groups tested |\n| Aggregations | O(n) | 1M rows tested |\n| to_pandas() | O(n) | 100K rows tested |\n| from_pandas() | O(n) | 100K rows tested |\n| Column scaling | ~O(n) | Up to 5000 columns |\n\n## \ud83d\udcdc License\n\nMIT License\n\n---\n\n**AlawymDB**: Almost Linear Any Way You Measure - because performance should scale with your data, not against it.\n\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "Linear-scaling in-memory database optimized for ML workloads",
"version": "0.2.4",
"project_urls": null,
"split_keywords": [
"database",
" columnar",
" in-memory",
" performance"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "79d38525cf18f5becfecf422830416aac58d0a57d2dd38e50ae51b456d2ef84d",
"md5": "186e0fac7805b2d06f3d7fe6e75c5237",
"sha256": "08b1ec862fa4e193031250e8941bae748411bd662fccf816afadb07b84b18922"
},
"downloads": -1,
"filename": "alawymdb-0.2.4-cp311-cp311-manylinux_2_35_x86_64.whl",
"has_sig": false,
"md5_digest": "186e0fac7805b2d06f3d7fe6e75c5237",
"packagetype": "bdist_wheel",
"python_version": "cp311",
"requires_python": ">=3.7",
"size": 1176985,
"upload_time": "2025-08-28T09:27:09",
"upload_time_iso_8601": "2025-08-28T09:27:09.159612Z",
"url": "https://files.pythonhosted.org/packages/79/d3/8525cf18f5becfecf422830416aac58d0a57d2dd38e50ae51b456d2ef84d/alawymdb-0.2.4-cp311-cp311-manylinux_2_35_x86_64.whl",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-08-28 09:27:09",
"github": false,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"lcname": "alawymdb"
}