# Bedrock Region Balancer
AWS Bedrock region load balancer with round-robin distribution across multiple regions.
## Features
- **Dual API Support**: Both traditional invoke_model and modern Converse API
- **NEW: Batch Processing**: Process multiple messages simultaneously with round-robin distribution
- **Round-robin load balancing** across AWS regions (default: us-west-2, eu-central-1, ap-northeast-2)
- **Async execution** for optimal performance
- **Automatic model availability checking** across regions
- **AWS Secrets Manager integration** for secure credential management
- **Intelligent caching** of model availability data
- **Error handling and automatic failover**
- **Support for Claude 3.7 Sonnet, Claude Sonnet 4, and Claude Opus 4.x models**
- **Short model name support** (e.g., `claude-4.0-sonnet` → full model ID)
- **Multimodal content support** via Converse API (text, images, documents, video)
- **Tool use and function calling** with native Converse API integration
- **Guardrail support** for content filtering and safety
- **Environment variable configuration** for custom regions
## Installation
```bash
pip install bedrock-region-balancer
```
## Quick Start
### Authentication Methods
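Bedrock Region Balancer supports 3 credential types. Each can be passed directly, read from environment variables or a `.env` file, or loaded from AWS Secrets Manager (Method 4); if none is supplied, the default AWS credential chain is used (Method 5):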
1. **AWS Session Credentials** (access_key_id, secret_access_key, session_token)
2. **AWS Access Keys** (access_key_id, secret_access_key)
3. **Bedrock API Key** (aws_bearer_token_bedrock)
### Method 1: Using Bedrock API Key
```python
import asyncio
import json
from bedrock_region_balancer import BedrockRegionBalancer
async def main():
# Method 1a: Direct parameter
async with BedrockRegionBalancer(
credentials={'bedrock_api_key': 'your-bedrock-api-key'},
default_model="claude-4.0-sonnet"
) as balancer:
model_id = balancer.get_default_model()
body = {
"anthropic_version": "bedrock-2023-05-31",
"messages": [{"role": "user", "content": "Hello!"}],
"max_tokens": 100
}
# Using invoke_model API (Single Request)
response = await balancer.invoke_model(model_id, body)
print(f"invoke_model response from {response['region']}: {response['response']['content'][0]['text']}")
# Using invoke_model API (Batch Processing) - NEW!
batch_bodies = [
{
"anthropic_version": "bedrock-2023-05-31",
"messages": [{"role": "user", "content": "What is 2+2?"}],
"max_tokens": 50
},
{
"anthropic_version": "bedrock-2023-05-31",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"max_tokens": 50
}
]
batch_responses = await balancer.invoke_model_batch(model_id, batch_bodies)
for i, response in enumerate(batch_responses, 1):
print(f"Batch {i} from {response['region']}: {response['response']['content'][0]['text']}")
# Using converse API (modern approach - Single Request)
from bedrock_region_balancer import ConverseAPIHelper, MessageRole
messages = [ConverseAPIHelper.create_message(MessageRole.USER, "Hello!")]
converse_response = await balancer.converse_model(
model_id=model_id,
messages=messages,
inference_config={"maxTokens": 100}
)
parsed = ConverseAPIHelper.parse_converse_response(converse_response['response'])
print(f"converse response from {converse_response['region']}: {parsed['content'][0]['text']}")
# Using converse API (Batch Processing) - NEW!
batch_message_lists = [
[ConverseAPIHelper.create_message(MessageRole.USER, "What is 2+2?")],
[ConverseAPIHelper.create_message(MessageRole.USER, "What is the capital of France?")]
]
batch_converse_responses = await balancer.converse_model_batch(
model_id=model_id,
message_lists=batch_message_lists,
inference_config={"maxTokens": 50}
)
for i, response in enumerate(batch_converse_responses, 1):
parsed = ConverseAPIHelper.parse_converse_response(response['response'])
print(f"Converse Batch {i} from {response['region']}: {parsed['content'][0]['text']}")
# Method 1b: Environment variable (preferred)
# Set: export AWS_BEARER_TOKEN_BEDROCK="your-api-key"
async with BedrockRegionBalancer() as balancer: # Auto-detects from environment
# Use balancer as above
pass
# Method 1c: Using .env file (most convenient)
# Create .env file with: AWS_BEARER_TOKEN_BEDROCK=your-api-key
async with BedrockRegionBalancer(use_dotenv=True) as balancer: # Auto-loads .env
# Use balancer as above
pass
# Method 1d: Custom .env file path
async with BedrockRegionBalancer(
dotenv_path='/path/to/your/.env',
use_dotenv=True
) as balancer:
# Use balancer as above
pass
asyncio.run(main())
```
### Method 2: Using AWS Session Credentials
```python
import asyncio
from bedrock_region_balancer import BedrockRegionBalancer
async def main():
# Method 2a: Direct credentials
credentials = {
'aws_access_key_id': 'your-access-key-id',
'aws_secret_access_key': 'your-secret-access-key',
'aws_session_token': 'your-session-token'
}
async with BedrockRegionBalancer(
credentials=credentials,
default_model="claude-4.0-sonnet"
) as balancer:
model_id = balancer.get_default_model()
# Use balancer as shown above
# Method 2b: Environment variables
# Set: export AWS_ACCESS_KEY_ID="..." AWS_SECRET_ACCESS_KEY="..." AWS_SESSION_TOKEN="..."
async with BedrockRegionBalancer() as balancer: # Auto-detects from environment
# Use balancer as above
pass
asyncio.run(main())
```
### Method 3: Using AWS Access Keys
```python
import asyncio
from bedrock_region_balancer import BedrockRegionBalancer
async def main():
# Method 3a: Direct credentials
credentials = {
'aws_access_key_id': 'your-access-key-id',
'aws_secret_access_key': 'your-secret-access-key'
}
async with BedrockRegionBalancer(
credentials=credentials,
default_model="claude-4.0-sonnet"
) as balancer:
model_id = balancer.get_default_model()
# Use balancer as shown above
asyncio.run(main())
```
### Method 4: Using AWS Secrets Manager
The balancer can load credentials from AWS Secrets Manager. The secret value may use any of the following JSON formats:
#### 4a. Bedrock API Key in Secrets Manager
```json
{
"bedrock_api_key": "your-bedrock-api-key"
}
```
or
```json
{
"aws_bearer_token_bedrock": "your-bedrock-api-key"
}
```
#### 4b. AWS Access Keys in Secrets Manager
```json
{
"access_key_id": "AKIA...",
"secret_access_key": "your-secret-key"
}
```
or
```json
{
"aws_access_key_id": "AKIA...",
"aws_secret_access_key": "your-secret-key"
}
```
#### 4c. AWS Session Credentials in Secrets Manager
```json
{
"access_key_id": "ASIA...",
"secret_access_key": "your-secret-key",
"session_token": "your-session-token"
}
```
```python
import asyncio
import json
from bedrock_region_balancer import BedrockRegionBalancer
async def main():
# Initialize balancer with credentials from Secrets Manager
# Supports all credential formats above
async with BedrockRegionBalancer(
secret_name="bedrock-credentials", # Your secret name
secret_region="us-west-2",
default_model="claude-4.0-sonnet"
) as balancer:
model_id = balancer.get_default_model()
body = {
"anthropic_version": "bedrock-2023-05-31",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"max_tokens": 100,
"temperature": 0.7
}
response = await balancer.invoke_model(model_id, json.dumps(body))
print(f"Response from region: {response['region']}")
response_body = response['response']
print(f"Model response: {response_body['content'][0]['text']}")
asyncio.run(main())
```
### Method 5: Using Default AWS Credentials
```python
import asyncio
from bedrock_region_balancer import BedrockRegionBalancer
async def main():
# Initialize with default AWS credential chain
# (IAM role, instance profile, etc.)
async with BedrockRegionBalancer(
default_model="claude-4.0-sonnet" # Optional: set default model
) as balancer:
model_id = balancer.get_default_model()
# Use balancer as shown above
asyncio.run(main())
```
## .env File Configuration
For convenience and security, you can use .env files to store your credentials:
### Step 1: Install python-dotenv (included as dependency)
```bash
pip install bedrock-region-balancer # python-dotenv is included
```
### Step 2: Create .env file
Copy the provided `.env.example` to `.env` and fill in your credentials:
```bash
cp .env.example .env
```
Example `.env` file content:
```bash
# Choose ONE authentication method
# Option 1: Bedrock API Key (Recommended)
AWS_BEARER_TOKEN_BEDROCK=your-bedrock-api-key
# Option 2: AWS Session Credentials
# AWS_ACCESS_KEY_ID=ASIA...
# AWS_SECRET_ACCESS_KEY=your-secret-key
# AWS_SESSION_TOKEN=your-session-token
# Option 3: AWS Access Keys
# AWS_ACCESS_KEY_ID=AKIA...
# AWS_SECRET_ACCESS_KEY=your-secret-key
# Optional: Configuration
BEDROCK_REGIONS=us-west-2,eu-central-1,ap-northeast-2
DEFAULT_MODEL=claude-4.0-sonnet
```
### Step 3: Use in your code
```python
import asyncio
from bedrock_region_balancer import BedrockRegionBalancer
async def main():
# Automatically loads .env file from current directory
async with BedrockRegionBalancer() as balancer:
model_id = balancer.get_default_model()
body = {
"anthropic_version": "bedrock-2023-05-31",
"messages": [{"role": "user", "content": "Hello!"}],
"max_tokens": 100
}
response = await balancer.invoke_model(model_id, body)
print(f"Response from {response['region']}")
asyncio.run(main())
```
### Custom .env file location
```python
async with BedrockRegionBalancer(
dotenv_path="/path/to/your/.env",
use_dotenv=True
) as balancer:
# Your code here
pass
```
### Security Best Practices for .env files
- **Never commit .env files** to version control
- Add `.env` to your `.gitignore` file
- Use different .env files for development, staging, and production
- Set appropriate file permissions: `chmod 600 .env`
## Advanced Usage
### Custom Regions
```python
# Method 1: Use custom regions via parameter
balancer = BedrockRegionBalancer(
regions=['us-east-1', 'us-west-2', 'eu-west-1'],
default_model="claude-4.0-sonnet" # Optional: set custom default model
)
# Method 2: Use environment variable
import os
os.environ['BEDROCK_REGIONS'] = 'us-west-2,eu-central-1,ap-northeast-2'
balancer = BedrockRegionBalancer(
default_model="claude-4.0-sonnet" # Optional: set default model
) # Will use regions from environment
```
### Custom Endpoints
The balancer now supports custom endpoint configuration for each region. By default, it uses official AWS Bedrock endpoints, but you can specify custom endpoints for testing or specific requirements:
```python
from bedrock_region_balancer import BedrockRegionBalancer
# Method 1: Use default endpoints (automatic)
# Default endpoints are automatically used for us-west-2, eu-central-1, ap-northeast-2
balancer = BedrockRegionBalancer() # Uses official AWS endpoints
# Method 2: Use custom endpoints (must match number of regions)
custom_regions = ['us-west-2', 'eu-central-1', 'ap-northeast-2']
custom_endpoints = [
'https://custom-bedrock.us-west-2.example.com',
'https://custom-bedrock.eu-central-1.example.com',
'https://custom-bedrock.ap-northeast-2.example.com'
]
balancer = BedrockRegionBalancer(
regions=custom_regions,
endpoints=custom_endpoints,
default_model="claude-4.0-sonnet"
)
# Method 3: Mixed configuration with custom regions and endpoints
mixed_regions = ['us-east-1', 'eu-west-1']
mixed_endpoints = [
'https://bedrock.us-east-1.amazonaws.com',
'https://bedrock.eu-west-1.amazonaws.com'
]
balancer = BedrockRegionBalancer(
regions=mixed_regions,
endpoints=mixed_endpoints
)
```
#### Default Endpoints
The following official AWS Bedrock endpoints are used by default:
| Region | Bedrock Control Plane | Bedrock Runtime |
|--------|----------------------|-----------------|
| **us-west-2** | `https://bedrock.us-west-2.amazonaws.com` | `https://bedrock-runtime.us-west-2.amazonaws.com` |
| **eu-central-1** | `https://bedrock.eu-central-1.amazonaws.com` | `https://bedrock-runtime.eu-central-1.amazonaws.com` |
| **ap-northeast-2** | `https://bedrock.ap-northeast-2.amazonaws.com` | `https://bedrock-runtime.ap-northeast-2.amazonaws.com` |
#### Important Notes
- **Endpoint Count**: Number of custom endpoints must exactly match the number of regions
- **Service Separation**: The balancer automatically handles separate endpoints for Bedrock control plane and runtime services
- **HTTPS Required**: All endpoints must use HTTPS protocol
- **Validation**: Connection is tested during initialization to ensure endpoint accessibility
### Invoke Model in All Regions
```python
# Get responses from all available regions simultaneously
responses = await balancer.invoke_model_all_regions(model_id, body)
for response in responses:
if 'error' in response:
print(f"Error in region {response['region']}: {response['error']}")
else:
print(f"Success in region {response['region']}")
```
### Check Model Availability
```python
# Get model availability report
report = balancer.get_model_availability_report()
print(f"Available regions: {report['available_regions']}")
print(f"Models by region: {report['models_by_region']}")
print(f"Default model: {report['default_model']}")
# Get just the default model
default_model = balancer.get_default_model()
print(f"Using default model: {default_model}")
```
### Disable Availability Checking
```python
# Skip availability check for faster execution
# (useful when you know the model is available)
response = await balancer.invoke_model(
model_id,
body,
check_availability=False
)
```
## Batch Processing vs Sequential Processing (NEW)
Bedrock Region Balancer now supports batch processing for both APIs, allowing you to send multiple messages simultaneously with round-robin distribution across regions for improved performance and load balancing.
### Processing Methods Comparison
| Aspect | Sequential Processing | Batch Processing |
|--------|----------------------|------------------|
| **Execution Model** | One request at a time | Multiple requests in parallel |
| **Performance** | Slower (cumulative latency) | **2-10x faster** (parallel execution) |
| **Load Distribution** | Single region per request | **Round-robin across regions** |
| **Resource Usage** | Low memory, high latency | Higher memory, low latency |
| **Error Handling** | Fail-fast, simple | Partial failures, more complex |
| **Use Case** | Simple, low-volume requests | **High-throughput, performance-critical** |
### Batch Processing Benefits
- **Performance**: **2-10x faster** execution time through parallel processing
- **Load Distribution**: Automatic round-robin distribution across available regions
- **Resource Efficiency**: Better utilization of concurrent connections and network bandwidth
- **Simplified Code**: Handle multiple requests with a single API call
- **Scalability**: Better handling of high-volume scenarios
### Basic Batch Usage
```python
from bedrock_region_balancer import BedrockRegionBalancer
async def batch_example():
async with BedrockRegionBalancer() as balancer:
model_id = "claude-4.0-sonnet"
# Prepare multiple request bodies
batch_bodies = [
{
"messages": [{"role": "user", "content": "What is 2+2?"}],
"max_tokens": 50,
"anthropic_version": "bedrock-2023-05-31"
},
{
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"max_tokens": 50,
"anthropic_version": "bedrock-2023-05-31"
}
]
# Process all requests in parallel with round-robin distribution
batch_responses = await balancer.invoke_model_batch(model_id, batch_bodies)
for i, response in enumerate(batch_responses, 1):
print(f"Response {i} from {response['region']}: {response['response']['content'][0]['text']}")
# Converse API batch processing
async def converse_batch_example():
async with BedrockRegionBalancer() as balancer:
from bedrock_region_balancer import ConverseAPIHelper, MessageRole
# Prepare multiple message lists
batch_message_lists = [
[ConverseAPIHelper.create_message(MessageRole.USER, "What is 2+2?")],
[ConverseAPIHelper.create_message(MessageRole.USER, "What is the capital of France?")]
]
# Process all conversations in parallel
batch_responses = await balancer.converse_model_batch(
model_id="claude-4.0-sonnet",
message_lists=batch_message_lists,
inference_config={"maxTokens": 50}
)
for i, response in enumerate(batch_responses, 1):
parsed = ConverseAPIHelper.parse_converse_response(response['response'])
print(f"Conversation {i} from {response['region']}: {parsed['content'][0]['text']}")
```
### Sequential vs Batch Performance Comparison Example
```python
import asyncio
import time
from bedrock_region_balancer import BedrockRegionBalancer
async def performance_comparison():
async with BedrockRegionBalancer() as balancer:
model_id = "claude-4.0-sonnet"
# Prepare 5 requests
requests = [
{
"messages": [{"role": "user", "content": f"What is {i+1} + {i+1}?"}],
"max_tokens": 30,
"anthropic_version": "bedrock-2023-05-31"
}
for i in range(5)
]
# Sequential processing (traditional approach)
print("Sequential Processing:")
start_time = time.time()
sequential_responses = []
for i, body in enumerate(requests):
response = await balancer.invoke_model(model_id, body)
sequential_responses.append(response)
print(f" Request {i+1}: {response['region']}")
sequential_time = time.time() - start_time
# Batch processing (new approach)
print("\nBatch Processing:")
start_time = time.time()
batch_responses = await balancer.invoke_model_batch(model_id, requests)
batch_time = time.time() - start_time
for i, response in enumerate(batch_responses, 1):
print(f" Request {i}: {response['region']}")
# Performance comparison
speedup = sequential_time / batch_time if batch_time > 0 else 0
print(f"\nPerformance Results:")
print(f" Sequential: {sequential_time:.2f}s")
print(f" Batch: {batch_time:.2f}s")
print(f" Speedup: {speedup:.1f}x faster with batch processing")
# Run comparison
asyncio.run(performance_comparison())
```
### When to Use Each Method
**Use Sequential Processing when:**
- Processing single requests or very few requests
- Memory constraints are critical
- Simple error handling is preferred
- Interactive applications requiring immediate feedback
**Use Batch Processing when:**
- Processing multiple similar requests
- Performance and throughput are critical
- You want automatic load distribution across regions
- Handling high-volume scenarios
## Converse API Support
Bedrock Region Balancer supports both the traditional **invoke_model** API and the new **Converse API**. The Converse API provides a unified interface across different foundation models with enhanced features like multimodal content, tool use, and guardrail integration.
### Basic Converse API Usage
```python
from bedrock_region_balancer import BedrockRegionBalancer, ConverseAPIHelper, MessageRole
async def converse_example():
async with BedrockRegionBalancer() as balancer:
# Create messages using ConverseAPIHelper
messages = [
ConverseAPIHelper.create_message(
MessageRole.USER,
"Hello! Explain the benefits of the Converse API."
)
]
# Create inference configuration
inference_config = ConverseAPIHelper.create_inference_config(
max_tokens=200,
temperature=0.7,
top_p=0.9
)
# Use Converse API
response = await balancer.converse_model(
model_id="claude-4.0-sonnet",
messages=messages,
inference_config=inference_config
)
# Parse response
parsed = ConverseAPIHelper.parse_converse_response(response['response'])
print(f"Response: {parsed['content'][0]['text']}")
```
### Multimodal Content Support
```python
# Create multimodal message with text and images
content_blocks = [
ConverseAPIHelper.create_text_content("Analyze this image:"),
ConverseAPIHelper.create_image_content(
source={"bytes": image_bytes},
format="png"
)
]
messages = [
ConverseAPIHelper.create_message(MessageRole.USER, content_blocks)
]
response = await balancer.converse_model(
model_id="claude-4.0-sonnet",
messages=messages
)
```
### Tool Use and Function Calling
```python
# Define tools for the model to use
tools = [
{
"toolSpec": {
"name": "get_weather",
"description": "Get weather information for a city",
"inputSchema": {
"json": {
"type": "object",
"properties": {
"city": {"type": "string"},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
},
"required": ["city"]
}
}
}
}
]
# Create tool configuration
tool_config = ConverseAPIHelper.create_tool_config(
tools=tools,
tool_choice="auto" # Let model decide when to use tools
)
response = await balancer.converse_model(
model_id="claude-4.0-sonnet",
messages=messages,
tool_config=tool_config
)
```
### Converse API in All Regions
```python
# Use Converse API across all regions
responses = await balancer.converse_model_all_regions(
model_id="claude-4.0-sonnet",
messages=messages,
inference_config={"maxTokens": 100, "temperature": 0.5}
)
for response in responses:
if 'error' in response:
print(f"Error in region {response['region']}: {response['error']}")
else:
parsed = ConverseAPIHelper.parse_converse_response(response['response'])
print(f"Region {response['region']}: {parsed['content'][0]['text']}")
```
### Format Conversion
Convert between invoke_model and Converse API formats:
```python
# Original invoke_model format
invoke_body = {
"messages": [{"role": "user", "content": "Hello!"}],
"max_tokens": 100,
"temperature": 0.7,
"system": "You are a helpful assistant."
}
# Convert to Converse format
converse_format = ConverseAPIHelper.convert_invoke_model_to_converse(invoke_body)
# Use with Converse API
response = await balancer.converse_model(
model_id="claude-4.0-sonnet",
messages=converse_format['messages'],
inference_config=converse_format.get('inferenceConfig'),
system=converse_format.get('system')
)
```
### API Comparison
| Feature | invoke_model | Converse API |
|---------|-------------|--------------|
| **Interface** | Model-specific formats | Unified across all models |
| **Batch Processing** | ✅ `invoke_model_batch()` | ✅ `converse_model_batch()` |
| **Multimodal** | Limited support | Native support (text, images, documents, video) |
| **Tool Use** | Model-dependent | Built-in function calling |
| **Guardrails** | External integration | Native integration |
| **Response Format** | Model-specific | Standardized structure |
| **Parameter Validation** | Basic | Enhanced validation |
| **Round-Robin Distribution** | ✅ Automatic in batch mode | ✅ Automatic in batch mode |
Both APIs are fully supported with batch processing capabilities for improved performance and load distribution.
## AWS Secrets Manager Configuration
The balancer supports multiple secret formats in AWS Secrets Manager:
### Format 1: Bedrock API Key (Recommended)
```json
{
"bedrock_api_key": "your-bedrock-api-key"
}
```
or
```json
{
"aws_bearer_token_bedrock": "your-bedrock-api-key"
}
```
### Format 2: AWS Access Keys
```json
{
"access_key_id": "AKIA...",
"secret_access_key": "your-secret-access-key"
}
```
or using full AWS naming:
```json
{
"aws_access_key_id": "AKIA...",
"aws_secret_access_key": "your-secret-access-key"
}
```
### Format 3: AWS Session Credentials
```json
{
"access_key_id": "ASIA...",
"secret_access_key": "your-secret-access-key",
"session_token": "your-session-token"
}
```
The credential format is automatically detected when the secret is retrieved.
## Configuration Options
### Constructor Parameters
- **credentials**: Direct credentials dictionary (optional)
  - Bedrock API Key: `{'bedrock_api_key': 'key'}` or `{'aws_bearer_token_bedrock': 'key'}`
  - AWS Access Keys: `{'aws_access_key_id': 'id', 'aws_secret_access_key': 'key'}`
  - AWS Session: `{'aws_access_key_id': 'id', 'aws_secret_access_key': 'key', 'aws_session_token': 'token'}`
- **secret_name**: Name of secret in AWS Secrets Manager (optional; cannot be used together with `credentials`)
- **secret_region**: AWS region where secret is stored (default: us-west-2)
- **regions**: List of AWS regions to use for load balancing (default: us-west-2, eu-central-1, ap-northeast-2)
- **endpoints**: List of custom endpoint URLs for each region (optional, uses default AWS endpoints if not provided)
  - Must match the number of regions if specified
  - Uses HTTPS protocol and official AWS Bedrock endpoints by default
- **max_workers**: Maximum number of worker threads for parallel processing (default: 10)
  - **Important for batch processing**: Higher values allow more concurrent requests
- **default_model**: Default model to use (default: claude-4.0-sonnet)
- **auth_type**: Force specific authentication type (optional, auto-detected from credentials)
- **use_environment**: Whether to check environment variables for credentials (default: True)
- **dotenv_path**: Path to .env file (optional, defaults to .env in current directory)
- **use_dotenv**: Whether to load .env file (default: True)
### New Batch Processing Methods
- **`invoke_model_batch(model_id, bodies, check_availability=True)`**: Process multiple invoke_model requests in parallel
  - `bodies`: List of request body dictionaries
  - Returns: List of responses in same order as input
  - Uses round-robin distribution across available regions
- **`converse_model_batch(model_id, message_lists, inference_config=None, ...)`**: Process multiple converse requests in parallel
  - `message_lists`: List of message lists (each list is one conversation)
  - Returns: List of responses in same order as input
  - Uses round-robin distribution across available regions
### Environment Variables
#### Region Configuration
- **BEDROCK_REGIONS**: Comma-separated list of AWS regions (e.g., `us-west-2,eu-central-1,ap-northeast-2`)
#### Authentication (Auto-detected in priority order)
1. **AWS_BEARER_TOKEN_BEDROCK**: Bedrock API key (highest priority)
2. **BEDROCK_API_KEY**: Alternative Bedrock API key name (supported for flexibility)
3. **AWS_ACCESS_KEY_ID** + **AWS_SECRET_ACCESS_KEY** + **AWS_SESSION_TOKEN**: AWS session credentials
4. **AWS_ACCESS_KEY_ID** + **AWS_SECRET_ACCESS_KEY**: AWS access keys
5. Default AWS credential chain (IAM role, instance profile, etc.)
## Error Handling
The package includes custom exceptions for better error handling:
```python
from bedrock_region_balancer import (
    BedrockRegionBalancer,
    BedrockBalancerError,
    ModelNotAvailableError,
    RegionNotAvailableError,
    SecretsManagerError,
    AuthType
)
try:
# Example with explicit auth type
balancer = BedrockRegionBalancer(
credentials={'bedrock_api_key': 'your-key'},
auth_type=AuthType.BEDROCK_API_KEY # Optional: force auth type
)
response = await balancer.invoke_model(model_id, body)
except ModelNotAvailableError as e:
print(f"Model not available: {e}")
except RegionNotAvailableError as e:
print(f"Region not available: {e}")
except SecretsManagerError as e:
print(f"Secrets Manager error: {e}")
except ValueError as e:
print(f"Authentication error: {e}")
except BedrockBalancerError as e:
print(f"General error: {e}")
```
## Supported Models
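The balancer supports all AWS Bedrock models and automatically maps the short names below to region-specific inference profile IDs. Note that the short name `claude-4.0-sonnet` resolves to the Claude 3.7 Sonnet profile; use `claude-sonnet-4` for Claude Sonnet 4.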
### Short Names to Cross Region Inference Profile IDs
**US West 2 (us-west-2):**
- `claude-4.0-sonnet` → `us.anthropic.claude-3-7-sonnet-20250219-v1:0`
- `claude-opus-4` → `us.anthropic.claude-opus-4-20250514-v1:0`
- `claude-opus-4.1` → `us.anthropic.claude-opus-4-1-20250805-v1:0`
- `claude-sonnet-4` → `us.anthropic.claude-sonnet-4-20250514-v1:0`
**EU Central 1 (eu-central-1):**
- `claude-4.0-sonnet` → `eu.anthropic.claude-3-7-sonnet-20250219-v1:0`
- `claude-sonnet-4` → `eu.anthropic.claude-sonnet-4-20250514-v1:0`
**Asia Pacific Northeast 2 (ap-northeast-2):**
- `claude-4.0-sonnet` → `apac.anthropic.claude-3-7-sonnet-20250219-v1:0`
- `claude-sonnet-4` → `apac.anthropic.claude-sonnet-4-20250514-v1:0`
## Performance Considerations
### Batch Processing Performance
Batch processing can provide significant performance improvements:
- **Parallel Execution**: Multiple requests processed simultaneously instead of sequentially
- **Round-Robin Distribution**: Automatic load balancing across available regions
- **Reduced Latency**: Lower overall response time for multiple requests
- **Better Resource Utilization**: More efficient use of network connections
### Recommended Settings
```python
# For high-throughput batch processing
balancer = BedrockRegionBalancer(
max_workers=20, # Increase for more concurrent requests
regions=['us-west-2', 'eu-central-1', 'ap-northeast-2'] # Use all available regions
)
# Process large batches
batch_size = 10 # Adjust based on your needs
batch_responses = await balancer.invoke_model_batch(model_id, batch_bodies)
```
## Requirements
- Python 3.8+
- boto3>=1.40.0 (for Bedrock support)
- botocore>=1.40.0
## Development
### Setup Development Environment
```bash
# Clone the repository
git clone https://github.com/yourusername/bedrock-region-balancer.git
cd bedrock-region-balancer
# Create virtual environment
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Install development dependencies
pip install -e .[dev]
```
### Run Tests
```bash
# Run mock tests (no AWS credentials required)
python test_round_robin_mock.py
# Run connection test (requires AWS credentials)
python simple_bedrock_test.py
# Run basic functionality test
python test_basic.py
# Run round-robin test with actual API
python test_round_robin.py
# Test batch processing capabilities
python examples/basic_usage.py # Includes batch processing examples
python examples/advanced_usage.py # Advanced batch processing demos
python examples/batch_vs_sequential.py # Performance comparison between batch and sequential processing
```
### Code Quality
```bash
# Format code
black bedrock_region_balancer
# Lint code
flake8 bedrock_region_balancer
# Type checking
mypy bedrock_region_balancer
```
### Publishing to PyPI
#### Prerequisites
1. Create an account on [PyPI](https://pypi.org/) and [Test PyPI](https://test.pypi.org/)
2. Install build and upload tools:
```bash
pip install build twine
```
3. Configure PyPI credentials in `~/.pypirc`:
```ini
[distutils]
index-servers =
    pypi
    testpypi
[pypi]
username = __token__
password = pypi-your-api-token-here
[testpypi]
username = __token__
password = pypi-your-test-api-token-here
```
#### Build and Upload
1. **Clean previous builds**:
```bash
rm -rf dist/ build/ *.egg-info/
```
2. **Build the package**:
```bash
python -m build
```
3. **Test upload to Test PyPI** (recommended):
```bash
python -m twine upload --repository testpypi dist/*
# Test installation from Test PyPI
pip install --index-url https://test.pypi.org/simple/ bedrock-region-balancer
```
4. **Upload to PyPI**:
```bash
python -m twine upload dist/*
```
5. **Verify installation**:
```bash
pip install bedrock-region-balancer
```
#### Version Management
1. Update version in `setup.py`:
```python
version="0.1.1" # Increment version number
```
2. Create a git tag:
```bash
git tag -a v0.1.1 -m "Release version 0.1.1"
git push origin v0.1.1
```
3. Update CHANGELOG.md with release notes
## License
This project is licensed under the MIT License - see the LICENSE file for details.
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.
## Support
For issues and feature requests, please use the [GitHub issue tracker](https://github.com/yourusername/bedrock-region-balancer/issues).
Raw data
{
"_id": null,
"home_page": null,
"name": "bedrock-region-balancer",
"maintainer": null,
"docs_url": null,
"requires_python": ">=3.8",
"maintainer_email": null,
"keywords": "aws, bedrock, load-balancer, round-robin, ai, llm",
"author": "SungHyon Kim",
"author_email": "SungHyon Kim <spero84@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/05/c3/9a0541590381fa8222ec2ffb95c41e4a51a23dbe8876cd2584c18f9b5813/bedrock_region_balancer-0.5.0.tar.gz",
"platform": null,
"description": "# Bedrock Region Balancer\n\nAWS Bedrock region load balancer with round-robin distribution across multiple regions.\n\n## Features\n\n- **Dual API Support**: Both traditional invoke_model and modern Converse API\n- **NEW: Batch Processing**: Process multiple messages simultaneously with round-robin distribution\n- **Round-robin load balancing** across AWS regions (default: us-west-2, eu-central-1, ap-northeast-2)\n- **Async execution** for optimal performance\n- **Automatic model availability checking** across regions\n- **AWS Secrets Manager integration** for secure credential management\n- **Intelligent caching** of model availability data\n- **Error handling and automatic failover**\n- **Support for Claude 3.7 Sonnet and Opus 4.x models** \n- **Short model name support** (e.g., `claude-4.0-sonnet` \u2192 full model ID)\n- **Multimodal content support** via Converse API (text, images, documents, video)\n- **Tool use and function calling** with native Converse API integration\n- **Guardrail support** for content filtering and safety\n- **Environment variable configuration** for custom regions\n\n## Installation\n\n```bash\npip install bedrock-region-balancer\n```\n\n## Quick Start\n\n### Authentication Methods\n\nBedrock Region Balancer supports 3 authentication methods:\n\n1. **AWS Session Credentials** (access_key_id, secret_access_key, session_token)\n2. **AWS Access Keys** (access_key_id, secret_access_key) \n3. 
**Bedrock API Key** (aws_bearer_token_bedrock)\n\n### Method 1: Using Bedrock API Key\n\n```python\nimport asyncio\nimport json\nfrom bedrock_region_balancer import BedrockRegionBalancer\n\nasync def main():\n # Method 1a: Direct parameter\n async with BedrockRegionBalancer(\n credentials={'bedrock_api_key': 'your-bedrock-api-key'},\n default_model=\"claude-4.0-sonnet\"\n ) as balancer:\n \n model_id = balancer.get_default_model()\n body = {\n \"anthropic_version\": \"bedrock-2023-05-31\",\n \"messages\": [{\"role\": \"user\", \"content\": \"Hello!\"}],\n \"max_tokens\": 100\n }\n \n # Using invoke_model API (Single Request)\n response = await balancer.invoke_model(model_id, body)\n print(f\"invoke_model response from {response['region']}: {response['response']['content'][0]['text']}\")\n \n # Using invoke_model API (Batch Processing) - NEW!\n batch_bodies = [\n {\n \"anthropic_version\": \"bedrock-2023-05-31\",\n \"messages\": [{\"role\": \"user\", \"content\": \"What is 2+2?\"}],\n \"max_tokens\": 50\n },\n {\n \"anthropic_version\": \"bedrock-2023-05-31\", \n \"messages\": [{\"role\": \"user\", \"content\": \"What is the capital of France?\"}],\n \"max_tokens\": 50\n }\n ]\n batch_responses = await balancer.invoke_model_batch(model_id, batch_bodies)\n for i, response in enumerate(batch_responses, 1):\n print(f\"Batch {i} from {response['region']}: {response['response']['content'][0]['text']}\")\n \n # Using converse API (modern approach - Single Request)\n from bedrock_region_balancer import ConverseAPIHelper, MessageRole\n messages = [ConverseAPIHelper.create_message(MessageRole.USER, \"Hello!\")]\n converse_response = await balancer.converse_model(\n model_id=model_id,\n messages=messages,\n inference_config={\"maxTokens\": 100}\n )\n parsed = ConverseAPIHelper.parse_converse_response(converse_response['response'])\n print(f\"converse response from {converse_response['region']}: {parsed['content'][0]['text']}\")\n \n # Using converse API (Batch Processing) - 
NEW!\n batch_message_lists = [\n [ConverseAPIHelper.create_message(MessageRole.USER, \"What is 2+2?\")],\n [ConverseAPIHelper.create_message(MessageRole.USER, \"What is the capital of France?\")]\n ]\n batch_converse_responses = await balancer.converse_model_batch(\n model_id=model_id,\n message_lists=batch_message_lists,\n inference_config={\"maxTokens\": 50}\n )\n for i, response in enumerate(batch_converse_responses, 1):\n parsed = ConverseAPIHelper.parse_converse_response(response['response'])\n print(f\"Converse Batch {i} from {response['region']}: {parsed['content'][0]['text']}\")\n\n # Method 1b: Environment variable (preferred)\n # Set: export AWS_BEARER_TOKEN_BEDROCK=\"your-api-key\"\n async with BedrockRegionBalancer() as balancer: # Auto-detects from environment\n # Use balancer as above\n pass\n\n # Method 1c: Using .env file (most convenient)\n # Create .env file with: AWS_BEARER_TOKEN_BEDROCK=your-api-key\n async with BedrockRegionBalancer(use_dotenv=True) as balancer: # Auto-loads .env\n # Use balancer as above\n pass\n\n # Method 1d: Custom .env file path\n async with BedrockRegionBalancer(\n dotenv_path='/path/to/your/.env',\n use_dotenv=True\n ) as balancer:\n # Use balancer as above\n pass\n\nasyncio.run(main())\n```\n\n### Method 2: Using AWS Session Credentials\n\n```python\nimport asyncio\nfrom bedrock_region_balancer import BedrockRegionBalancer\n\nasync def main():\n # Method 2a: Direct credentials\n credentials = {\n 'aws_access_key_id': 'your-access-key-id',\n 'aws_secret_access_key': 'your-secret-access-key',\n 'aws_session_token': 'your-session-token'\n }\n \n async with BedrockRegionBalancer(\n credentials=credentials,\n default_model=\"claude-4.0-sonnet\"\n ) as balancer:\n model_id = balancer.get_default_model()\n # Use balancer as shown above\n\n # Method 2b: Environment variables\n # Set: export AWS_ACCESS_KEY_ID=\"...\" AWS_SECRET_ACCESS_KEY=\"...\" AWS_SESSION_TOKEN=\"...\"\n async with BedrockRegionBalancer() as balancer: # 
Auto-detects from environment\n # Use balancer as above\n pass\n\nasyncio.run(main())\n```\n\n### Method 3: Using AWS Access Keys\n\n```python\nimport asyncio\nfrom bedrock_region_balancer import BedrockRegionBalancer\n\nasync def main():\n # Method 3a: Direct credentials \n credentials = {\n 'aws_access_key_id': 'your-access-key-id',\n 'aws_secret_access_key': 'your-secret-access-key'\n }\n \n async with BedrockRegionBalancer(\n credentials=credentials,\n default_model=\"claude-4.0-sonnet\"\n ) as balancer:\n model_id = balancer.get_default_model()\n # Use balancer as shown above\n\nasyncio.run(main())\n```\n\n### Method 4: Using AWS Secrets Manager\n\nAWS Secrets Manager now supports multiple credential formats:\n\n#### 4a. Bedrock API Key in Secrets Manager\n```json\n{\n \"bedrock_api_key\": \"your-bedrock-api-key\"\n}\n```\nor\n```json\n{\n \"aws_bearer_token_bedrock\": \"your-bedrock-api-key\"\n}\n```\n\n#### 4b. AWS Access Keys in Secrets Manager\n```json\n{\n \"access_key_id\": \"AKIA...\",\n \"secret_access_key\": \"your-secret-key\"\n}\n```\nor\n```json\n{\n \"aws_access_key_id\": \"AKIA...\",\n \"aws_secret_access_key\": \"your-secret-key\"\n}\n```\n\n#### 4c. 
AWS Session Credentials in Secrets Manager\n```json\n{\n \"access_key_id\": \"ASIA...\",\n \"secret_access_key\": \"your-secret-key\",\n \"session_token\": \"your-session-token\"\n}\n```\n\n```python\nimport asyncio\nimport json\nfrom bedrock_region_balancer import BedrockRegionBalancer\n\nasync def main():\n # Initialize balancer with credentials from Secrets Manager\n # Supports all credential formats above\n async with BedrockRegionBalancer(\n secret_name=\"bedrock-credentials\", # Your secret name\n secret_region=\"us-west-2\",\n default_model=\"claude-4.0-sonnet\"\n ) as balancer:\n \n model_id = balancer.get_default_model()\n body = {\n \"anthropic_version\": \"bedrock-2023-05-31\",\n \"messages\": [{\"role\": \"user\", \"content\": \"What is the capital of France?\"}],\n \"max_tokens\": 100,\n \"temperature\": 0.7\n }\n \n response = await balancer.invoke_model(model_id, json.dumps(body))\n \n print(f\"Response from region: {response['region']}\")\n response_body = json.loads(response['response'])\n print(f\"Model response: {response_body['content'][0]['text']}\")\n\nasyncio.run(main())\n```\n\n### Method 5: Using Default AWS Credentials\n\n```python\nimport asyncio\nfrom bedrock_region_balancer import BedrockRegionBalancer\n\nasync def main():\n # Initialize with default AWS credential chain\n # (IAM role, instance profile, etc.)\n async with BedrockRegionBalancer(\n default_model=\"claude-4.0-sonnet\" # Optional: set default model\n ) as balancer:\n model_id = balancer.get_default_model()\n # Use balancer as shown above\n\nasyncio.run(main())\n```\n\n## .env File Configuration\n\nFor convenience and security, you can use .env files to store your credentials:\n\n### Step 1: Install python-dotenv (included as dependency)\n```bash\npip install bedrock-region-balancer # python-dotenv is included\n```\n\n### Step 2: Create .env file\nCopy the provided `.env.example` to `.env` and fill in your credentials:\n\n```bash\ncp .env.example .env\n```\n\nExample `.env` 
file content:\n```bash\n# Choose ONE authentication method\n\n# Option 1: Bedrock API Key (Recommended)\nAWS_BEARER_TOKEN_BEDROCK=your-bedrock-api-key\n\n# Option 2: AWS Session Credentials\n# AWS_ACCESS_KEY_ID=ASIA...\n# AWS_SECRET_ACCESS_KEY=your-secret-key\n# AWS_SESSION_TOKEN=your-session-token\n\n# Option 3: AWS Access Keys\n# AWS_ACCESS_KEY_ID=AKIA...\n# AWS_SECRET_ACCESS_KEY=your-secret-key\n\n# Optional: Configuration\nBEDROCK_REGIONS=us-west-2,eu-central-1,ap-northeast-2\nDEFAULT_MODEL=claude-4.0-sonnet\n```\n\n### Step 3: Use in your code\n```python\nimport asyncio\nfrom bedrock_region_balancer import BedrockRegionBalancer\n\nasync def main():\n # Automatically loads .env file from current directory\n async with BedrockRegionBalancer() as balancer:\n model_id = balancer.get_default_model()\n \n body = {\n \"anthropic_version\": \"bedrock-2023-05-31\", \n \"messages\": [{\"role\": \"user\", \"content\": \"Hello!\"}],\n \"max_tokens\": 100\n }\n \n response = await balancer.invoke_model(model_id, body)\n print(f\"Response from {response['region']}\")\n\nasyncio.run(main())\n```\n\n### Custom .env file location\n```python\nasync with BedrockRegionBalancer(\n dotenv_path=\"/path/to/your/.env\",\n use_dotenv=True\n) as balancer:\n # Your code here\n pass\n```\n\n### Security Best Practices for .env files\n- **Never commit .env files** to version control\n- Add `.env` to your `.gitignore` file\n- Use different .env files for development, staging, and production\n- Set appropriate file permissions: `chmod 600 .env`\n\n## Advanced Usage\n\n### Custom Regions\n\n```python\n# Method 1: Use custom regions via parameter\nbalancer = BedrockRegionBalancer(\n regions=['us-east-1', 'us-west-2', 'eu-west-1'],\n default_model=\"claude-4.0-sonnet\" # Optional: set custom default model\n)\n\n# Method 2: Use environment variable\nimport os\nos.environ['BEDROCK_REGIONS'] = 'us-west-2,eu-central-1,ap-northeast-2'\nbalancer = BedrockRegionBalancer(\n 
default_model=\"claude-4.0-sonnet\" # Optional: set default model\n) # Will use regions from environment\n```\n\n### Custom Endpoints\n\nThe balancer now supports custom endpoint configuration for each region. By default, it uses official AWS Bedrock endpoints, but you can specify custom endpoints for testing or specific requirements:\n\n```python\nfrom bedrock_region_balancer import BedrockRegionBalancer\n\n# Method 1: Use default endpoints (automatic)\n# Default endpoints are automatically used for us-west-2, eu-central-1, ap-northeast-2\nbalancer = BedrockRegionBalancer() # Uses official AWS endpoints\n\n# Method 2: Use custom endpoints (must match number of regions)\ncustom_regions = ['us-west-2', 'eu-central-1', 'ap-northeast-2']\ncustom_endpoints = [\n 'https://custom-bedrock.us-west-2.example.com',\n 'https://custom-bedrock.eu-central-1.example.com',\n 'https://custom-bedrock.ap-northeast-2.example.com'\n]\n\nbalancer = BedrockRegionBalancer(\n regions=custom_regions,\n endpoints=custom_endpoints,\n default_model=\"claude-4.0-sonnet\"\n)\n\n# Method 3: Mixed configuration with custom regions and endpoints\nmixed_regions = ['us-east-1', 'eu-west-1']\nmixed_endpoints = [\n 'https://bedrock.us-east-1.amazonaws.com',\n 'https://bedrock.eu-west-1.amazonaws.com'\n]\n\nbalancer = BedrockRegionBalancer(\n regions=mixed_regions,\n endpoints=mixed_endpoints\n)\n```\n\n#### Default Endpoints\n\nThe following official AWS Bedrock endpoints are used by default:\n\n| Region | Bedrock Control Plane | Bedrock Runtime |\n|--------|----------------------|-----------------|\n| **us-west-2** | `https://bedrock.us-west-2.amazonaws.com` | `https://bedrock-runtime.us-west-2.amazonaws.com` |\n| **eu-central-1** | `https://bedrock.eu-central-1.amazonaws.com` | `https://bedrock-runtime.eu-central-1.amazonaws.com` |\n| **ap-northeast-2** | `https://bedrock.ap-northeast-2.amazonaws.com` | `https://bedrock-runtime.ap-northeast-2.amazonaws.com` |\n\n#### Important Notes\n\n- **Endpoint 
Count**: Number of custom endpoints must exactly match the number of regions\n- **Service Separation**: The balancer automatically handles separate endpoints for Bedrock control plane and runtime services\n- **HTTPS Required**: All endpoints must use HTTPS protocol\n- **Validation**: Connection is tested during initialization to ensure endpoint accessibility\n\n### Invoke Model in All Regions\n\n```python\n# Get responses from all available regions simultaneously\nresponses = await balancer.invoke_model_all_regions(model_id, body)\n\nfor response in responses:\n if 'error' in response:\n print(f\"Error in region {response['region']}: {response['error']}\")\n else:\n print(f\"Success in region {response['region']}\")\n```\n\n### Check Model Availability\n\n```python\n# Get model availability report\nreport = balancer.get_model_availability_report()\nprint(f\"Available regions: {report['available_regions']}\")\nprint(f\"Models by region: {report['models_by_region']}\")\nprint(f\"Default model: {report['default_model']}\")\n\n# Get just the default model\ndefault_model = balancer.get_default_model()\nprint(f\"Using default model: {default_model}\")\n```\n\n### Disable Availability Checking\n\n```python\n# Skip availability check for faster execution\n# (useful when you know the model is available)\nresponse = await balancer.invoke_model(\n model_id, \n body, \n check_availability=False\n)\n```\n\n## Batch Processing vs Sequential Processing (NEW)\n\nBedrock Region Balancer now supports batch processing for both APIs, allowing you to send multiple messages simultaneously with round-robin distribution across regions for improved performance and load balancing.\n\n### Processing Methods Comparison\n\n| Aspect | Sequential Processing | Batch Processing |\n|--------|----------------------|------------------|\n| **Execution Model** | One request at a time | Multiple requests in parallel |\n| **Performance** | Slower (cumulative latency) | **2-10x faster** (parallel 
execution) |\n| **Load Distribution** | Single region per request | **Round-robin across regions** |\n| **Resource Usage** | Low memory, high latency | Higher memory, low latency |\n| **Error Handling** | Fail-fast, simple | Partial failures, more complex |\n| **Use Case** | Simple, low-volume requests | **High-throughput, performance-critical** |\n\n### Batch Processing Benefits\n\n- **Performance**: **2-10x faster** execution time through parallel processing\n- **Load Distribution**: Automatic round-robin distribution across available regions\n- **Resource Efficiency**: Better utilization of concurrent connections and network bandwidth\n- **Simplified Code**: Handle multiple requests with a single API call\n- **Scalability**: Better handling of high-volume scenarios\n\n### Basic Batch Usage\n\n```python\nfrom bedrock_region_balancer import BedrockRegionBalancer\n\nasync def batch_example():\n async with BedrockRegionBalancer() as balancer:\n model_id = \"claude-4.0-sonnet\"\n \n # Prepare multiple request bodies\n batch_bodies = [\n {\n \"messages\": [{\"role\": \"user\", \"content\": \"What is 2+2?\"}],\n \"max_tokens\": 50,\n \"anthropic_version\": \"bedrock-2023-05-31\"\n },\n {\n \"messages\": [{\"role\": \"user\", \"content\": \"What is the capital of France?\"}],\n \"max_tokens\": 50,\n \"anthropic_version\": \"bedrock-2023-05-31\"\n }\n ]\n \n # Process all requests in parallel with round-robin distribution\n batch_responses = await balancer.invoke_model_batch(model_id, batch_bodies)\n \n for i, response in enumerate(batch_responses, 1):\n print(f\"Response {i} from {response['region']}: {response['response']['content'][0]['text']}\")\n\n# Converse API batch processing\nasync def converse_batch_example():\n async with BedrockRegionBalancer() as balancer:\n from bedrock_region_balancer import ConverseAPIHelper, MessageRole\n \n # Prepare multiple message lists\n batch_message_lists = [\n [ConverseAPIHelper.create_message(MessageRole.USER, \"What is 
2+2?\")],\n [ConverseAPIHelper.create_message(MessageRole.USER, \"What is the capital of France?\")]\n ]\n \n # Process all conversations in parallel\n batch_responses = await balancer.converse_model_batch(\n model_id=\"claude-4.0-sonnet\",\n message_lists=batch_message_lists,\n inference_config={\"maxTokens\": 50}\n )\n \n for i, response in enumerate(batch_responses, 1):\n parsed = ConverseAPIHelper.parse_converse_response(response['response'])\n print(f\"Conversation {i} from {response['region']}: {parsed['content'][0]['text']}\")\n```\n\n### Sequential vs Batch Performance Comparison Example\n\n```python\nimport time\nfrom bedrock_region_balancer import BedrockRegionBalancer\n\nasync def performance_comparison():\n async with BedrockRegionBalancer() as balancer:\n model_id = \"claude-4.0-sonnet\"\n \n # Prepare 5 requests\n requests = [\n {\n \"messages\": [{\"role\": \"user\", \"content\": f\"What is {i+1} + {i+1}?\"}],\n \"max_tokens\": 30,\n \"anthropic_version\": \"bedrock-2023-05-31\"\n }\n for i in range(5)\n ]\n \n # Sequential processing (traditional approach)\n print(\"Sequential Processing:\")\n start_time = time.time()\n sequential_responses = []\n for i, body in enumerate(requests):\n response = await balancer.invoke_model(model_id, body)\n sequential_responses.append(response)\n print(f\" Request {i+1}: {response['region']}\")\n sequential_time = time.time() - start_time\n \n # Batch processing (new approach)\n print(\"\\nBatch Processing:\")\n start_time = time.time()\n batch_responses = await balancer.invoke_model_batch(model_id, requests)\n batch_time = time.time() - start_time\n \n for i, response in enumerate(batch_responses, 1):\n print(f\" Request {i}: {response['region']}\")\n \n # Performance comparison\n speedup = sequential_time / batch_time if batch_time > 0 else 0\n print(f\"\\nPerformance Results:\")\n print(f\" Sequential: {sequential_time:.2f}s\")\n print(f\" Batch: {batch_time:.2f}s\")\n print(f\" Speedup: {speedup:.1f}x faster 
with batch processing\")\n\n# Run comparison\nasyncio.run(performance_comparison())\n```\n\n### When to Use Each Method\n\n**Use Sequential Processing when:**\n- Processing single requests or very few requests\n- Memory constraints are critical\n- Simple error handling is preferred\n- Interactive applications requiring immediate feedback\n\n**Use Batch Processing when:**\n- Processing multiple similar requests\n- Performance and throughput are critical\n- You want automatic load distribution across regions\n- Handling high-volume scenarios\n\n## Converse API Support\n\nBedrock Region Balancer supports both the traditional **invoke_model** API and the new **Converse API**. The Converse API provides a unified interface across different foundation models with enhanced features like multimodal content, tool use, and guardrail integration.\n\n### Basic Converse API Usage\n\n```python\nfrom bedrock_region_balancer import BedrockRegionBalancer, ConverseAPIHelper, MessageRole\n\nasync def converse_example():\n async with BedrockRegionBalancer() as balancer:\n # Create messages using ConverseAPIHelper\n messages = [\n ConverseAPIHelper.create_message(\n MessageRole.USER, \n \"Hello! 
Explain the benefits of the Converse API.\"\n )\n ]\n \n # Create inference configuration\n inference_config = ConverseAPIHelper.create_inference_config(\n max_tokens=200,\n temperature=0.7,\n top_p=0.9\n )\n \n # Use Converse API\n response = await balancer.converse_model(\n model_id=\"claude-4.0-sonnet\",\n messages=messages,\n inference_config=inference_config\n )\n \n # Parse response\n parsed = ConverseAPIHelper.parse_converse_response(response['response'])\n print(f\"Response: {parsed['content'][0]['text']}\")\n```\n\n### Multimodal Content Support\n\n```python\n# Create multimodal message with text and images\ncontent_blocks = [\n ConverseAPIHelper.create_text_content(\"Analyze this image:\"),\n ConverseAPIHelper.create_image_content(\n source={\"bytes\": image_bytes}, \n format=\"png\"\n )\n]\n\nmessages = [\n ConverseAPIHelper.create_message(MessageRole.USER, content_blocks)\n]\n\nresponse = await balancer.converse_model(\n model_id=\"claude-4.0-sonnet\",\n messages=messages\n)\n```\n\n### Tool Use and Function Calling\n\n```python\n# Define tools for the model to use\ntools = [\n {\n \"toolSpec\": {\n \"name\": \"get_weather\",\n \"description\": \"Get weather information for a city\",\n \"inputSchema\": {\n \"json\": {\n \"type\": \"object\",\n \"properties\": {\n \"city\": {\"type\": \"string\"},\n \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]}\n },\n \"required\": [\"city\"]\n }\n }\n }\n }\n]\n\n# Create tool configuration\ntool_config = ConverseAPIHelper.create_tool_config(\n tools=tools,\n tool_choice=\"auto\" # Let model decide when to use tools\n)\n\nresponse = await balancer.converse_model(\n model_id=\"claude-4.0-sonnet\",\n messages=messages,\n tool_config=tool_config\n)\n```\n\n### Converse API in All Regions\n\n```python\n# Use Converse API across all regions\nresponses = await balancer.converse_model_all_regions(\n model_id=\"claude-4.0-sonnet\",\n messages=messages,\n inference_config={\"maxTokens\": 100, 
\"temperature\": 0.5}\n)\n\nfor response in responses:\n if 'error' in response:\n print(f\"Error in region {response['region']}: {response['error']}\")\n else:\n parsed = ConverseAPIHelper.parse_converse_response(response['response'])\n print(f\"Region {response['region']}: {parsed['content'][0]['text']}\")\n```\n\n### Format Conversion\n\nConvert between invoke_model and Converse API formats:\n\n```python\n# Original invoke_model format\ninvoke_body = {\n \"messages\": [{\"role\": \"user\", \"content\": \"Hello!\"}],\n \"max_tokens\": 100,\n \"temperature\": 0.7,\n \"system\": \"You are a helpful assistant.\"\n}\n\n# Convert to Converse format\nconverse_format = ConverseAPIHelper.convert_invoke_model_to_converse(invoke_body)\n\n# Use with Converse API\nresponse = await balancer.converse_model(\n model_id=\"claude-4.0-sonnet\",\n messages=converse_format['messages'],\n inference_config=converse_format.get('inferenceConfig'),\n system=converse_format.get('system')\n)\n```\n\n### API Comparison\n\n| Feature | invoke_model | Converse API |\n|---------|-------------|--------------|\n| **Interface** | Model-specific formats | Unified across all models |\n| **Batch Processing** | \u2705 `invoke_model_batch()` | \u2705 `converse_model_batch()` |\n| **Multimodal** | Limited support | Native support (text, images, documents, video) |\n| **Tool Use** | Model-dependent | Built-in function calling |\n| **Guardrails** | External integration | Native integration |\n| **Response Format** | Model-specific | Standardized structure |\n| **Parameter Validation** | Basic | Enhanced validation |\n| **Round-Robin Distribution** | \u2705 Automatic in batch mode | \u2705 Automatic in batch mode |\n\nBoth APIs are fully supported with batch processing capabilities for improved performance and load distribution.\n\n## AWS Secrets Manager Configuration\n\nThe balancer supports multiple secret formats in AWS Secrets Manager:\n\n### Format 1: Bedrock API Key (Recommended)\n```json\n{\n 
\"bedrock_api_key\": \"your-bedrock-api-key\"\n}\n```\nor\n```json\n{\n \"aws_bearer_token_bedrock\": \"your-bedrock-api-key\"\n}\n```\n\n### Format 2: AWS Access Keys\n```json\n{\n \"access_key_id\": \"AKIA...\",\n \"secret_access_key\": \"your-secret-access-key\"\n}\n```\nor using full AWS naming:\n```json\n{\n \"aws_access_key_id\": \"AKIA...\",\n \"aws_secret_access_key\": \"your-secret-access-key\"\n}\n```\n\n### Format 3: AWS Session Credentials\n```json\n{\n \"access_key_id\": \"ASIA...\",\n \"secret_access_key\": \"your-secret-access-key\",\n \"session_token\": \"your-session-token\"\n}\n```\n\nThe credential format is automatically detected when the secret is retrieved.\n\n## Configuration Options\n\n### Constructor Parameters\n\n- **credentials**: Direct credentials dictionary (optional)\n - Bedrock API Key: `{'bedrock_api_key': 'key'}` or `{'aws_bearer_token_bedrock': 'key'}`\n - AWS Access Keys: `{'aws_access_key_id': 'id', 'aws_secret_access_key': 'key'}` \n - AWS Session: `{'aws_access_key_id': 'id', 'aws_secret_access_key': 'key', 'aws_session_token': 'token'}`\n- **secret_name**: Name of secret in AWS Secrets Manager (optional, cannot use with credentials)\n- **secret_region**: AWS region where secret is stored (default: us-west-2)\n- **regions**: List of AWS regions to use for load balancing (default: us-west-2, eu-central-1, ap-northeast-2)\n- **endpoints**: List of custom endpoint URLs for each region (optional, uses default AWS endpoints if not provided)\n - Must match the number of regions if specified\n - Uses HTTPS protocol and official AWS Bedrock endpoints by default\n- **max_workers**: Maximum number of worker threads for parallel processing (default: 10)\n - **Important for batch processing**: Higher values allow more concurrent requests\n- **default_model**: Default model to use (default: claude-4.0-sonnet)\n- **auth_type**: Force specific authentication type (optional, auto-detected from credentials)\n- **use_environment**: Whether to 
check environment variables for credentials (default: True)\n- **dotenv_path**: Path to .env file (optional, defaults to .env in current directory)\n- **use_dotenv**: Whether to load .env file (default: True)\n\n### New Batch Processing Methods\n\n- **`invoke_model_batch(model_id, bodies, check_availability=True)`**: Process multiple invoke_model requests in parallel\n - `bodies`: List of request body dictionaries\n - Returns: List of responses in same order as input\n - Uses round-robin distribution across available regions\n\n- **`converse_model_batch(model_id, message_lists, inference_config=None, ...)`**: Process multiple converse requests in parallel\n - `message_lists`: List of message lists (each list is one conversation)\n - Returns: List of responses in same order as input\n - Uses round-robin distribution across available regions\n\n### Environment Variables\n\n#### Region Configuration\n- **BEDROCK_REGIONS**: Comma-separated list of AWS regions (e.g., `us-west-2,eu-central-1,ap-northeast-2`)\n\n#### Authentication (Auto-detected in priority order)\n1. **AWS_BEARER_TOKEN_BEDROCK**: Bedrock API key (highest priority)\n2. **BEDROCK_API_KEY**: Alternative Bedrock API key name (supported for flexibility)\n3. **AWS_ACCESS_KEY_ID** + **AWS_SECRET_ACCESS_KEY** + **AWS_SESSION_TOKEN**: AWS session credentials\n4. **AWS_ACCESS_KEY_ID** + **AWS_SECRET_ACCESS_KEY**: AWS access keys\n5. 
Default AWS credential chain (IAM role, instance profile, etc.)\n\n## Error Handling\n\nThe package includes custom exceptions for better error handling:\n\n```python\nfrom bedrock_region_balancer import (\n BedrockBalancerError,\n ModelNotAvailableError,\n RegionNotAvailableError,\n SecretsManagerError,\n AuthType\n)\n\ntry:\n # Example with explicit auth type\n balancer = BedrockRegionBalancer(\n credentials={'bedrock_api_key': 'your-key'},\n auth_type=AuthType.BEDROCK_API_KEY # Optional: force auth type\n )\n response = await balancer.invoke_model(model_id, body)\nexcept ModelNotAvailableError as e:\n print(f\"Model not available: {e}\")\nexcept RegionNotAvailableError as e:\n print(f\"Region not available: {e}\")\nexcept SecretsManagerError as e:\n print(f\"Secrets Manager error: {e}\")\nexcept ValueError as e:\n print(f\"Authentication error: {e}\")\nexcept BedrockBalancerError as e:\n print(f\"General error: {e}\")\n```\n\n## Supported Models\n\nThe balancer supports all AWS Bedrock models with automatic ID mapping:\n\n### Short Names to Cross Region Inference Profile IDs\n\n**US West 2 (us-west-2):**\n- `claude-4.0-sonnet` \u2192 `us.anthropic.claude-3-7-sonnet-20250219-v1:0`\n- `claude-opus-4` \u2192 `us.anthropic.claude-opus-4-20250514-v1:0`\n- `claude-opus-4.1` \u2192 `us.anthropic.claude-opus-4-1-20250805-v1:0`\n- `claude-sonnet-4` \u2192 `us.anthropic.claude-sonnet-4-20250514-v1:0`\n\n**EU Central 1 (eu-central-1):**\n- `claude-4.0-sonnet` \u2192 `eu.anthropic.claude-3-7-sonnet-20250219-v1:0`\n- `claude-sonnet-4` \u2192 `eu.anthropic.claude-sonnet-4-20250514-v1:0`\n\n**Asia Pacific Northeast 2 (ap-northeast-2):**\n- `claude-4.0-sonnet` \u2192 `apac.anthropic.claude-3-7-sonnet-20250219-v1:0`\n- `claude-sonnet-4` \u2192 `apac.anthropic.claude-sonnet-4-20250514-v1:0`\n\n## Performance Considerations\n\n### Batch Processing Performance\n\nBatch processing can provide significant performance improvements:\n\n- **Parallel Execution**: Multiple requests 
processed simultaneously instead of sequentially\n- **Round-Robin Distribution**: Automatic load balancing across available regions\n- **Reduced Latency**: Lower overall response time for multiple requests\n- **Better Resource Utilization**: More efficient use of network connections\n\n### Recommended Settings\n\n```python\n# For high-throughput batch processing\nbalancer = BedrockRegionBalancer(\n max_workers=20, # Increase for more concurrent requests\n regions=['us-west-2', 'eu-central-1', 'ap-northeast-2'] # Use all available regions\n)\n\n# Process large batches\nbatch_size = 10 # Adjust based on your needs\nbatch_responses = await balancer.invoke_model_batch(model_id, batch_bodies)\n```\n\n## Requirements\n\n- Python 3.8+\n- boto3>=1.40.0 (for Bedrock support)\n- botocore>=1.40.0\n\n## Development\n\n### Setup Development Environment\n\n```bash\n# Clone the repository\ngit clone https://github.com/yourusername/bedrock-region-balancer.git\ncd bedrock-region-balancer\n\n# Create virtual environment\npython -m venv venv\nsource venv/bin/activate # On Windows: venv\\Scripts\\activate\n\n# Install development dependencies\npip install -e .[dev]\n```\n\n### Run Tests\n\n```bash\n# Run mock tests (no AWS credentials required)\npython test_round_robin_mock.py\n\n# Run connection test (requires AWS credentials)\npython simple_bedrock_test.py\n\n# Run basic functionality test\npython test_basic.py\n\n# Run round-robin test with actual API\npython test_round_robin.py\n\n# Test batch processing capabilities\npython examples/basic_usage.py # Includes batch processing examples\npython examples/advanced_usage.py # Advanced batch processing demos\npython examples/batch_vs_sequential.py # Performance comparison between batch and sequential processing\n```\n\n### Code Quality\n\n```bash\n# Format code\nblack bedrock_region_balancer\n\n# Lint code\nflake8 bedrock_region_balancer\n\n# Type checking\nmypy bedrock_region_balancer\n```\n\n### Publishing to PyPI\n\n#### 
Prerequisites\n\n1. Create an account on [PyPI](https://pypi.org/) and [Test PyPI](https://test.pypi.org/)\n2. Install build and upload tools:\n ```bash\n pip install build twine\n ```\n3. Configure PyPI credentials in `~/.pypirc`:\n ```ini\n [distutils]\n index-servers =\n pypi\n testpypi\n\n [pypi]\n username = __token__\n password = pypi-your-api-token-here\n\n [testpypi]\n username = __token__\n password = pypi-your-test-api-token-here\n ```\n\n#### Build and Upload\n\n1. **Clean previous builds**:\n ```bash\n rm -rf dist/ build/ *.egg-info/\n ```\n\n2. **Build the package**:\n ```bash\n python -m build\n ```\n\n3. **Test upload to Test PyPI** (recommended):\n ```bash\n python -m twine upload --repository testpypi dist/*\n \n # Test installation from Test PyPI\n pip install --index-url https://test.pypi.org/simple/ bedrock-region-balancer\n ```\n\n4. **Upload to PyPI**:\n ```bash\n python -m twine upload dist/*\n ```\n\n5. **Verify installation**:\n ```bash\n pip install bedrock-region-balancer\n ```\n\n#### Version Management\n\n1. Update version in `setup.py`:\n ```python\n version=\"0.1.1\" # Increment version number\n ```\n\n2. Create a git tag:\n ```bash\n git tag -a v0.1.1 -m \"Release version 0.1.1\"\n git push origin v0.1.1\n ```\n\n3. Update CHANGELOG.md with release notes\n\n## License\n\nThis project is licensed under the MIT License - see the LICENSE file for details.\n\n## Contributing\n\nContributions are welcome! Please feel free to submit a Pull Request.\n\n## Support\n\nFor issues and feature requests, please use the [GitHub issue tracker](https://github.com/yourusername/bedrock-region-balancer/issues).\n",
"bugtrack_url": null,
"license": "MIT",
"summary": "AWS Bedrock region load balancer with round-robin distribution and batch processing",
"version": "0.5.0",
"project_urls": {
"Bug Reports": "https://github.com/spero84/bedrock-region-balancer/issues",
"Homepage": "https://github.com/spero84/bedrock-region-balancer",
"Source": "https://github.com/spero84/bedrock-region-balancer"
},
"split_keywords": [
"aws",
" bedrock",
" load-balancer",
" round-robin",
" ai",
" llm"
],
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "bd96304235ea1c9d4e954a4b2b52508340dfe221a2d14360051b92be7dc1e75c",
"md5": "b7d8ef83de42471245d9417c8c3b53a8",
"sha256": "d0eeb5d698782761d4726aa190f6b3be4a5466bf1db993d5ce53aef3ddfa5d16"
},
"downloads": -1,
"filename": "bedrock_region_balancer-0.5.0-py3-none-any.whl",
"has_sig": false,
"md5_digest": "b7d8ef83de42471245d9417c8c3b53a8",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.8",
"size": 38759,
"upload_time": "2025-09-08T02:10:55",
"upload_time_iso_8601": "2025-09-08T02:10:55.679083Z",
"url": "https://files.pythonhosted.org/packages/bd/96/304235ea1c9d4e954a4b2b52508340dfe221a2d14360051b92be7dc1e75c/bedrock_region_balancer-0.5.0-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "05c39a0541590381fa8222ec2ffb95c41e4a51a23dbe8876cd2584c18f9b5813",
"md5": "b14c63cd4f3f51cdb4871f077e8072a6",
"sha256": "249b7d371c2f15154afd9bd65ae323a186a12b8c9da165013c16093189741cd6"
},
"downloads": -1,
"filename": "bedrock_region_balancer-0.5.0.tar.gz",
"has_sig": false,
"md5_digest": "b14c63cd4f3f51cdb4871f077e8072a6",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.8",
"size": 58104,
"upload_time": "2025-09-08T02:10:57",
"upload_time_iso_8601": "2025-09-08T02:10:57.347118Z",
"url": "https://files.pythonhosted.org/packages/05/c3/9a0541590381fa8222ec2ffb95c41e4a51a23dbe8876cd2584c18f9b5813/bedrock_region_balancer-0.5.0.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2025-09-08 02:10:57",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "spero84",
"github_project": "bedrock-region-balancer",
"github_not_found": true,
"lcname": "bedrock-region-balancer"
}