# Catanatron Gym
For reinforcement learning purposes, we provide an OpenAI Gym environment. To use:
```
pip install catanatron_gym
```
Make your training loop, ensuring to respect `env.get_valid_actions()`.
```python
import random
import gymnasium as gym
env = gym.make("catanatron_gym:catanatron-v1")
observation, info = env.reset()
for _ in range(1000):
    # your agent here (this takes random actions)
    action = random.choice(env.unwrapped.get_valid_actions())
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated
    if done:
        observation, info = env.reset()
env.close()
```
For `action` documentation see [here](https://catanatron.readthedocs.io/en/latest/catanatron_gym.envs.html#catanatron_gym.envs.catanatron_env.CatanatronEnv.action_space).
For `observation` documentation see [here](https://catanatron.readthedocs.io/en/latest/catanatron_gym.envs.html#catanatron_gym.envs.catanatron_env.CatanatronEnv.observation_space).
You can access `env.game.state` and build your own "observation" (features) vector as well.
## Stable-Baselines3 Example
Catanatron works well with SB3, and better with the Maskable models of the [SB3 Contrib](https://stable-baselines3.readthedocs.io/en/master/guide/sb3_contrib.html) repo. Here is a small example of how it may work.
```python
import gymnasium as gym
import numpy as np
from sb3_contrib.common.maskable.policies import MaskableActorCriticPolicy
from sb3_contrib.common.wrappers import ActionMasker
from sb3_contrib.ppo_mask import MaskablePPO
def mask_fn(env) -> np.ndarray:
    valid_actions = env.get_valid_actions()
    mask = np.zeros(env.action_space.n, dtype=np.float32)
    mask[valid_actions] = 1
    return np.array([bool(i) for i in mask])
# Init Environment and Model
env = gym.make("catanatron_gym:catanatron-v1")
env = ActionMasker(env, mask_fn) # Wrap to enable masking
model = MaskablePPO(MaskableActorCriticPolicy, env, verbose=1)
# Train
model.learn(total_timesteps=1_000_000)
```
## Configuration
You can also configure what map to use, how many vps to win, among other variables in the environment,
with the `config` keyword argument. See source for details.
```python
from catanatron import Color
from catanatron.players.weighted_random import WeightedRandomPlayer
def my_reward_function(game, p0_color):
    winning_color = game.winning_color()
    if p0_color == winning_color:
        return 100
    elif winning_color is None:
        return 0
    else:
        return -100
# 3-player catan on a "Mini" map (7 tiles) until 6 points.
env = gym.make(
    "catanatron_gym:catanatron-v1",
    config={
        "map_type": "MINI",
        "vps_to_win": 6,
        "enemies": [WeightedRandomPlayer(Color.RED), WeightedRandomPlayer(Color.ORANGE)],
        "reward_function": my_reward_function,
        "representation": "mixed",
    },
)
```
Raw data
{
"_id": null,
"home_page": "https://github.com/bcollazo/catanatron",
"name": "catanatron-gym",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.6",
"maintainer_email": "",
"keywords": "",
"author": "Bryan Collazo",
"author_email": "bcollazo2010@gmail.com",
"download_url": "https://files.pythonhosted.org/packages/c6/f1/b72ab3a8a996180238a86ab6bea198c4c38eb52cd9711ce0b28727956455/catanatron_gym-4.0.0.tar.gz",
"platform": null,
"description": "# Catanatron Gym\n\nFor reinforcement learning purposes, we provide an Open AI Gym environment. To use:\n\n```\npip install catanatron_gym\n```\n\nMake your training loop, ensuring to respect `env.get_valid_actions()`.\n\n```python\nimport random\nimport gymnasium as gym\n\nenv = gym.make(\"catanatron_gym:catanatron-v1\")\nobservation, info = env.reset()\nfor _ in range(1000):\n # your agent here (this takes random actions)\n action = random.choice(env.unwrapped.get_valid_actions())\n observation, reward, terminated, truncated, info = env.step(action)\n done = terminated or truncated\n if done:\n observation, info = env.reset()\nenv.close()\n```\n\nFor `action` documentation see [here](https://catanatron.readthedocs.io/en/latest/catanatron_gym.envs.html#catanatron_gym.envs.catanatron_env.CatanatronEnv.action_space).\n\nFor `observation` documentation see [here](https://catanatron.readthedocs.io/en/latest/catanatron_gym.envs.html#catanatron_gym.envs.catanatron_env.CatanatronEnv.observation_space).\n\nYou can access `env.game.state` and build your own \"observation\" (features) vector as well.\n\n## Stable-Baselines3 Example\n\nCatanatron works well with SB3, and better with the Maskable models of the [SB3 Contrib](https://stable-baselines3.readthedocs.io/en/master/guide/sb3_contrib.html) repo. Here a small example of how it may work.\n\n```python\nimport gymnasium as gym\nimport numpy as np\nfrom sb3_contrib.common.maskable.policies import MaskableActorCriticPolicy\nfrom sb3_contrib.common.wrappers import ActionMasker\nfrom sb3_contrib.ppo_mask import MaskablePPO\n\ndef mask_fn(env) -> np.ndarray:\n valid_actions = env.get_valid_actions()\n mask = np.zeros(env.action_space.n, dtype=np.float32)\n mask[valid_actions] = 1\n\n return np.array([bool(i) for i in mask])\n\n\n# Init Environment and Model\nenv = gym.make(\"catanatron_gym:catanatron-v1\")\nenv = ActionMasker(env, mask_fn) # Wrap to enable masking\nmodel = MaskablePPO(MaskableActorCriticPolicy, env, verbose=1)\n\n# Train\nmodel.learn(total_timesteps=1_000_000)\n```\n\n## Configuration\n\nYou can also configure what map to use, how many vps to win, among other variables in the environment,\nwith the `config` keyword argument. See source for details.\n\n```python\nfrom catanatron import Color\nfrom catanatron.players.weighted_random import WeightedRandomPlayer\n\n\ndef my_reward_function(game, p0_color):\n winning_color = game.winning_color()\n if p0_color == winning_color:\n return 100\n elif winning_color is None:\n return 0\n else:\n return -100\n\n# 3-player catan on a \"Mini\" map (7 tiles) until 6 points.\nenv = gym.make(\n \"catanatron_gym:catanatron-v1\",\n config={\n \"map_type\": \"MINI\",\n \"vps_to_win\": 6,\n \"enemies\": [WeightedRandomPlayer(Color.RED), WeightedRandomPlayer(Color.ORANGE)],\n \"reward_function\": my_reward_function,\n \"representation\": \"mixed\",\n },\n)\n```\n",
"bugtrack_url": null,
"license": "",
"summary": "Open AI Gym to play 1v1 Catan against a random bot",
"version": "4.0.0",
"project_urls": {
"Homepage": "https://github.com/bcollazo/catanatron"
},
"split_keywords": [],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "83b9e425bb49a92ef9db8776d5c782ffa1219f1400378874cb965ec3c02f1148",
"md5": "c6c2aa82b549b03654882997442e2a01",
"sha256": "1fba36b0778d3e44317f2594b678f01338ef9c18462ea3e2ad351a8ef981f157"
},
"downloads": -1,
"filename": "catanatron_gym-4.0.0-py3-none-any.whl",
"has_sig": false,
"md5_digest": "c6c2aa82b549b03654882997442e2a01",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.6",
"size": 14691,
"upload_time": "2024-03-07T02:43:16",
"upload_time_iso_8601": "2024-03-07T02:43:16.086170Z",
"url": "https://files.pythonhosted.org/packages/83/b9/e425bb49a92ef9db8776d5c782ffa1219f1400378874cb965ec3c02f1148/catanatron_gym-4.0.0-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "c6f1b72ab3a8a996180238a86ab6bea198c4c38eb52cd9711ce0b28727956455",
"md5": "03f2cd3aefff0a26308b9b76560d52f5",
"sha256": "e2d2afa7a061bacd2d01300929f9a1fb53d0598463c8b63b6023ec429d81a719"
},
"downloads": -1,
"filename": "catanatron_gym-4.0.0.tar.gz",
"has_sig": false,
"md5_digest": "03f2cd3aefff0a26308b9b76560d52f5",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.6",
"size": 14442,
"upload_time": "2024-03-07T02:43:17",
"upload_time_iso_8601": "2024-03-07T02:43:17.207614Z",
"url": "https://files.pythonhosted.org/packages/c6/f1/b72ab3a8a996180238a86ab6bea198c4c38eb52cd9711ce0b28727956455/catanatron_gym-4.0.0.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2024-03-07 02:43:17",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "bcollazo",
"github_project": "catanatron",
"travis_ci": false,
"coveralls": false,
"github_actions": true,
"requirements": [],
"lcname": "catanatron-gym"
}