Source code for pokebattle_rl_env.pokebattle_env

from math import exp

import numpy as np
from gym import Env
from gym.envs.registration import EnvSpec
from gym.spaces import Box

from pokebattle_rl_env.showdown_simulator import ShowdownSimulator

TURN_THRESHOLD = 10


def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)


def sigmoid(x):
    return 1 / (1 + exp(-x))


class PokeBattleEnv(Env):
    """The Pokemon battle Reinforcement Learning environment.

    A subclass of :class:`gym.core.Env`, which is compatible with most Reinforcement Learning
    frameworks. :class:`PokeBattleEnv` uses a :class:`pokebattle_rl_env.battle_simulator.BattleSimulator`
    to simulate the battles.

    Attributes:
        simulator (:class:`pokebattle_rl_env.battle_simulator.BattleSimulator`): The simulator to run
            battles in. Uses :class:`pokebattle_rl_env.showdown_simulator.ShowdownSimulator` by default.
    """

    def __init__(self, simulator=None):
        self.__version__ = "0.1.0"
        self._spec = EnvSpec('PokeBattleEnv-v0')
        # Avoid instantiating the default simulator in the argument list, which would create it at
        # class definition time and share it across instances.
        self.simulator = simulator if simulator is not None else ShowdownSimulator()
        num_actions = len(self.simulator.get_available_actions()) + len(self.simulator.get_available_modifiers())
        self.action_space = Box(low=0.0, high=1.0, shape=(num_actions,), dtype=np.float32)
        state_dimensions = len(self.simulator.state.to_array())
        self.observation_space = Box(low=0, high=1000, shape=(state_dimensions,), dtype=np.float32)
        self.reward_range = (-1, 1)
        self.metadata['render.modes'] = ['human']
        self.metadata['semantics.autoreset'] = False

    def get_action(self, action_probs):
        valid_actions = self.simulator.get_available_actions()
        if len(valid_actions) == 0:
            # No legal action available: dump the current state for later debugging.
            from pickle import dump
            from pokebattle_rl_env.util import generate_token
            with open(generate_token(5), 'wb') as file:
                dump(self.simulator.state, file)
        estimates = []
        for valid_action in valid_actions:
            if valid_action.mode == 'attack':
                action_ix = valid_action.number - 1  # attacks occupy the first action indices
            elif valid_action.mode == 'switch':
                action_ix = valid_action.number + 2  # switches occupy the indices after the attacks
            else:
                continue
            estimates.append(action_probs[action_ix])
        estimates = softmax(estimates)
        action = np.random.choice(valid_actions, p=estimates)
        return action

    def get_action_modifier(self, action_probs):
        valid_modifiers = self.simulator.get_available_modifiers()
        modifiers = []
        for valid_modifier in valid_modifiers:
            prob = 0
            if valid_modifier == 'mega':
                prob = action_probs[-1]  # the last entry encodes the mega evolution modifier
            prob = sigmoid(prob)
            if np.random.binomial(1, prob):
                modifiers.append(valid_modifier)
        return modifiers

    def compute_reward(self):
        if not (self.simulator.state.forfeited and self.simulator.state.turn < TURN_THRESHOLD):
            if self.simulator.state.state == 'win':
                return 1
            elif self.simulator.state.state == 'loss':
                return -1
        return 0
    def step(self, action):
        game_action = self.get_action(action)
        modifiers = self.get_action_modifier(action)
        self.simulator.act(game_action, modifiers)
        observation = self.simulator.state.to_array()
        reward = self.compute_reward()  # ToDo: Maybe negative reward for assigning probability to invalid action
        done = self.simulator.state.state in ['win', 'loss', 'tie']
        return observation, reward, done, {}
    def reset(self):
        self.simulator.reset()
        return self.simulator.state.to_array()
    def render(self, mode='human'):
        if mode == 'rgb_array':
            raise NotImplementedError('rendering rgb_arrays not yet implemented')
        if mode == 'human':
            self.simulator.render()
        else:
            super().render(mode=mode)
    def close(self):
        self.simulator.close()
    def seed(self, seed=None):
        pass
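

# Usage sketch (a minimal example, not authoritative): roll out one battle with a random policy.
# It assumes the default ShowdownSimulator backend, which requires a reachable Pokemon Showdown
# server; a trained agent would supply its own action vector instead of action_space.sample().
if __name__ == '__main__':
    env = PokeBattleEnv()
    observation = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # stand-in for a policy's action probabilities
        observation, reward, done, info = env.step(action)
    env.close()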