windowsでTensorFlow その10

3597 ワード

強化学習 TensorFlow1.0 OpenAIGym 強化学習テキストリンク

概要

WindowsのTensorFlow環境で、OpenAI Gymをやってみた。
新しい環境を作ってみた。

写真

環境
windows 7 sp1 64bit
anaconda3
tensorflow 1.0
pyqt5
OpenAI Gym 0.5

サンプルコード

落ちてくる果物を籠で受け取る。
フルーツバスケット

class FBEnvironment(core.Env):
    """Fruit-basket catching game on a 10x10 board (old-style Gym environment).

    A fruit falls one row per step from row 0 towards row 9 while the agent
    slides a three-cell-wide basket along row 9.

    State attributes:
        x: column of the falling fruit.
        y: row of the falling fruit (0 = top, 9 = basket row).
        z: column of the basket's centre cell, clamped to 1..8 by ``_step``.

    Actions (``Discrete(3)``): 0 = stay, 1 = move left, 2 = move right.
    Observation: the board flattened to shape ``(1, 100)``; cells occupied by
        the fruit or the basket are 1.0, all others 0.0.
    Reward: +1.0 when the fruit is caught, -1.0 when it is missed, else 0.0.
    The episode terminates on a miss; after a catch a new fruit is dropped.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 1
    }

    def __init__(self):
        """Create the spaces, seed the RNG and set a provisional state."""
        self.viewer = None
        self.state = None
        self.action_space = spaces.Discrete(3)
        # The observation is the flattened board produced by _get_ob(), so the
        # space has to describe a (1, 100) array with values in [0, 1].
        self.observation_space = spaces.Box(
            low=np.zeros((1, 100)), high=np.ones((1, 100)))
        self._seed()
        self.x = 0
        self.y = 0
        self.z = 2

    def _new_fruit_column(self):
        """Return the start column (0..8) of a new fruit.

        Drawn from ``self.np_random`` so that ``_seed`` actually controls the
        sequence of fruits.
        """
        return int(self.np_random.randint(0, 9))

    def _get_ob(self):
        """Return the current board as a ``(1, 100)`` array of 0.0/1.0."""
        board = np.zeros((10, 10))
        # Rows are vertical positions and columns horizontal ones, matching
        # the basket, which always sits on row 9.
        board[self.y, self.x] = 1
        board[9, self.z - 1:self.z + 2] = 1
        return np.reshape(board, (-1, 100))

    def _seed(self, seed=None):
        """Seed the environment's RNG and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _terminal(self):
        """Return True when the fruit is on the basket row but not caught."""
        return self.y == 9 and abs(self.x - self.z) > 1

    def _reset(self):
        """Drop a new fruit from the top, centre the basket, return the board."""
        self.x = self._new_fruit_column()
        self.y = 0
        self.z = 4
        return self._get_ob()

    def _step(self, action):
        """Move the basket, advance the fruit by one row and score the result.

        Args:
            action: 0 = stay, 1 = move left, 2 = move right.

        Returns:
            Tuple ``(observation, reward, terminal, info)`` where ``info`` is
            an empty dict.

        Raises:
            AssertionError: if ``action`` is not in ``action_space``.
        """
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        if action == 1:
            delta = -1
        elif action == 2:
            delta = 1
        else:
            delta = 0
        # Keep the basket centre in 1..8 so all three cells stay on the board.
        self.z = min(max(1, self.z + delta), 8)
        self.y += 1
        # Must be evaluated before a caught fruit is replaced below.
        terminal = self._terminal()
        reward = 0.0
        if self.y == 9:
            if abs(self.x - self.z) <= 1:
                reward = 1.0
                # Caught: immediately drop the next fruit from the top.
                self.x = self._new_fruit_column()
                self.y = 0
            else:
                reward = -1.0
        return (self._get_ob(), reward, terminal, {})

    def _render(self, mode='human', close=False):
        """Draw the board, or close the viewer when ``close`` is True.

        Args:
            mode: 'human' or 'rgb_array'.
            close: when True, close the viewer (if any) and return None.

        Returns:
            Whatever ``viewer.render`` returns for the requested mode.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # Slow the animation down; skipped when only closing the viewer.
        time.sleep(0.2)
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.5, 2.5, -2.5, 2.5)
        # Map grid coordinates to world coordinates (0.4 units per cell).
        fruit_x = -1.8 + self.x * 0.4
        fruit_y = 2.0 - self.y * 0.4
        basket_x = -1.8 + self.z * 0.4
        fruit_transform = rendering.Transform(translation=(fruit_x, fruit_y))
        basket_transform = rendering.Transform(translation=(basket_x, -1.8))
        self.viewer.draw_circle(0.2, 20, color=(1, 1, 0)).add_attr(fruit_transform)
        # Frame of the playing field.
        self.viewer.draw_line((-2.0, 2.0), (2.0, 2.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, 2.0), (-2.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((2.0, 2.0), (2.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, -2.0), (2.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_polygon(
            [(-0.6, -0.2), (0.6, -0.2), (0.6, 0.2), (-0.6, 0.2)],
            color=(0, 1, 0)).add_attr(basket_transform)
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

Author And Source

この問題について(windowsでTensorFlow その10)、より詳しい情報は次のURLにあります: https://qiita.com/ohisama@github/items/3acd7c49decccc7721f2

著者帰属：元の著者の情報は、元のURLに含まれています。著作権は原作者に帰属します。

Content is automatically searched and collected through network algorithms. If there is a violation, please contact us. We will adjust (correct author information, or delete content) as soon as possible.