TensorFlowでTicTacToeに挑戦


概要

TensorFlowでTicTacToeをやるために、OpenAI GymのTicTacToe環境を作ってみた。
手で確認用のサンプルコード、載せる。
誰かが、TensorFlowで負かせてくれる事を望む。

写真

環境

windows 7 sp1 64bit
anaconda3
tensorflow 1.0
OpenAI Gym 0.5

TicTacToe環境の概要

observe:

3*3の盤面を、要素数9(添字0から8)の配列にして返す

0 1 2
3 4 5
6 7 8

0 なし
1 白
-1 黒

reward:

勝ったら1
勝敗がつかなければ0
負けたら-1

gameOver:

盤が埋まったか、3つ並んだか。

action:

0から8で指定。
-1なら相手。

確認用のサンプルコード

from __future__ import print_function
import math
import sys
import gym
import gym.spaces
import numpy as np
from gym import core, spaces
from gym.utils import seeding
import time
import random
import os.path

class toeEnv(gym.Env):
    """OpenAI Gym environment for Tic-Tac-Toe.

    Board: flat array of 9 cells, row-major:
        0 1 2
        3 4 5
        6 7 8
    Cell values: 0 = empty, 1 = white (the agent), -1 = black (built-in AI).
    Action: 0-8 places a white stone; -1 lets the built-in AI place black.
    Reward: 1 if white has three in a row, -1 if black does, 0 otherwise.
    """
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second' : 10
    }

    # The eight winning lines: three rows, three columns, two diagonals.
    WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        self.viewer = None
        self.state = np.empty(9, dtype = np.int8)

    def check(self, iro):
        """Return -1 if color `iro` occupies a full winning line, else 0."""
        for a, b, c in self.WIN_LINES:
            if self.state[a] == iro and self.state[b] == iro and self.state[c] == iro:
                return -1
        return 0

    def oku(self, put, iro):
        """Place color `iro` on cell `put` if it is empty.

        Returns -1 when the stone was placed, 0 when the cell was occupied.
        """
        if self.state[put] == 0:
            self.state[put] = iro
            return -1
        return 0

    def sasu(self):
        """Choose a move for the built-in AI (black).

        Blocks any white pair that threatens to complete a line, checking
        the threats in the same fixed priority order as the original code;
        otherwise takes the first free cell in a center / edge / corner
        preference order.  Returns -1 only when the board is full.
        """
        # (a, b, c): if cells a and b hold white and c is empty, play c.
        # Bug fix: the original chain listed (2, 8 -> 5) twice and never
        # blocked the (5, 8 -> 2) pair in the right-hand column.
        blocks = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),
            (0, 2, 1), (3, 5, 4), (6, 8, 7),
            (1, 2, 0), (4, 5, 3), (7, 8, 6),
            (0, 3, 6), (1, 4, 7), (2, 5, 8),
            (0, 6, 3), (1, 7, 4), (2, 8, 5),
            (3, 6, 0), (4, 7, 1), (5, 8, 2),
            (0, 4, 8), (0, 8, 4), (4, 8, 0),
            (2, 4, 6), (2, 6, 4), (4, 6, 2),
        )
        for a, b, c in blocks:
            if self.state[a] == 1 and self.state[b] == 1 and self.state[c] == 0:
                return c
        # No immediate threat: prefer the center, then edges, then corners.
        for put in (4, 1, 3, 5, 7, 0, 2, 6, 8):
            if self.state[put] == 0:
                return put
        return -1

    def getState(self):
        """Return the raw board array (shared reference, not a copy)."""
        return self.state

    def getReward(self):
        """Return 1 if white won, -1 if black won, 0 otherwise."""
        if self.check(1) == -1:
            return 1
        if self.check(-1) == -1:
            return -1
        return 0

    def isGameOver(self):
        """True when either color has three in a row or the board is full."""
        if self.check(1) == -1 or self.check(-1) == -1:
            return True
        return all(self.state[i] != 0 for i in range(9))

    def updateState(self, action):
        """Apply an action: 0-8 places white, -1 lets the AI place black.

        Out-of-range actions and moves onto occupied cells are ignored.
        """
        if action < -1 or action > 8:
            return
        if action == -1:
            self.oku(self.sasu(), -1)
        else:
            self.oku(action, 1)

    def observe(self):
        """Return the current board (same array as getState)."""
        return self.state

    def _reset(self):
        # Fresh empty board; np.int8 keeps the observation compact.
        self.state = np.zeros(9, dtype = np.int8)
        return self.observe()

    def _step(self, action):
        """Gym step: apply the action, then report (obs, reward, done, info)."""
        self.updateState(action)
        reward = self.getReward()
        gameOver = self.isGameOver()
        return self.observe(), reward, gameOver, {}

    def _render(self, mode = 'human', close = False):
        """Draw the board: green field, black grid, white/black discs."""
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # Imported on every call because `rendering` is used below even when
        # the viewer already exists (original had a redundant second import).
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
        # Board background and outer frame.
        self.viewer.draw_polygon([(-2.0, -2.0), (1.0, -2.0), (1.0, 1.0), (-2.0, 1.0)], color = (0, 1, 0))
        self.viewer.draw_line((-2.0, 1.0), (1.0, 1.0), color = (0, 0, 0))
        self.viewer.draw_line((-2.0, 1.0), (-2.0, -2.0), color = (0, 0, 0))
        self.viewer.draw_line((1.0, 1.0), (1.0, -2.0), color = (0, 0, 0))
        self.viewer.draw_line((-2.0, -2.0), (1.0, -2.0), color = (0, 0, 0))
        # Inner grid: two vertical and two horizontal lines.
        for i in range(2):
            x0 = i * 1.0 - 1.0
            y0 = -2.0
            x1 = i * 1.0 - 1.0
            y1 = 1.0
            self.viewer.draw_line((x0, y0), (x1, y1), color = (0, 0, 0))
            self.viewer.draw_line((y0, x0), (y1, x1), color = (0, 0, 0))
        # Stones: white circles for 1, black circles for -1.
        for i in range(9):
            if self.state[i] == 1:
                x = (i % 3) * 1.0 - 1.5
                y = 0.5 - (math.floor(i / 3)) * 1.0
                transform0 = rendering.Transform(translation = (x, y))
                self.viewer.draw_circle(0.4, 20, color = (1, 1, 1)).add_attr(transform0)
            if self.state[i] == -1:
                x = (i % 3) * 1.0 - 1.5
                y = 0.5 - (math.floor(i / 3)) * 1.0
                transform0 = rendering.Transform(translation = (x, y))
                self.viewer.draw_circle(0.4, 20, color = (0, 0, 0)).add_attr(transform0)
        return self.viewer.render(return_rgb_array = mode == 'rgb_array')

# Module-level environment instance used by the interactive driver below.
env = toeEnv()

def test(env):
    """Interactive console driver for the TicTacToe environment.

    Type 0-8 to place a white stone, -1 to let the built-in AI move.
    Prints the observation after every step and loops until the game ends.
    """
    # Original bound the reset observation to an unused variable (`obser`).
    env.reset()
    env.render()
    while True:
        try:
            action = int(input())
        except ValueError:
            # Non-numeric input used to crash the loop; re-prompt instead.
            print ("enter an integer 0-8, or -1 for the AI move")
            continue
        observe, reward, gameOver, info = env.step(action)
        print (observe, reward, gameOver)
        env.render()
        if gameOver:
            print ("game over!")
            if reward == 1:
                print ("you win!!")
            elif reward == -1:
                print ("win is ai")
            else:
                print ("draw")
            break

# Run the interactive driver only when executed as a script, not on import.
if __name__ == "__main__":
    test(env)