TensorFlowでTicTacToeに挑戦
概要
TensorFlowでTicTacToeをやるために、OpenAI GymのTicTacToe環境を作ってみた。
手で確認用のサンプルコード、載せる。
誰かが、TensorFlowで負かせてくれる事を望む。
写真
環境
windows 7 sp1 64bit
anaconda3
tensorflow 1.0
OpenAi Gym 0.5
TicTacToe環境の概要
observe:
3*3の盤面を、インデックス0から8までの長さ9の配列にして返す
0 1 2
3 4 5
6 7 8
0 なし
1 白
-1 黒
reward:
勝ったら1
何も0
負けたら-1
gameOver:
盤が埋まったか、3つ並んだか。
action:
0から8で指定。
-1なら相手。
確認用のサンプルコード
from __future__ import print_function
import math
import sys
import gym
import gym.spaces
import numpy as np
from gym import core, spaces
from gym.utils import seeding
import time
import random
import os.path
class toeEnv(gym.Env):
    """OpenAI Gym environment for Tic-Tac-Toe on a 3x3 board.

    Board cells are indexed 0-8 in row-major order:
        0 1 2
        3 4 5
        6 7 8
    Cell values: 0 = empty, 1 = white (the caller's stone, placed by
    actions 0-8), -1 = black (the built-in opponent, triggered by
    action -1).  Reward is +1 for a white win, -1 for a black win,
    0 otherwise.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 10,
    }

    # The eight winning lines: three rows, three columns, two diagonals.
    WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    # Blocking patterns for the built-in opponent, in priority order:
    # (a, b, empty) means "if cells a and b hold white stones and `empty`
    # is free, play at `empty`".  The order mirrors the original
    # hand-written cascade.  BUGFIX: the original listed (2, 8, 5) twice
    # and was missing (5, 8, 2), so the opponent never blocked the top of
    # column 2; the missing triple is restored here.
    _BLOCKS = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows, right cell empty
        (0, 2, 1), (3, 5, 4), (6, 8, 7),   # rows, middle cell empty
        (1, 2, 0), (4, 5, 3), (7, 8, 6),   # rows, left cell empty
        (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns, bottom cell empty
        (0, 6, 3), (1, 7, 4), (2, 8, 5),   # columns, middle cell empty
        (3, 6, 0), (4, 7, 1), (5, 8, 2),   # columns, top cell empty (restored)
        (0, 4, 8), (0, 8, 4), (4, 8, 0),   # main diagonal
        (2, 4, 6), (2, 6, 4), (4, 6, 2),   # anti-diagonal
    )

    # Fallback move preference: centre first, then edges, then corners.
    _PREFERENCE = (4, 1, 3, 5, 7, 0, 2, 6, 8)

    def __init__(self):
        self.viewer = None
        # Was np.empty (uninitialized memory); start from a defined,
        # empty board so the state is valid even before reset().
        self.state = np.zeros(9, dtype=np.int8)

    def check(self, iro):
        """Return -1 if colour `iro` (1 or -1) owns a full line, else 0."""
        for a, b, c in self.WIN_LINES:
            if self.state[a] == iro and self.state[b] == iro and self.state[c] == iro:
                return -1
        return 0

    def oku(self, put, iro):
        """Place colour `iro` at cell `put` if free.

        Returns -1 on success, 0 if the cell was occupied (original
        convention kept for backward compatibility).
        """
        if self.state[put] == 0:
            self.state[put] = iro
            return -1
        return 0

    def sasu(self):
        """Choose the built-in opponent's move.

        First block any line where white (1) has two stones and the third
        cell is empty; otherwise take the first free cell in _PREFERENCE
        order.  Returns -1 only when the board is full.
        """
        for a, b, empty in self._BLOCKS:
            if self.state[a] == 1 and self.state[b] == 1 and self.state[empty] == 0:
                return empty
        for put in self._PREFERENCE:
            if self.state[put] == 0:
                return put
        return -1

    def getState(self):
        """Return the raw board array (length-9 int8, not a copy)."""
        return self.state

    def getReward(self):
        """Return 1 if white has won, -1 if black has won, else 0."""
        if self.check(1) == -1:
            return 1
        if self.check(-1) == -1:
            return -1
        return 0

    def isGameOver(self):
        """Return True when either side has a line or the board is full."""
        if self.check(1) == -1 or self.check(-1) == -1:
            return True
        return all(cell != 0 for cell in self.state)

    def updateState(self, action):
        """Apply an action: 0-8 places a white stone, -1 asks the opponent
        to place a black one.  Out-of-range actions are silently ignored,
        as in the original implementation.
        """
        if action < -1 or action > 8:
            return
        if action == -1:
            self.oku(self.sasu(), -1)
        else:
            self.oku(action, 1)

    def observe(self):
        """Return the current observation (the board array itself)."""
        return self.state

    def _reset(self):
        self.state = np.zeros(9, dtype=np.int8)
        return self.observe()

    def _step(self, action):
        self.updateState(action)
        return self.observe(), self.getReward(), self.isGameOver(), {}

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # Deferred import: rendering requires a display; the original
        # imported it twice here, once is enough.
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
        # Board background and frame (draw_* geoms are one-shot in gym's
        # classic_control renderer, so they are redrawn every frame).
        self.viewer.draw_polygon([(-2.0, -2.0), (1.0, -2.0), (1.0, 1.0), (-2.0, 1.0)], color=(0, 1, 0))
        self.viewer.draw_line((-2.0, 1.0), (1.0, 1.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, 1.0), (-2.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((1.0, 1.0), (1.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, -2.0), (1.0, -2.0), color=(0, 0, 0))
        # Inner grid: two vertical and two horizontal lines.
        for i in range(2):
            v = i * 1.0 - 1.0
            self.viewer.draw_line((v, -2.0), (v, 1.0), color=(0, 0, 0))
            self.viewer.draw_line((-2.0, v), (1.0, v), color=(0, 0, 0))
        # Stones: white circles for 1, black circles for -1.
        for i in range(9):
            if self.state[i] == 0:
                continue
            x = (i % 3) * 1.0 - 1.5
            y = 0.5 - (i // 3) * 1.0
            transform0 = rendering.Transform(translation=(x, y))
            colour = (1, 1, 1) if self.state[i] == 1 else (0, 0, 0)
            self.viewer.draw_circle(0.4, 20, color=colour).add_attr(transform0)
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')
env = toeEnv()


def test(env):
    """Play one interactive game against the built-in opponent.

    Reads a cell index (0-8, or -1 for the opponent's turn) from stdin
    each round, steps the environment, prints the observation, and
    announces the result once the game is over.
    """
    obser = env.reset()
    env.render()
    gameOver = False
    while not gameOver:
        i = int(input())
        observe, reward, gameOver, info = env.step(i)
        print(observe, reward, gameOver)
        env.render()
        if gameOver:
            print("game over!")
            if reward == 1:
                print("you win!!")
            elif reward == -1:
                print("win is ai")
            else:
                print("draw")


test(env)
from __future__ import print_function
import math
import sys
import gym
import gym.spaces
import numpy as np
from gym import core, spaces
from gym.utils import seeding
import time
import random
import os.path
class toeEnv(gym.Env):
    """A 3x3 Tic-Tac-Toe environment in the OpenAI Gym interface.

    The board is a flat length-9 array (cells 0..8, row-major); a cell
    holds 0 (empty), 1 (white, placed via actions 0-8) or -1 (black,
    placed by the built-in opponent when the action is -1).
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 10,
    }

    # Three-in-a-row lines: rows, columns, diagonals.
    _WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    # (a, b, empty): if white occupies a and b and `empty` is free, the
    # opponent plays `empty`.  The order reproduces the original cascade
    # verbatim — including its duplicate (2, 8, 5) entry, which can never
    # fire, and its missing (5, 8, 2) case — to keep behavior identical.
    _BLOCK_TRIPLES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 2, 1), (3, 5, 4), (6, 8, 7),
        (1, 2, 0), (4, 5, 3), (7, 8, 6),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 6, 3), (1, 7, 4), (2, 8, 5),
        (3, 6, 0), (4, 7, 1), (2, 8, 5),
        (0, 4, 8), (0, 8, 4), (4, 8, 0),
        (2, 4, 6), (2, 6, 4), (4, 6, 2),
    )

    def __init__(self):
        self.viewer = None
        # NOTE(review): np.empty leaves the board uninitialized until
        # _reset() runs — presumably callers always reset first; confirm.
        self.state = np.empty(9, dtype=np.int8)

    def check(self, iro):
        """Return -1 when colour `iro` completes any line, 0 otherwise."""
        found = 0
        for a, b, c in self._WIN_LINES:
            if self.state[a] == iro and self.state[b] == iro and self.state[c] == iro:
                found = -1
        return found

    def oku(self, put, iro):
        """Try to place `iro` at cell `put`; -1 on success, 0 if occupied."""
        if self.state[put] != 0:
            return 0
        self.state[put] = iro
        return -1

    def sasu(self):
        """Pick the opponent's cell: block white's pairs, else prefer centre."""
        for a, b, empty in self._BLOCK_TRIPLES:
            if self.state[a] == 1 and self.state[b] == 1 and self.state[empty] == 0:
                return empty
        for candidate in (4, 1, 3, 5, 7, 0, 2, 6, 8):
            if self.state[candidate] == 0:
                return candidate
        return -1

    def getState(self):
        """Expose the underlying board array."""
        return self.state

    def getReward(self):
        """1 for a white win, -1 for a black win, 0 otherwise."""
        if self.check(1) == -1:
            return 1
        if self.check(-1) == -1:
            return -1
        return 0

    def isGameOver(self):
        """True once a side has won or no empty cell remains."""
        board_full = True
        for cell in self.state:
            if cell == 0:
                board_full = False
        if self.check(1) == -1 or self.check(-1) == -1:
            return True
        return board_full

    def updateState(self, action):
        """Apply an action; values outside -1..8 are ignored."""
        if -1 <= action <= 8:
            if action == -1:
                self.oku(self.sasu(), -1)
            else:
                self.oku(action, 1)

    def observe(self):
        """Observation = the board array itself (no copy)."""
        return self.state

    def _reset(self):
        self.state = np.zeros(9, dtype=np.int8)
        return self.observe()

    def _step(self, action):
        self.updateState(action)
        return self.observe(), self.getReward(), self.isGameOver(), {}

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        from gym.envs.classic_control import rendering  # needs a display
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
        # Green board with a black frame; geoms are one-shot, so they are
        # issued again on every call.
        self.viewer.draw_polygon(
            [(-2.0, -2.0), (1.0, -2.0), (1.0, 1.0), (-2.0, 1.0)],
            color=(0, 1, 0))
        self.viewer.draw_line((-2.0, 1.0), (1.0, 1.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, 1.0), (-2.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((1.0, 1.0), (1.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, -2.0), (1.0, -2.0), color=(0, 0, 0))
        # Inner grid lines.
        for k in range(2):
            v = k * 1.0 - 1.0
            self.viewer.draw_line((v, -2.0), (v, 1.0), color=(0, 0, 0))
            self.viewer.draw_line((-2.0, v), (1.0, v), color=(0, 0, 0))
        # Stones: white circles for 1, black for -1.
        for cell in range(9):
            value = self.state[cell]
            if value == 0:
                continue
            cx = (cell % 3) * 1.0 - 1.5
            cy = 0.5 - (math.floor(cell / 3)) * 1.0
            shift = rendering.Transform(translation=(cx, cy))
            stone = (1, 1, 1) if value == 1 else (0, 0, 0)
            self.viewer.draw_circle(0.4, 20, color=stone).add_attr(shift)
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')
env = toeEnv()


def _announce(reward):
    # Print the final result line for a finished game.
    print("game over!")
    if reward == 1:
        print("you win!!")
    elif reward == -1:
        print("win is ai")
    else:
        print("draw")


def test(env):
    """Interactive console loop: read a move, step, render, until done."""
    obser = env.reset()
    env.render()
    while True:
        i = int(input())
        observe, reward, gameOver, info = env.step(i)
        print(observe, reward, gameOver)
        env.render()
        if gameOver:
            _announce(reward)
            break


test(env)
Author And Source
この問題について(TensorFlowでTicTacToeに挑戦)、我々はより多くの情報をここで見つけました: https://qiita.com/ohisama@github/items/f58ee96eab2de44ba1a6 。著者帰属: 元の著者の情報は元のURLに含まれています。著作権は原作者に属します。
Content is automatically searched and collected through network algorithms . If there is a violation . Please contact us . We will adjust (correct author information ,or delete content ) as soon as possible .