TensorFlowでオセロに挑戦
概要
TensorFlowでオセロをやるために、OpenAI Gymのオセロ環境を作ってみた。
手動で確認するためのサンプルコードを載せる。
誰かが、TensorFlowで負かせてくれる事を望む。
写真
環境
windows 7 sp1 64bit
anaconda3
tensorflow 1.0
OpenAI Gym 0.5
オセロ環境の概要
observe:
8*8の盤面を0から63までの配列にして返す
0 1 2 3 4 5 6 7
8 9 10 11 12 13 14 15
16 17 18 19 20 21 22 23
24 25 26 27 28 29 30 31
32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47
48 49 50 51 52 53 54 55
56 57 58 59 60 61 62 63
0 なし
1 白
-1 黒
reward:
獲った石の数
gameOver:
石が無くなったか、盤が埋まった。
action:
0から63で指定。
-1なら相手。
確認用のサンプルコード
from __future__ import print_function
import math
import sys
import gym
import gym.spaces
import numpy as np
from gym import core, spaces
from gym.utils import seeding
import time
import random
import os.path
class oseroEnv(gym.Env):
    """Minimal Othello (Reversi) environment in the OpenAI Gym style.

    The board is 64 cells indexed 0..63 (row-major 8x8), stored in
    ``self.state`` with 0 = empty, 1 = white (the human/agent) and
    -1 = black (the built-in AI).  The reward of a step is the number of
    opponent stones flipped by that move.

    Fixes vs. the original: locals ``dir``/``all`` no longer shadow
    builtins, row index uses integer division instead of
    ``math.floor``, and ``_render`` no longer re-imports the rendering
    module on every frame outside the lazy-init path.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 10
    }

    def __init__(self):
        self.viewer = None  # rendering.Viewer, created lazily in _render
        self.gridSize = 8
        self.nbStates = self.gridSize * self.gridSize  # 64 cells
        self.state = np.empty(self.nbStates, dtype=np.int8)
        self.count = 0  # stones captured by the most recent move (= reward)

    def check(self, put, d):
        """Return 1 if stepping from cell ``put`` by offset ``d`` leaves the board.

        ``d`` is a flat-index offset (-9/-8/-7/-1/1/7/8/9).  Edge columns
        and rows must be tested explicitly because a flat index silently
        wraps from one row to the next.
        """
        res = 0
        x = put % 8   # column
        y = put // 8  # row (integer division replaces math.floor(put / 8))
        if x == 0 and d in (-9, -1, 7):    # west edge: no west-going step
            res = 1
        if x == 7 and d in (-7, 1, 9):     # east edge
            res = 1
        if y == 0 and d in (-9, -8, -7):   # north edge
            res = 1
        if y == 7 and d in (7, 8, 9):      # south edge
            res = 1
        i = put + d
        if i < 0:    # belt-and-braces: target index out of the array
            res = 1
        if i > 63:
            res = 1
        return res

    def oku(self, put, iro):
        """Place a stone of colour ``iro`` (1 white / -1 black) at cell ``put``.

        Flips every enclosed run of opponent stones in all 8 directions and
        adds the number of flipped stones to ``self.count``.  Returns -1 if
        the move captured something, 0 if it was illegal (cell occupied or
        nothing to flip) — illegal moves are silently ignored.
        """
        res = 0
        turn = -iro  # opponent colour (original: turn = 1; if iro == 1: turn = -1)
        dirs = [-9, -8, -7, -1, 1, 7, 8, 9]  # renamed: 'dir' shadowed the builtin
        if self.state[put] != 0:
            return res  # occupied cell: not a legal placement
        for d in dirs:
            # Walk from `put` along d over contiguous opponent stones.
            count = 0
            tugi = put
            while True:
                if self.check(tugi, d) == 1:
                    break            # would run off the board
                count += 1
                tugi += d
                if self.state[tugi] != turn:
                    break            # run of opponent stones ended
            # count > 1 means at least one opponent stone was traversed;
            # the run is capturable only if it terminates on our own stone.
            if count > 1 and self.state[tugi] == iro:
                res = -1
                tugi = put
                while True:
                    self.state[tugi] = iro  # also claims `put` itself
                    tugi += d
                    if self.state[tugi] != turn:
                        break
                    self.count += 1         # one captured opponent stone
        return res

    def sasu(self):
        """Choose a move for the built-in black AI.

        Scans empty cells in a fixed priority order (corners first, then
        cells graded roughly by positional value) and returns the first
        cell from which at least one white run can be captured.
        NOTE(review): returns 0 when no move exists, which is ambiguous
        with "play cell 0"; oku() then harmlessly does nothing.
        """
        suji = [0, 7, 56, 63, 18, 21, 42, 45, 2, 16, 5, 23, 40, 58, 47, 61,
                3, 4, 11, 12, 19, 20, 24, 25, 26, 32, 33, 34, 29, 30, 31, 37,
                38, 39, 43, 44, 51, 52, 59, 60, 1, 8, 9, 10, 17, 6, 13, 14,
                15, 22, 41, 48, 49, 50, 57, 46, 53, 54, 55, 62]
        res = 0
        total = 0   # renamed: 'all' shadowed the builtin
        iro = -1    # the AI plays black
        turn = 1    # white stones are what it captures
        dirs = [-9, 9, -7, 7, -1, 1, -8, 8]
        for put in suji:
            if self.state[put] != 0:
                continue
            for d in dirs:
                count = 0
                if self.check(put, d) == 0:
                    tugi = put + d
                    while True:
                        if self.state[tugi] == turn:
                            count += 1
                            if self.check(tugi, d) == 1:
                                break
                            tugi += d
                        else:
                            break
                    # Capturable run: >=1 white stones ending on a black stone.
                    if count > 0 and self.state[tugi] == iro:
                        total += count
            if total > 0:
                res = put
                break
        return res

    def getState(self):
        """Return the raw 64-cell board array."""
        return self.state

    def getReward(self):
        """Number of stones captured by the most recent move."""
        return self.count

    def isGameOver(self):
        """True when the board is full or either colour has no stones left."""
        siro = 0  # saw at least one white stone
        kuro = 0  # saw at least one black stone
        end = True
        for v in self.state:
            if v == 0:
                end = False  # an empty cell: board not yet full
            if v == 1:
                siro = 1
            if v == -1:
                kuro = 1
        if siro == 0 or kuro == 0:
            end = True  # one colour wiped out ends the game
        return end

    def updateState(self, action):
        """Apply an action: 0..63 places a white stone; -1 lets the AI move.

        Out-of-range actions are ignored. ``self.count`` (the reward) is
        reset before each move so it reflects only this move's captures.
        """
        if action < -1 or action > 63:
            return
        self.count = 0
        if action == -1:
            self.oku(self.sasu(), -1)  # AI picks and plays a black move
        else:
            self.oku(action, 1)

    def observe(self):
        """The observation is the flat board array itself."""
        return self.state

    def _reset(self):
        self.state = np.zeros(self.nbStates, dtype=np.int8)
        # Standard Othello opening: white on 27/36, black on 28/35.
        self.state[27] = 1
        self.state[28] = -1
        self.state[35] = -1
        self.state[36] = 1
        self.count = 0
        return self.observe()

    def _step(self, action):
        self.updateState(action)
        return self.observe(), self.getReward(), self.isGameOver(), {}

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # Imported lazily so headless use of the env never touches a display.
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
        # Green board background and outer border (redrawn every frame —
        # gym's draw_* helpers add one-time geometries).
        self.viewer.draw_polygon(
            [(-2.0, -2.0), (2.0, -2.0), (2.0, 2.0), (-2.0, 2.0)],
            color=(0, 1, 0))
        self.viewer.draw_line((-2.0, 2.0), (2.0, 2.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, 2.0), (-2.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((2.0, 2.0), (2.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, -2.0), (2.0, -2.0), color=(0, 0, 0))
        # Seven inner grid lines each way (cells are 0.5 units wide).
        for i in range(7):
            c = i * 0.5 - 1.5
            self.viewer.draw_line((c, -2.0), (c, 2.0), color=(0, 0, 0))
            self.viewer.draw_line((-2.0, c), (2.0, c), color=(0, 0, 0))
        # Stones: white circles for 1, black circles for -1.
        for i in range(64):
            if self.state[i] == 0:
                continue
            x = (i % 8) * 0.5 - 1.75
            y = 1.75 - (i // 8) * 0.5
            transform0 = rendering.Transform(translation=(x, y))
            col = (1, 1, 1) if self.state[i] == 1 else (0, 0, 0)
            self.viewer.draw_circle(0.2, 20, color=col).add_attr(transform0)
        return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
def test(env):
    """Interactive driver: type a cell index 0-63 (or -1 for the AI) per turn."""
    env.reset()
    env.render()
    done = False
    while not done:
        action = int(input())
        obs, reward, done, _ = env.step(action)
        print(obs, reward)
        env.render()
    print("game over!")


env = oseroEnv()
test(env)
from __future__ import print_function
import math
import sys
import gym
import gym.spaces
import numpy as np
from gym import core, spaces
from gym.utils import seeding
import time
import random
import os.path
class oseroEnv(gym.Env):
    """Minimal Othello (Reversi) environment in the OpenAI Gym style.

    The board is 64 cells indexed 0..63 (row-major 8x8), stored in
    ``self.state`` with 0 = empty, 1 = white (the human/agent) and
    -1 = black (the built-in AI).  The reward of a step is the number of
    opponent stones flipped by that move.

    Fixes vs. the original: locals ``dir``/``all`` no longer shadow
    builtins, row index uses integer division instead of
    ``math.floor``, and ``_render`` no longer re-imports the rendering
    module on every frame outside the lazy-init path.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 10
    }

    def __init__(self):
        self.viewer = None  # rendering.Viewer, created lazily in _render
        self.gridSize = 8
        self.nbStates = self.gridSize * self.gridSize  # 64 cells
        self.state = np.empty(self.nbStates, dtype=np.int8)
        self.count = 0  # stones captured by the most recent move (= reward)

    def check(self, put, d):
        """Return 1 if stepping from cell ``put`` by offset ``d`` leaves the board.

        ``d`` is a flat-index offset (-9/-8/-7/-1/1/7/8/9).  Edge columns
        and rows must be tested explicitly because a flat index silently
        wraps from one row to the next.
        """
        res = 0
        x = put % 8   # column
        y = put // 8  # row (integer division replaces math.floor(put / 8))
        if x == 0 and d in (-9, -1, 7):    # west edge: no west-going step
            res = 1
        if x == 7 and d in (-7, 1, 9):     # east edge
            res = 1
        if y == 0 and d in (-9, -8, -7):   # north edge
            res = 1
        if y == 7 and d in (7, 8, 9):      # south edge
            res = 1
        i = put + d
        if i < 0:    # belt-and-braces: target index out of the array
            res = 1
        if i > 63:
            res = 1
        return res

    def oku(self, put, iro):
        """Place a stone of colour ``iro`` (1 white / -1 black) at cell ``put``.

        Flips every enclosed run of opponent stones in all 8 directions and
        adds the number of flipped stones to ``self.count``.  Returns -1 if
        the move captured something, 0 if it was illegal (cell occupied or
        nothing to flip) — illegal moves are silently ignored.
        """
        res = 0
        turn = -iro  # opponent colour (original: turn = 1; if iro == 1: turn = -1)
        dirs = [-9, -8, -7, -1, 1, 7, 8, 9]  # renamed: 'dir' shadowed the builtin
        if self.state[put] != 0:
            return res  # occupied cell: not a legal placement
        for d in dirs:
            # Walk from `put` along d over contiguous opponent stones.
            count = 0
            tugi = put
            while True:
                if self.check(tugi, d) == 1:
                    break            # would run off the board
                count += 1
                tugi += d
                if self.state[tugi] != turn:
                    break            # run of opponent stones ended
            # count > 1 means at least one opponent stone was traversed;
            # the run is capturable only if it terminates on our own stone.
            if count > 1 and self.state[tugi] == iro:
                res = -1
                tugi = put
                while True:
                    self.state[tugi] = iro  # also claims `put` itself
                    tugi += d
                    if self.state[tugi] != turn:
                        break
                    self.count += 1         # one captured opponent stone
        return res

    def sasu(self):
        """Choose a move for the built-in black AI.

        Scans empty cells in a fixed priority order (corners first, then
        cells graded roughly by positional value) and returns the first
        cell from which at least one white run can be captured.
        NOTE(review): returns 0 when no move exists, which is ambiguous
        with "play cell 0"; oku() then harmlessly does nothing.
        """
        suji = [0, 7, 56, 63, 18, 21, 42, 45, 2, 16, 5, 23, 40, 58, 47, 61,
                3, 4, 11, 12, 19, 20, 24, 25, 26, 32, 33, 34, 29, 30, 31, 37,
                38, 39, 43, 44, 51, 52, 59, 60, 1, 8, 9, 10, 17, 6, 13, 14,
                15, 22, 41, 48, 49, 50, 57, 46, 53, 54, 55, 62]
        res = 0
        total = 0   # renamed: 'all' shadowed the builtin
        iro = -1    # the AI plays black
        turn = 1    # white stones are what it captures
        dirs = [-9, 9, -7, 7, -1, 1, -8, 8]
        for put in suji:
            if self.state[put] != 0:
                continue
            for d in dirs:
                count = 0
                if self.check(put, d) == 0:
                    tugi = put + d
                    while True:
                        if self.state[tugi] == turn:
                            count += 1
                            if self.check(tugi, d) == 1:
                                break
                            tugi += d
                        else:
                            break
                    # Capturable run: >=1 white stones ending on a black stone.
                    if count > 0 and self.state[tugi] == iro:
                        total += count
            if total > 0:
                res = put
                break
        return res

    def getState(self):
        """Return the raw 64-cell board array."""
        return self.state

    def getReward(self):
        """Number of stones captured by the most recent move."""
        return self.count

    def isGameOver(self):
        """True when the board is full or either colour has no stones left."""
        siro = 0  # saw at least one white stone
        kuro = 0  # saw at least one black stone
        end = True
        for v in self.state:
            if v == 0:
                end = False  # an empty cell: board not yet full
            if v == 1:
                siro = 1
            if v == -1:
                kuro = 1
        if siro == 0 or kuro == 0:
            end = True  # one colour wiped out ends the game
        return end

    def updateState(self, action):
        """Apply an action: 0..63 places a white stone; -1 lets the AI move.

        Out-of-range actions are ignored. ``self.count`` (the reward) is
        reset before each move so it reflects only this move's captures.
        """
        if action < -1 or action > 63:
            return
        self.count = 0
        if action == -1:
            self.oku(self.sasu(), -1)  # AI picks and plays a black move
        else:
            self.oku(action, 1)

    def observe(self):
        """The observation is the flat board array itself."""
        return self.state

    def _reset(self):
        self.state = np.zeros(self.nbStates, dtype=np.int8)
        # Standard Othello opening: white on 27/36, black on 28/35.
        self.state[27] = 1
        self.state[28] = -1
        self.state[35] = -1
        self.state[36] = 1
        self.count = 0
        return self.observe()

    def _step(self, action):
        self.updateState(action)
        return self.observe(), self.getReward(), self.isGameOver(), {}

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        # Imported lazily so headless use of the env never touches a display.
        from gym.envs.classic_control import rendering
        if self.viewer is None:
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-2.2, 2.2, -2.2, 2.2)
        # Green board background and outer border (redrawn every frame —
        # gym's draw_* helpers add one-time geometries).
        self.viewer.draw_polygon(
            [(-2.0, -2.0), (2.0, -2.0), (2.0, 2.0), (-2.0, 2.0)],
            color=(0, 1, 0))
        self.viewer.draw_line((-2.0, 2.0), (2.0, 2.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, 2.0), (-2.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((2.0, 2.0), (2.0, -2.0), color=(0, 0, 0))
        self.viewer.draw_line((-2.0, -2.0), (2.0, -2.0), color=(0, 0, 0))
        # Seven inner grid lines each way (cells are 0.5 units wide).
        for i in range(7):
            c = i * 0.5 - 1.5
            self.viewer.draw_line((c, -2.0), (c, 2.0), color=(0, 0, 0))
            self.viewer.draw_line((-2.0, c), (2.0, c), color=(0, 0, 0))
        # Stones: white circles for 1, black circles for -1.
        for i in range(64):
            if self.state[i] == 0:
                continue
            x = (i % 8) * 0.5 - 1.75
            y = 1.75 - (i // 8) * 0.5
            transform0 = rendering.Transform(translation=(x, y))
            col = (1, 1, 1) if self.state[i] == 1 else (0, 0, 0)
            self.viewer.draw_circle(0.2, 20, color=col).add_attr(transform0)
        return self.viewer.render(return_rgb_array=(mode == 'rgb_array'))
def test(env):
    """Interactive driver: type a cell index 0-63 (or -1 for the AI) per turn."""
    env.reset()
    env.render()
    done = False
    while not done:
        action = int(input())
        obs, reward, done, _ = env.step(action)
        print(obs, reward)
        env.render()
    print("game over!")


env = oseroEnv()
test(env)
Author And Source
この問題について(TensorFlowでオセロに挑戦)、我々は、より多くの情報をここで見つけました: https://qiita.com/ohisama@github/items/c2708c19c8eb45498ec3 。著者帰属: 元の著者の情報は、元のURLに含まれています。著作権は原作者に属する。
Content is automatically searched and collected through network algorithms . If there is a violation . Please contact us . We will adjust (correct author information ,or delete content ) as soon as possible .