作者 主題: 記得電影裡的終結者機器人暴君嗎?現在用TensorFlow應該可以實現  (閱讀 7250 次)

0 會員 與 1 訪客 正在閱讀本文。

小徒兒

  • 鑽研的研究生
  • *****
  • 文章數: 622
    • 檢視個人資料
讓我們來玩吧!用深度強化學習方法與機器學習實現終結者機器暴君開發

使用TensorFlow開源軟體庫,用於理解任務的機器學習
深度價值網路+深度決策網路+Monte Carlo Tree+GPU = TERMINATOR
Let’s use Google's Deep Learning TensorFlow Tool to develop Real Terminator
Within Linear Regression to Reinforcement Learning

"""用 deepChem.rl.environment 來教機器人玩井字遊戲 """
import numpy as np
import copy
import random
import deepchem


class TicTacToeEnvironment(deepchem.rl.Environment):
X = np.array([1.0, 0.0])
O = np.array([0.0, 1.0])
EMPTY = np.array([0.0, 0.0])

ILLEGAL_MOVE_PENALTY = -3.0
LOSS_PENALTY = -3.0
NOT_LOSS = 0.1
DRAW_REWARD = 5.0
WIN_REWARD = 10.0

def __init__(self):
super(TicTacToeEnvironment, self).__init__([(3, 3, 2)], 9)
self.reset()

def reset(self):
self._terminated = False
self._state = [np.zeros(shape=(3, 3, 2), dtype=np.float32)]

# Randomize who goes first
if random.randint(0, 1) == 1:
move = self.get_O_move()
self._state[0][move[0]][move[1]] = TicTacToeEnvironment.O

def step(self, action):
self._state = copy.deepcopy(self._state)
row = action // 3
col = action % 3

# Illegal move -- the square is not empty
if not np.all(self._state[0][row][col] == TicTacToeEnvironment.EMPTY):
self._terminated = True
return TicTacToeEnvironment.ILLEGAL_MOVE_PENALTY

# Move X
self._state[0][row][col] = TicTacToeEnvironment.X

# Did X Win
if self.check_winner(TicTacToeEnvironment.X):
self._terminated = True
return TicTacToeEnvironment.WIN_REWARD

if self.game_over():
self._terminated = True
return TicTacToeEnvironment.DRAW_REWARD

move = self.get_O_move()
self._state[0][move[0]][move[1]] = TicTacToeEnvironment.O

# Did O Win
if self.check_winner(TicTacToeEnvironment.O):
self._terminated = True
return TicTacToeEnvironment.LOSS_PENALTY

if self.game_over():
self._terminated = True
return TicTacToeEnvironment.DRAW_REWARD
return TicTacToeEnvironment.NOT_LOSS

def get_O_move(self):
empty_squares = []
for row in range(3):
for col in range(3):
if np.all(self._state[0][row][col] == TicTacToeEnvironment.EMPTY):
empty_squares.append((row, col))
return random.choice(empty_squares)

def check_winner(self, player):
for i in range(3):
row = np.sum(self._state[0][:], axis=0)
if np.all(row == player * 3):
return True
col = np.sum(self._state[0][:], axis=0)
if np.all(col == player * 3):
return True

diag1 = self._state[0][0][0] + self._state[0][1][1] + self._state[0][2][2]
if np.all(diag1 == player * 3):
return True
diag2 = self._state[0][0][2] + self._state[0][1][1] + self._state[0][2][0]
if np.all(diag2 == player * 3):
return True
return False

def game_over(self):
for i in range(3):
for j in range(3):
if np.all(self._state[0][j] == TicTacToeEnvironment.EMPTY):
return False
return True

def display(self):
state = self._state[0]
s = ""
for row in range(3):
for col in range(3):
if np.all(state[row][col] == TicTacToeEnvironment.EMPTY):
s += "_"
if np.all(state[row][col] == TicTacToeEnvironment.X):
s += "X"
if np.all(state[row][col] == TicTacToeEnvironment.O):
s += "O"
s += "\n"
return s


"""用 deepChem.rl.GymEnvironment 來教機器人玩乒乓球 """import deepchem as dc
import numpy as np

class PongEnv(dc.rl.GymEnvironment):
def __init__(self):
super(PongEnv, self).__init__('Pong-v0')
self._state_shape = (80, 80)

@property
def state(self):
# Crop everything outside the play area, reduce the image size,
# and convert it to black and white.
cropped = np.array(self._state)[34:194, :, :]
reduced = cropped[0:-1:2, 0:-1:2]
grayscale = np.sum(reduced, axis=2)
bw = np.zeros(grayscale.shape)
bw[grayscale != 233] = 1
return bw

def __deepcopy__(self, memo):
return PongEnv()

env = PongEnv()