policy_player.py

y = self.model(x)

y is the raw output of the network's last layer, before it passes through the activation function.
logits = y.data[0]

Assign those raw output values to a variable called logits. The term "logit" refers to the value of the output layer before the activation function is applied.
Meaning of [0]

An example of y.data:
[[-4.137782 0.12063725 -4.907426 ... -5.663455 -6.104148 -7.8398824 ]]

y.data[0]:
[-4.137782 0.12063725 -4.907426 ... -5.663455 -6.104148 -7.8398824 ]
When x is generated, features is wrapped in [] before being turned into an np.array:

x = Variable(cuda.to_gpu(np.array([features], dtype=np.float32)))

Wrapping features in [] adds a leading batch dimension of size 1, so x has shape (1, ...). The network output y.data keeps that batch dimension, which is why it has the form [[...]], and y.data[0] extracts the single element of the batch.
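A minimal numpy sketch of this batch-dimension behavior (standalone; the shapes are only illustrative, not the book's actual feature layout):

import numpy as np

features = np.zeros((3, 9, 9), dtype=np.float32)  # dummy feature planes

x = np.array([features], dtype=np.float32)  # wrapping in [] adds a batch axis
print(x.shape)       # (1, 3, 9, 9)

y_data = np.zeros((1, 2187), dtype=np.float32)  # shape of a batched network output
print(y_data[0].shape)  # (2187,) -- [0] removes the batch axis again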
The number of elements in y.data[0] is (20 + 7) * 9 * 9 = 2187: 20 move directions (UP, DOWN, ...) plus 7 types of pieces in hand that can be dropped, times the 9x9 destination squares. This counts every conceivable move, legal or not.
In the value network that appears in Chapter 10, x is generated without wrapping features in []:

x = Variable(cuda.to_gpu(np.array(features, dtype=np.float32)))

In Chapter 10 the positions are filtered down to legal moves first, so features is already a batch and needs no extra dimension. Comparing the two lines naively is confusing.
probabilities = F.softmax(y).data[0]

The resulting probabilities look like:
[1.3974859e-04 9.8799672e-03 6.4728469e-05 ... 3.0391777e-05 1.9559853e-05 3.4478303e-06]
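For reference, a minimal numpy sketch of what softmax does to the logits (written independently here, equivalent in spirit to F.softmax; the example reuses the first three logits shown above):

import numpy as np

def softmax(logits):
    # subtracting the max is a standard numerical-stability trick;
    # it cancels out in the normalization
    z = np.exp(logits - logits.max())
    return z / z.sum()

logits = np.array([-4.137782, 0.12063725, -4.907426], dtype=np.float32)
print(softmax(logits))  # larger logits get larger probabilities; the values sum to 1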
I made the script run on both the iMac and Colab.
# Environment settings
#-----------------------------
import socket
host = socket.gethostname()
# Get the hostname
# google colab  : random
# iMac          : xxxxxxxx
# Lenovo        : yyyyyyyy
# env
# 0: google colab
# 1: iMac (no GPU)
# 2: Lenovo (no GPU)
# gpu_en
# 0: disable
# 1: enable
if host == 'xxxxxxxx':
    env = 1
    gpu_en = 0
elif host == 'yyyyyyyy':
    env = 2
    gpu_en = 0
else:
    env = 0
    gpu_en = 1
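A small sanity check of the detection logic (my own hypothetical helper, mirroring the branches above so they can be verified without logging in to each machine):

def detect(hostname):
    # same branching as above: return (env, gpu_en) for a given hostname
    if hostname == 'xxxxxxxx':
        return 1, 0   # iMac, no GPU
    elif hostname == 'yyyyyyyy':
        return 2, 0   # Lenovo, no GPU
    return 0, 1       # anything else: assume Google Colab with a GPU

print(detect('xxxxxxxx'))            # (1, 0)
print(detect(socket.gethostname()))  # whatever this machine maps to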
The gpu_en flag switches these parts of the file (excerpts from the full listing below):

if gpu_en == 1:
    from chainer import cuda, Variable

    def __init__(self):
        super().__init__()
        if env == 0:
            self.modelfile = '/content/drive/My Drive/.../python-dlshogi/model/model_policy'
        elif env == 1:
            self.modelfile = r'/Users/.../python-dlshogi/model/model_policy' # policy network model file created by training
        elif env == 2:
            self.modelfile = r"C:\Users\...\python-dlshogi\model\model_policy"
        self.model = None

            if gpu_en == 1:
                self.model.to_gpu()

        if gpu_en == 1:
            x = Variable(cuda.to_gpu(np.array([features], dtype=np.float32)))
        elif gpu_en == 0:
            x = np.array([features], dtype=np.float32)

            if gpu_en == 1:
                logits = cuda.to_cpu(y.data)[0]
                probabilities = cuda.to_cpu(F.softmax(y).data)[0]
            elif gpu_en == 0:
                logits = y.data[0]
                probabilities = F.softmax(y).data[0]
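Both gpu_en branches follow the same pattern: move the input to the GPU on the way in and copy the result back to the CPU on the way out. As an aside, a minimal sketch of how they could be collapsed into helpers (my own refactoring, not the book's code):

import numpy as np

def to_device(batch, gpu_en):
    # wrap the batched input for the GPU only when a GPU is enabled
    if gpu_en == 1:
        from chainer import cuda, Variable
        return Variable(cuda.to_gpu(batch))
    return batch

def to_host(array, gpu_en):
    # copy a network output back to the CPU when it lives on the GPU
    if gpu_en == 1:
        from chainer import cuda
        return cuda.to_cpu(array)
    return array

# e.g. x = to_device(np.array([features], dtype=np.float32), gpu_en)
#      logits = to_host(y.data, gpu_en)[0]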
Let the player choose between the greedy strategy and the softmax strategy. The way this was written in the book was hard for me to follow, so I rewrote it.
# strategy
# 'greedy'   : greedy strategy
# 'boltzmann': softmax strategy
algorithm = 'boltzmann'
        if algorithm == 'greedy':
            # (1) pick the move with the highest probability (greedy strategy):
            #     simply return the element with the largest logit
            selected_index = greedy(legal_logits)
        elif algorithm == 'boltzmann':
            # (2) pick a move according to its probability (softmax strategy):
            #     randomly return an element, favoring high-probability moves
            selected_index = boltzmann(np.array(legal_logits, dtype=np.float32), 0.5)
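To see the difference between the two strategies in isolation, here is a toy run with made-up logits (the greedy and boltzmann functions are the same ones defined in the full listing below):

import numpy as np

def greedy(logits):
    return logits.index(max(logits))

def boltzmann(logits, temperature):
    logits /= temperature
    logits -= logits.max()
    probabilities = np.exp(logits)
    probabilities /= probabilities.sum()
    return np.random.choice(len(logits), p=probabilities)

legal_logits = [0.5, 2.0, 1.0]  # made-up values, not real network output
print(greedy(legal_logits))     # always 1, the index of the largest logit
print(boltzmann(np.array(legal_logits, dtype=np.float32), 0.5))  # usually 1, sometimes 0 or 2

A lower temperature pushes the choice toward greedy; a higher one makes it closer to uniform random.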
The complete file: python-dlshogi\pydlshogi\player\policy_player.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Environment settings
#-----------------------------
import socket
host = socket.gethostname()
# Get the hostname
# google colab  : random
# iMac          : xxxxxxxx
# Lenovo        : yyyyyyyy
# env
# 0: google colab
# 1: iMac (no GPU)
# 2: Lenovo (no GPU)
# gpu_en
# 0: disable
# 1: enable
if host == 'xxxxxxxx':
    env = 1
    gpu_en = 0
elif host == 'yyyyyyyy':
    env = 2
    gpu_en = 0
else:
    env = 0
    gpu_en = 1
# strategy
# 'greedy'   : greedy strategy
# 'boltzmann': softmax strategy
algorithm = 'boltzmann'
#-----------------------------
import numpy as np
import chainer
from chainer import serializers
import chainer.functions as F
if gpu_en == 1:
    from chainer import cuda, Variable
import shogi
from pydlshogi.common import *
from pydlshogi.features import *
from pydlshogi.network.policy import *
from pydlshogi.player.base_player import *
def greedy(logits): # returns the index of the largest element of the given list
                    # (in a neural network, the logits are the values before the activation function)
    return logits.index(max(logits)) # list.index(v) returns the position of value v within the list
def boltzmann(logits, temperature):
    logits /= temperature  # a /= b means a = a / b
    logits -= logits.max() # a -= b means a = a - b; all values become <= 0, and the maximum becomes 0
    probabilities = np.exp(logits)  # exp of values <= 0, so each entry is in (0, 1]
    probabilities /= probabilities.sum()
    return np.random.choice(len(logits), p=probabilities) # choice(n, p=probs) randomly returns a number from 0 to n-1 with probabilities probs
class PolicyPlayer(BasePlayer):
    def __init__(self):
        super().__init__()
        if env == 0:
            self.modelfile = '/content/drive/My Drive/.../python-dlshogi/model/model_policy'
        elif env == 1:
            self.modelfile = r'/Users/.../python-dlshogi/model/model_policy' # policy network model file created by training
        elif env == 2:
            self.modelfile = r"C:\Users\...\python-dlshogi\model\model_policy"
        self.model = None
    def usi(self): # GUI side: sends the usi command after startup. Engine side: returns id (and option) followed by usiok.
        print('id name policy_player')
        print('option name modelfile type string default ' + self.modelfile)
        print('usiok')
    def setoption(self, option):
        if option[1] == 'modelfile':
            self.modelfile = option[3]
    def isready(self): # GUI side: sends the isready command before the game starts. Engine side: initializes and returns readyok.
        if self.model is None:
            self.model = PolicyNetwork()
            if gpu_en == 1:
                self.model.to_gpu()
        serializers.load_npz(self.modelfile, self.model)
        print('readyok')
    def go(self):
        if self.board.is_game_over():
            print('bestmove resign')
            return
        features = make_input_features_from_board(self.board)
        if gpu_en == 1:
            x = Variable(cuda.to_gpu(np.array([features], dtype=np.float32)))
        elif gpu_en == 0:
            x = np.array([features], dtype=np.float32)
        with chainer.no_backprop_mode():
            y = self.model(x)
            if gpu_en == 1:
                logits = cuda.to_cpu(y.data)[0]
                probabilities = cuda.to_cpu(F.softmax(y).data)[0]
            elif gpu_en == 0:
                logits = y.data[0] # the raw output values before the activation function; take out the first batch element:
                                   # y.data is    [[-4.137782  0.12063725 -4.907426 ... -5.663455 -6.104148 -7.8398824 ]]
                                   # y.data[0] is [-4.137782  0.12063725 -4.907426 ... -5.663455 -6.104148 -7.8398824 ]
                                   # y.data[0] has (20 + 7) * 9 * 9 = 2187 elements
                probabilities = F.softmax(y).data[0]
                                   # probabilities: [1.3974859e-04 9.8799672e-03 6.4728469e-05 ... 3.0391777e-05 1.9559853e-05 3.4478303e-06]
        # collect all legal moves
        legal_moves = []
        legal_logits = []
        for move in self.board.legal_moves:
            # convert the move to a label
            label = make_output_label(move, self.board.turn) # label encodes the move as 27 planes (20 move directions + 7 droppable pieces) x 9x9 destination squares
            # store each legal move together with its logit
            legal_moves.append(move)
            legal_logits.append(logits[label]) # label is this move's index into logits; keep the logit of each legal move in legal_logits
            # show the probability
            print('info string {:5} : {:.5f}'.format(move.usi(), probabilities[label]))
        if algorithm == 'greedy':
            # (1) pick the move with the highest probability (greedy strategy):
            #     simply return the element with the largest logit
            selected_index = greedy(legal_logits)
        elif algorithm == 'boltzmann':
            # (2) pick a move according to its probability (softmax strategy):
            #     randomly return an element, favoring high-probability moves
            selected_index = boltzmann(np.array(legal_logits, dtype=np.float32), 0.5)
        bestmove = legal_moves[selected_index]
        print('bestmove', bestmove.usi())
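To smoke-test the player without a GUI, something like the following should work (a sketch, assuming BasePlayer keeps the current position in self.board as in the book, and that a trained model file exists at the default path):

import shogi

player = PolicyPlayer()
player.usi()                  # prints id / option / usiok
player.isready()              # builds the network and loads the model file
player.board = shogi.Board()  # start from the initial position
player.go()                   # prints an info string per legal move, then bestmove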
Output from the initial position (excerpt):

2g2f (pawn to 2f) 0.48551
7g7f (pawn to 7f) 0.40747

It played 2g2f, the highest-probability move. No problem there.

This time it played 7g7f. The softmax strategy samples a move at random, weighted toward the high-probability moves, so this is also working as expected.

