diff --git a/docker/requirements.txt b/docker/requirements.txt
index 3d1ba23..41cc91f 100644
--- a/docker/requirements.txt
+++ b/docker/requirements.txt
@@ -3,3 +3,9 @@ tqdm
 python-socketio
 eventlet
 python-socketio[client]
+matplotlib
+seaborn
+torch
+pytorch_lightning
+tensorboard
+gym
\ No newline at end of file
diff --git a/python/custom_ai.py b/python/custom_ai.py
new file mode 100644
index 0000000..2ddd2d7
--- /dev/null
+++ b/python/custom_ai.py
@@ -0,0 +1,236 @@
+import glob
+import os
+
+import numpy as np
+import pytorch_lightning as pl
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.data
+from mjx import Action, Observation, State
+from pytorch_lightning import Trainer
+from torch import Tensor, nn, optim, utils
+from torch.utils.data import DataLoader, TensorDataset
+
+
+def make_dataset():
+    """Build the (feature, action index) dataset from recorded game logs.
+
+    Every path matched by ``./datas/*`` (the first 1300 in sorted order) is
+    read line by line and each non-blank line is parsed as one ``State``.
+    Each past decision of a state becomes one row: the flattened
+    ``mjx-small-v0`` feature vector followed by the action index. All rows
+    are saved to ``dataset/small-1300.npy``.
+
+    Raises:
+        ValueError: If the input files contain no decisions at all.
+    """
+    # glob returns paths in arbitrary order; sort so that the same files
+    # are selected on every run.
+    files = sorted(glob.glob("./datas/*"))[:1300]
+
+    # Rows are collected in one flat list. Stacking per batch of files and
+    # concatenating afterwards fails as soon as one batch is empty.
+    rows = []
+    for file in files:
+        with open(file) as f:
+            for line in f:
+                if not line.strip():
+                    continue  # skip blank lines
+                state = State(line)
+                for cpp_obs, cpp_act in state._cpp_obj.past_decisions():
+                    obs = Observation._from_cpp_obj(cpp_obs)
+                    feature = obs.to_features(
+                        feature_name="mjx-small-v0").ravel()
+                    action_idx = Action._from_cpp_obj(cpp_act).to_idx()
+                    rows.append(np.append(feature, action_idx))
+
+    if not rows:
+        raise ValueError("no decisions found in ./datas/*")
+
+    # np.save does not create missing parent directories.
+    os.makedirs("dataset", exist_ok=True)
+    np.save("dataset/small-1300", np.array(rows))
+
+
+def int_to_binary_vector(num):
+    """Return the binary digits of ``num`` as an int array (8+ digits)."""
+    binary = bin(num)[2:].zfill(8)  # binary digits, zero-padded to 8
+    vector = np.array(list(binary)).astype(int)  # one element per digit
+    return vector
+
+
+def binary_vector_to_int(vector):
+    """Return the integer whose binary digits are given by ``vector``."""
+    binary_str = ''.join(vector.astype(str))  # digits to a single string
+    num = int(binary_str, 2)  # parse the string as a base-2 number
+    return num
+
+
+# Handling of the training data
+class TrainNet(pl.LightningModule):
+    """Mixin with the training dataloader and training step.
+
+    Relies on the concrete class for ``batch_size``, ``forward`` and
+    ``lossfun``, and on a module-level ``train`` dataset (created in the
+    ``__main__`` block of this file).
+    """
+
+    def train_dataloader(self):
+        return torch.utils.data.DataLoader(train, self.batch_size, shuffle=True)
+
+    def training_step(self, batch, batch_nb):
+        x, t = batch
+        y = self.forward(x)
+        loss = self.lossfun(y, t)
+        results = {'loss': loss}
+        return results
+
+
+# Handling of the validation data
+class ValidationNet(pl.LightningModule):
+    """Mixin with the validation dataloader and validation step.
+
+    Uses the module-level ``val`` dataset.
+    """
+
+    def val_dataloader(self):
+        return torch.utils.data.DataLoader(val, self.batch_size)
+
+    def validation_step(self, batch, batch_nb):
+        x, t = batch
+        y = self.forward(x)
+        loss = self.lossfun(y, t)
+        # Report the loss through Lightning's logger; recent Lightning
+        # releases do not call ``validation_end`` any more.
+        self.log('val_loss', loss)
+        results = {'val_loss': loss}
+        return results
+
+    def validation_end(self, outputs):
+        # Legacy epoch-level hook, kept for backward compatibility.
+        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
+        results = {'val_loss': avg_loss}
+        return results
+
+
+# Handling of the test data
+class TestNet(pl.LightningModule):
+    """Mixin with the test dataloader and test step.
+
+    Uses the module-level ``test`` dataset.
+    """
+
+    def test_dataloader(self):
+        return torch.utils.data.DataLoader(test, self.batch_size)
+
+    def test_step(self, batch, batch_nb):
+        x, t = batch
+        y = self.forward(x)
+        loss = self.lossfun(y, t)
+        # Report the loss through Lightning's logger; recent Lightning
+        # releases do not call ``test_end`` any more.
+        self.log('test_loss', loss)
+        results = {'test_loss': loss}
+        return results
+
+    def test_end(self, outputs):
+        # Legacy epoch-level hook, kept for backward compatibility.
+        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
+        results = {'test_loss': avg_loss}
+        return results
+
+
+# Class combining the training, validation and test handling
+class Net(TrainNet, ValidationNet, TestNet):
+    """Fully connected network with a single hidden layer."""
+
+    def __init__(self, input_size=544, hidden_size=544, output_size=181, batch_size=100):
+        super(Net, self).__init__()
+        self.fc1 = nn.Linear(input_size, hidden_size)
+        self.fc2 = nn.Linear(hidden_size, output_size)
+        self.batch_size = batch_size
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = F.relu(x)
+        x = self.fc2(x)
+        return x
+
+    # Mean squared error
+    def lossfun(self, y, t):
+        return F.mse_loss(y, t)
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        return optimizer
+
+
+# Class combining the training, validation and test handling
+class Net2(TrainNet, ValidationNet, TestNet):
+    """Fully connected network with two hidden layers (second one 362 wide)."""
+
+    def __init__(self, input_size=544, hidden_size=544, output_size=181, batch_size=100):
+        super(Net2, self).__init__()
+        self.fc1 = nn.Linear(input_size, hidden_size)
+        self.fc2 = nn.Linear(hidden_size, 362)
+        self.fc3 = nn.Linear(362, output_size)
+        self.batch_size = batch_size
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = F.relu(x)
+        x = self.fc2(x)
+        x = F.relu(x)
+        x = self.fc3(x)
+        return x
+
+    # Mean squared error
+    def lossfun(self, y, t):
+        return F.mse_loss(y, t)
+
+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        return optimizer
+
+
+if __name__ == "__main__":
+    # Run make_dataset() first when dataset/small-1300.npy does not exist.
+    dataset = np.load("dataset/small-1300.npy")
+    x = dataset[:, :-1]
+    # The action index shares the array (and dtype) with the features;
+    # cast it to an integer type so it can index the identity matrix.
+    y = dataset[:, -1].astype(np.int64)
+
+    y = np.eye(181)[y]  # convert to a one-hot representation
+
+    x = torch.tensor(x, dtype=torch.float32)
+    y = torch.tensor(y, dtype=torch.float32)
+    dataset = torch.utils.data.TensorDataset(x, y)
+
+    n_train = int(len(dataset) * 0.7)
+    n_val = int(len(dataset) * 0.2)
+    n_test = len(dataset) - n_train - n_val
+
+    # The split is random, so fix the seed for reproducibility.
+    torch.manual_seed(0)
+
+    # Split the dataset
+    train, val, test = torch.utils.data.random_split(
+        dataset, [n_train, n_val, n_test])
+
+    torch.manual_seed(0)
+
+    # Instantiate the model and the trainer
+    net = Net2()
+    trainer = Trainer(max_epochs=30)
+
+    # Run the training
+    trainer.fit(net)
+
+    # Test the model that was just trained (the weights saved below).
+    trainer.test(net)
+
+    # torch.save does not create missing parent directories.
+    os.makedirs("models", exist_ok=True)
+    torch.save(net.state_dict(), 'models/fifth_model.pth')
diff --git a/python/custom_client_sakoma33.py b/python/custom_client_sakoma33.py
new file mode 100644
index 0000000..1b5757f
--- /dev/null
+++ b/python/custom_client_sakoma33.py
@@ -0,0 +1,82 @@
+import random
+
+import mjx
+import torch
+from client.agent import CustomAgentBase
+from client.client import SocketIOClient
+from custom_ai import Net, Net2
+from torch import Tensor, nn, optim, utils
+
+
+# Subclass CustomAgentBase and edit custom_act() to implement
+# your mahjong AI.
+class MyAgent(CustomAgentBase):
+    def __init__(self):
+        super().__init__()
+        self.model = Net2()
+        self.model.load_state_dict(torch.load("models/fifth_model.pth"))
+        # The model is only used for inference, so switch to eval mode
+        # once here instead of on every call to custom_act().
+        self.model.eval()
+
+    def custom_act(self, obs: mjx.Observation) -> mjx.Action:
+        """Decide which action to take for the given observation.
+
+        Args:
+            obs (mjx.Observation): Board information and the available
+                actions (obs.legal_actions()).
+
+        Returns:
+            mjx.Action: The action that is actually taken.
+        """
+        # Action indices:
+        # 0~33: Discard m1~rd
+        # 34,35,36: Discard m5(red), p5(red), s5(red)
+        # 37~70: Tsumogiri m1~rd
+        # 71,72,73: Tsumogiri m5(red), p5(red), s5(red)
+        # 74~94: Chi m1m2m3 ~ s7s8s9
+        # 95,96,97: Chi m3m4m5(red), m4m5(red)m6, m5(red)m6m7
+        # 98,99,100: Chi p3p4p5(red), p4p5(red)p6, p5(red)p6p7
+        # 101,102,103: Chi s3s4s5(red), s4s5(red)s6, s5(red)s6s7
+        # 104~137: Pon m1~rd
+        # 138,139,140: Pon m5(w/ red), s5(w/ red), p5(w/ red)
+        # 141~174: Kan m1~rd
+        # 175: Tsumo
+        # 176: Ron
+        # 177: Riichi
+        # 178: Kyuushu
+        # 179: No
+        # 180: Dummy
+
+        legal_actions = obs.legal_actions()
+        if len(legal_actions) == 1:
+            return legal_actions[0]
+
+        # Prediction
+        feature = obs.to_features(feature_name="mjx-small-v0")
+        with torch.no_grad():
+            action_logit = self.model(Tensor(feature.ravel()))
+        action_proba = torch.sigmoid(action_logit).numpy()
+
+        # Apply the action mask and take the highest-scoring index.
+        mask = obs.action_mask()
+        action_idx = (mask * action_proba).argmax()
+        return mjx.Action.select_from(action_idx, legal_actions)
+
+
+if __name__ == "__main__":
+    # To play a four-player game, connect four SocketIOClients to the
+    # same server.
+    my_agent = MyAgent()  # instantiate the player implemented above
+
+    sio_client = SocketIOClient(
+        ip='localhost',
+        port=5000,
+        namespace='/test',
+        query='secret',
+        agent=my_agent,  # the player to use
+        room_id=123,  # room ID; use the same ID for a four-player game
+    )
+    # Run the SocketIO client instance
+    sio_client.run()
+    sio_client.enter_room()
diff --git a/python/custom_evaluate.py b/python/custom_evaluate.py
new file mode 100644
index 0000000..ec9a843
--- /dev/null
+++ b/python/custom_evaluate.py
@@ -0,0 +1,29 @@
+import json
+import os
+
+import numpy as np
+
+
+def evaluate_from_nall_logs():
+    """Evaluate every game directory found under ``logs``."""
+    log_root = "logs"
+    evaluate_from_arg(
+        [os.path.join(log_root, name) for name in os.listdir(log_root)])
+
+
+def evaluate_from_arg(dir_list):
+    """Print the final scores read from the given games and their mean.
+
+    Args:
+        dir_list: Game log directories; ``player_0.json`` is read from
+            each of them.
+    """
+    final_tens = []
+    for game_dir in dir_list:
+        with open(os.path.join(game_dir, "player_0.json"), mode="r") as f:
+            d = json.load(f)
+        # Default to index 0 when the JSON has no "who" key.
+        who = d.get("who", 0)
+        final_tens.append(d["roundTerminal"]["finalScore"]["tens"][who])
+    print(final_tens)
+    print(np.array(final_tens).mean())
diff --git a/python/custom_trial.py b/python/custom_trial.py
new file mode 100644
index 0000000..ce183dd
--- /dev/null
+++ b/python/custom_trial.py
@@ -0,0 +1,155 @@
+"""Script that plays the requested number of games.
+
+The game results can optionally be written to disk.
+"""
+
+import argparse
+import json
+import os
+import random
+from datetime import datetime
+
+import custom_evaluate
+import mjx
+import mjx.agents
+import torch
+from client.agent import CustomAgentBase
+from custom_ai import Net, Net2
+from mjx.const import ActionType
+from server import convert_log
+from torch import Tensor, nn, optim, utils
+
+
+# Subclass CustomAgentBase and edit custom_act() to implement
+# your mahjong AI.
+class MyAgent(CustomAgentBase):
+    def __init__(self):
+        super().__init__()
+        self.model = Net2()
+        self.model.load_state_dict(torch.load("models/fifth_model.pth"))
+        # The model is only used for inference, so switch to eval mode
+        # once here instead of on every call to custom_act().
+        self.model.eval()
+
+    def custom_act(self, obs: mjx.Observation) -> mjx.Action:
+        """Decide which action to take for the given observation.
+
+        Args:
+            obs (mjx.Observation): Board information and the available
+                actions (obs.legal_actions()).
+
+        Returns:
+            mjx.Action: The action that is actually taken.
+        """
+        # Action indices:
+        # 0~33: Discard m1~rd
+        # 34,35,36: Discard m5(red), p5(red), s5(red)
+        # 37~70: Tsumogiri m1~rd
+        # 71,72,73: Tsumogiri m5(red), p5(red), s5(red)
+        # 74~94: Chi m1m2m3 ~ s7s8s9
+        # 95,96,97: Chi m3m4m5(red), m4m5(red)m6, m5(red)m6m7
+        # 98,99,100: Chi p3p4p5(red), p4p5(red)p6, p5(red)p6p7
+        # 101,102,103: Chi s3s4s5(red), s4s5(red)s6, s5(red)s6s7
+        # 104~137: Pon m1~rd
+        # 138,139,140: Pon m5(w/ red), s5(w/ red), p5(w/ red)
+        # 141~174: Kan m1~rd
+        # 175: Tsumo
+        # 176: Ron
+        # 177: Riichi
+        # 178: Kyuushu
+        # 179: No
+        # 180: Dummy
+
+        legal_actions = obs.legal_actions()
+        if len(legal_actions) == 1:
+            return legal_actions[0]
+
+        # Prediction
+        feature = obs.to_features(feature_name="mjx-small-v0")
+        with torch.no_grad():
+            action_logit = self.model(Tensor(feature.ravel()))
+        action_proba = torch.sigmoid(action_logit).numpy()
+
+        # Apply the action mask and take the highest-scoring index.
+        mask = obs.action_mask()
+        action_idx = (mask * action_proba).argmax()
+        return mjx.Action.select_from(action_idx, legal_actions)
+
+
+def save_log(obs_dict, env, logs):
+    """Write the files describing one finished game to a new directory.
+
+    The directory is named after the current timestamp and is created
+    under ``logs``. It receives one ``<player_id>.json`` per entry of
+    ``obs_dict`` plus ``tenho.log``, ``finish.svg`` and ``env.json``.
+
+    Returns:
+        str: Path of the directory that was created.
+    """
+    logdir = "logs"
+    now = datetime.now().strftime('%Y%m%d%H%M%S%f')
+    dir_path = os.path.join(logdir, now)
+    # Also creates ``logs`` itself when it does not exist yet.
+    os.makedirs(dir_path)
+
+    for player_id, obs in obs_dict.items():
+        with open(os.path.join(dir_path, f"{player_id}.json"), "w") as f:
+            json.dump(json.loads(obs.to_json()), f)
+    with open(os.path.join(dir_path, "tenho.log"), "w") as f:
+        f.write(logs.get_url())
+    env.state().save_svg(os.path.join(dir_path, "finish.svg"))
+    with open(os.path.join(dir_path, "env.json"), "w") as f:
+        f.write(env.state().to_json())
+
+    return dir_path
+
+
+if __name__ == "__main__":
+    # Command-line arguments:
+    #   -n, --number (int): how many games to play
+    #   -l, --log (flag): save the game results when given
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-n", "--number", type=int, default=32,
+                        help="number of game iteration")
+    parser.add_argument("-l", "--log", action="store_true",
+                        help="whether log will be stored")
+    args = parser.parse_args()
+
+    logging = args.log
+    n_games = args.number
+
+    player_names_to_idx = {
+        "player_0": 0,
+        "player_1": 1,
+        "player_2": 2,
+        "player_3": 3,
+    }
+
+    agents = [
+        MyAgent(),  # our own agent
+        mjx.agents.ShantenAgent(),  # agent shipped with mjx
+        mjx.agents.ShantenAgent(),  # agent shipped with mjx
+        mjx.agents.ShantenAgent(),  # agent shipped with mjx
+    ]
+
+    dir_paths = []
+    logs = convert_log.ConvertLog()
+    for _ in range(n_games):
+        # Set up a fresh table for every game
+        env_ = mjx.MjxEnv()
+        obs_dict = env_.reset()
+        while not env_.done():
+            actions = {}
+            for player_id, obs in obs_dict.items():
+                actions[player_id] = agents[player_names_to_idx[player_id]].act(
+                    obs)
+            obs_dict = env_.step(actions)
+            if len(obs_dict.keys()) == 4:
+                logs.add_log(obs_dict)
+        returns = env_.rewards()
+        if logging:
+            dir_path = save_log(obs_dict, env_, logs)
+            dir_paths.append(dir_path)
+        print("game has ended")
+    if logging:
+        custom_evaluate.evaluate_from_arg(dir_paths)
diff --git a/python/dataset/large.npy b/python/dataset/large.npy
new file mode 100644
index 0000000..b82ceb6
Binary files /dev/null and b/python/dataset/large.npy differ
diff --git a/python/models/fifth_model.pth b/python/models/fifth_model.pth
new file mode 100644
index 0000000..5b6a98b
Binary files /dev/null and b/python/models/fifth_model.pth differ
diff --git a/python/models/fourth_model.pth b/python/models/fourth_model.pth
new file mode 100644
index 0000000..d5e74be
Binary files /dev/null and b/python/models/fourth_model.pth differ
diff --git a/python/models/third_model.pth b/python/models/third_model.pth
new file mode 100644
index 0000000..c5d5dc6
Binary files /dev/null and b/python/models/third_model.pth differ
diff --git a/python/sample_trial.py b/python/sample_trial.py
index 5fa95d3..526354b 100644
--- a/python/sample_trial.py
+++ b/python/sample_trial.py
@@ -2,16 +2,15 @@
 """
 
 import argparse
-import os
-from datetime import datetime
 import json
+import os
 import random
+from datetime import datetime
 
 import mjx
 import mjx.agents
-
-from server import convert_log
 from client.agent import CustomAgentBase
+from server import convert_log
 
 
 # CustomAgentBase を継承して,
@@ -66,7 +65,7 @@ def save_log(obs_dict, env, logs):
     logging = args.log
     n_games = args.number
 
-    player_names_to_idx ={
+    player_names_to_idx = {
         "player_0": 0,
         "player_1": 1,
         "player_2": 2,
@@ -78,7 +77,7 @@ def save_log(obs_dict, env, logs):
         mjx.agents.ShantenAgent(),  # mjxに実装されているAgent
         mjx.agents.ShantenAgent(),  # mjxに実装されているAgent
         mjx.agents.ShantenAgent(),  # mjxに実装されているAgent
-        ]
+    ]
 
     # 卓の初期化
     env_ = mjx.MjxEnv()
@@ -89,12 +88,12 @@ def save_log(obs_dict, env, logs):
         while not env_.done():
             actions = {}
             for player_id, obs in obs_dict.items():
-                actions[player_id] = agents[player_names_to_idx[player_id]].act(obs)
+                actions[player_id] = agents[player_names_to_idx[player_id]].act(
+                    obs)
             obs_dict = env_.step(actions)
-            if len(obs_dict.keys())==4:
+            if len(obs_dict.keys()) == 4:
                 logs.add_log(obs_dict)
         returns = env_.rewards()
         if logging:
             save_log(obs_dict, env_, logs)
         print("game has ended")
-