diff --git a/docker/requirements.txt b/docker/requirements.txt
index 3d1ba23..41cc91f 100644
--- a/docker/requirements.txt
+++ b/docker/requirements.txt
@@ -3,3 +3,9 @@ tqdm
 python-socketio
 eventlet
 python-socketio[client]
+matplotlib
+seaborn
+torch
+pytorch_lightning
+tensorboard
+gym
\ No newline at end of file
diff --git a/python/custom_ai.py b/python/custom_ai.py
new file mode 100644
index 0000000..2ddd2d7
--- /dev/null
+++ b/python/custom_ai.py
@@ -0,0 +1,204 @@
+import glob
+import os
+
+import numpy as np
+import pytorch_lightning as pl
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.data
+from mjx import Action, Observation, State
+from pytorch_lightning import Trainer
+from torch import Tensor, nn, optim, utils
+from torch.utils.data import DataLoader, TensorDataset
+
+
+def make_dataset():
+    """Build the supervised (feature, action) dataset from the logs in ./datas.
+
+    Each line of each log file is parsed as an mjx State; every past decision
+    becomes one row: the flattened "mjx-small-v0" features followed by the
+    index of the action that was taken. All rows are stacked into one array
+    and saved to "dataset/small-1300.npy".
+
+    Raises:
+        ValueError: if no decision could be extracted from the logs.
+    """
+    max_files = 1300
+    # glob order is arbitrary; sort so the same files are picked on every run.
+    files = sorted(glob.glob("./datas/*"))[:max_files]
+
+    # One flat list of rows: a file without decisions simply contributes nothing.
+    rows = []
+    for file in files:
+        with open(file) as f:
+            for line in f:
+                state = State(line)
+                for cpp_obs, cpp_act in state._cpp_obj.past_decisions():
+                    obs = Observation._from_cpp_obj(cpp_obs)
+                    feature = obs.to_features(feature_name="mjx-small-v0").ravel()
+                    action_idx = Action._from_cpp_obj(cpp_act).to_idx()
+                    # the label is stored as the last column of the row
+                    rows.append(np.append(feature, action_idx))
+
+    if not rows:
+        raise ValueError("no decisions found under ./datas")
+
+    os.makedirs("dataset", exist_ok=True)  # np.save does not create directories
+    np.save("dataset/small-1300", np.array(rows))
+
+
+def int_to_binary_vector(num):
+    """Return the binary digits of num, left-padded with zeros to 8 places, as a 1-D int array."""
+    bits = bin(num)[2:].rjust(8, "0")
+    return np.array([*bits]).astype(int)
+
+
+def binary_vector_to_int(vector):
+    """Read a 1-D array of binary digits (most significant first) as an integer."""
+    digits = vector.astype(str)
+    return int("".join(digits), 2)
+
+
+# 学習データに対する処理
+class TrainNet(pl.LightningModule):
+    """Mixin providing the training dataloader and the per-batch training step."""
+
+    def train_dataloader(self):
+        # `train` is the module-level split created in the __main__ section
+        return torch.utils.data.DataLoader(train, self.batch_size, shuffle=True)
+
+    def training_step(self, batch, batch_nb):
+        inputs, targets = batch
+        loss = self.lossfun(self.forward(inputs), targets)
+        return {'loss': loss}
+
+
+# 検証データに対する処理
+class ValidationNet(pl.LightningModule):
+    """Mixin providing the validation dataloader, step and loss averaging."""
+
+    def val_dataloader(self):
+        # `val` is the module-level split created in the __main__ section
+        return torch.utils.data.DataLoader(val, self.batch_size)
+
+    def validation_step(self, batch, batch_nb):
+        inputs, targets = batch
+        loss = self.lossfun(self.forward(inputs), targets)
+        return {'val_loss': loss}
+
+    def validation_end(self, outputs):
+        # NOTE(review): confirm the installed Lightning release still calls a hook with this name
+        step_losses = [out['val_loss'] for out in outputs]
+        return {'val_loss': torch.stack(step_losses).mean()}
+
+
+# テストデータに対する処理
+class TestNet(pl.LightningModule):
+    """Mixin providing the test dataloader, step and loss averaging."""
+
+    def test_dataloader(self):
+        # `test` is the module-level split created in the __main__ section
+        return torch.utils.data.DataLoader(test, self.batch_size)
+
+    def test_step(self, batch, batch_nb):
+        inputs, targets = batch
+        loss = self.lossfun(self.forward(inputs), targets)
+        return {'test_loss': loss}
+
+    def test_end(self, outputs):
+        # NOTE(review): confirm the installed Lightning release still calls a hook with this name
+        step_losses = [out['test_loss'] for out in outputs]
+        return {'test_loss': torch.stack(step_losses).mean()}
+
+
+# 学習データ、検証データ、テストデータへの処理を継承したクラス
+class Net(TrainNet, ValidationNet, TestNet):
+    """Two-layer perceptron: Linear -> ReLU -> Linear.
+
+    Optimised with Adam (lr=1e-3) against a mean squared error loss.
+    """
+
+    def __init__(self, input_size=544, hidden_size=544, output_size=181, batch_size=100):
+        super().__init__()
+        self.batch_size = batch_size  # read by the dataloader methods of the base classes
+        self.fc1 = nn.Linear(input_size, hidden_size)
+        self.fc2 = nn.Linear(hidden_size, output_size)
+
+    def forward(self, x):
+        hidden = F.relu(self.fc1(x))
+        return self.fc2(hidden)
+
+    def lossfun(self, y, t):
+        # mean squared error between the network output and the target
+        return F.mse_loss(y, t)
+
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.parameters(), lr=1e-3)
+
+
+# 学習データ、検証データ、テストデータへの処理を継承したクラス
+class Net2(TrainNet, ValidationNet, TestNet):
+    """Three-layer perceptron: Linear -> ReLU -> Linear(362) -> ReLU -> Linear.
+
+    The width of the second hidden layer is fixed at 362. Optimised with Adam
+    (lr=1e-3) against a mean squared error loss.
+    """
+
+    def __init__(self, input_size=544, hidden_size=544, output_size=181, batch_size=100):
+        super().__init__()
+        self.batch_size = batch_size  # read by the dataloader methods of the base classes
+        self.fc1 = nn.Linear(input_size, hidden_size)
+        self.fc2 = nn.Linear(hidden_size, 362)
+        self.fc3 = nn.Linear(362, output_size)
+
+    def forward(self, x):
+        for layer in (self.fc1, self.fc2):
+            x = F.relu(layer(x))
+        return self.fc3(x)
+
+    def lossfun(self, y, t):
+        # mean squared error between the network output and the target
+        return F.mse_loss(y, t)
+
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.parameters(), lr=1e-3)
+
+
+if __name__ == "__main__":
+    # make_dataset()
+    dataset = np.load("dataset/small-1300.npy")
+    x = dataset[:, :-1]  # every column but the last: the features
+    y = dataset[:, -1]  # last column: the action index appended by make_dataset
+
+    y = np.eye(181)[y]   # convert to a one-hot representation
+
+    x = torch.tensor(x, dtype=torch.float32)
+    y = torch.tensor(y, dtype=torch.float32)
+    dataset = torch.utils.data.TensorDataset(x, y)
+
+    n_train = int(len(dataset) * 0.7)
+    n_val = int(len(dataset) * 0.2)
+    n_test = len(dataset) - n_train - n_val
+
+    # The split is random, so fix the seed for reproducibility
+    torch.manual_seed(0)
+
+    # Split the dataset; these three globals are read by the *_dataloader methods
+    train, val, test = torch.utils.data.random_split(
+        dataset, [n_train, n_val, n_test])
+
+    torch.manual_seed(0)
+
+    # Instantiate the model and the trainer
+    net = Net2()
+    trainer = Trainer(max_epochs=30)
+
+    # Run training
+    trainer.fit(net)
+
+    trainer.test()
+
+    # torch.save(net, 'models/second_model.pth')
+    # model = torch.load("models/second_model.pth")
+    torch.save(net.state_dict(), 'models/fifth_model.pth')
diff --git a/python/custom_client_sakoma33.py b/python/custom_client_sakoma33.py
new file mode 100644
index 0000000..1b5757f
--- /dev/null
+++ b/python/custom_client_sakoma33.py
@@ -0,0 +1,79 @@
+import random
+
+import mjx
+import torch
+from client.agent import CustomAgentBase
+from client.client import SocketIOClient
+from custom_ai import Net, Net2
+from torch import Tensor, nn, optim, utils
+
+
+# CustomAgentBase を継承して，
+# custom_act()を編集して麻雀AIを実装してください．
+class MyAgent(CustomAgentBase):
+    """Agent that plays the legal action scored highest by a trained Net2 model."""
+
+    def __init__(self):
+        super().__init__()
+        self.model = Net2()
+        # map_location lets weights saved on a GPU machine load on a CPU-only one
+        self.model.load_state_dict(torch.load("models/fifth_model.pth", map_location="cpu"))
+        self.model.eval()  # inference only; set once instead of on every call
+
+    def custom_act(self, obs: mjx.Observation) -> mjx.Action:
+        """Choose the action to play for the given observation.
+
+        Args:
+            obs (mjx.Observation): board state and available actions (obs.legal_actions())
+
+        Returns:
+            mjx.Action: the action to take
+        """
+        # // 0~33: Discard m1~rd
+        # // 34,35,36: Discard m5(red), p5(red), s5(red)
+        # // 37~70: Tsumogiri m1~rd
+        # // 71,72,73: Tsumogiri m5(red), p5(red), s5(red)
+        # // 74~94: Chi m1m2m3 ~ s7s8s9
+        # // 95,96,97: Chi m3m4m5(red), m4m5(red)m6, m5(red)m6m7
+        # // 98,99,100: Chi p3p4p5(red), p4p5(red)p6, p5(red)p6p7
+        # // 101,102,103: Chi s3s4s5(red), s4s5(red)s6, s5(red)s6s7
+        # // 104~137: Pon m1~rd
+        # // 138,139,140: Pon m5(w/ red), s5(w/ red), p5(w/ red)
+        # // 141~174: Kan m1~rd
+        # // 175: Tsumo
+        # // 176: Ron
+        # // 177: Riichi
+        # // 178: Kyuushu
+        # // 179: No
+        # // 180: Dummy
+
+        legal_actions = obs.legal_actions()
+        if len(legal_actions) == 1:
+            return legal_actions[0]
+
+        feature = obs.to_features(feature_name="mjx-small-v0")
+        with torch.no_grad():
+            scores = self.model(torch.as_tensor(feature.ravel(), dtype=torch.float32))
+
+        # Exclude illegal actions with -inf rather than multiplying sigmoid outputs by
+        # the mask: a legal action whose sigmoid rounds to 0 would tie with illegal ones.
+        legal = torch.as_tensor(obs.action_mask(), dtype=torch.bool)
+        action_idx = int(scores.masked_fill(~legal, float("-inf")).argmax())
+        return mjx.Action.select_from(action_idx, legal_actions)
+
+
+if __name__ == "__main__":
+    # 4人で対局する場合は，4つのSocketIOClientで同一のサーバーに接続する．
+    my_agent = MyAgent()  # 参加者が実装したプレイヤーをインスタンス化
+
+    sio_client = SocketIOClient(
+        ip='localhost',
+        port=5000,
+        namespace='/test',
+        query='secret',
+        agent=my_agent,  # プレイヤーの指定
+        room_id=123,  # 部屋のID．4人で対局させる時は，同じIDを指定する．
+    )
+    # SocketIO Client インスタンスを実行
+    sio_client.run()
+    sio_client.enter_room()
diff --git a/python/custom_evaluate.py b/python/custom_evaluate.py
new file mode 100644
index 0000000..ec9a843
--- /dev/null
+++ b/python/custom_evaluate.py
@@ -0,0 +1,40 @@
+import json
+import os
+
+import numpy as np
+
+
+def evaluate_from_nall_logs():
+    """Print player_0's final score for every game under ./logs, then the mean.
+
+    Each sub-directory of "logs" is expected to hold one game's player_0.json;
+    entries without that file (e.g. stray files) are skipped instead of raising.
+    """
+    log_root = "logs"
+    final_tens = []
+    # os.listdir order is arbitrary; sort so the printed list is reproducible.
+    for dir_name in sorted(os.listdir(log_root)):
+        json_path = os.path.join(log_root, dir_name, "player_0.json")
+        if not os.path.isfile(json_path):
+            continue
+        with open(json_path, mode="r") as f:
+            d = json.load(f)
+        who = d.get("who", 0)  # seat index; 0 when the key is absent
+        final_tens.append(d["roundTerminal"]["finalScore"]["tens"][who])
+    print(final_tens)
+    print(np.array(final_tens).mean())
+
+
+def evaluate_from_arg(dir_list):
+    """Print player_0's final score per log directory, then the mean of those scores.
+
+    Each entry of dir_list must be a directory containing a player_0.json file.
+    """
+    final_tens = []
+    for log_dir in dir_list:
+        with open(os.path.join(log_dir, "player_0.json"), mode="r") as f:
+            record = json.load(f)
+        seat = record.get("who", 0)  # falls back to seat 0 when the key is absent
+        final_tens.append(record["roundTerminal"]["finalScore"]["tens"][seat])
+    print(final_tens)
+    print(np.array(final_tens).mean())
diff --git a/python/custom_trial.py b/python/custom_trial.py
new file mode 100644
index 0000000..ce183dd
--- /dev/null
+++ b/python/custom_trial.py
@@ -0,0 +1,269 @@
+"""対局を指定した回数行うスクリプト．対局結果の出力も可能．
+"""
+
+import argparse
+import json
+import os
+import random
+from datetime import datetime
+
+import custom_evaluate
+import mjx
+import mjx.agents
+import torch
+from client.agent import CustomAgentBase
+from custom_ai import Net, Net2
+from mjx.const import ActionType
+from server import convert_log
+from torch import Tensor, nn, optim, utils
+
+# CustomAgentBase を継承して，
+# custom_act()を編集して麻雀AIを実装してください．
+
+
+class MyAgent(CustomAgentBase):
+    """Agent that plays the legal action scored highest by a trained Net2 model.
+
+    The weights are read from models/fifth_model.pth when the agent is created.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.model = Net2()
+        # map_location lets weights saved on a GPU machine load on a CPU-only one
+        self.model.load_state_dict(torch.load("models/fifth_model.pth", map_location="cpu"))
+        self.model.eval()  # inference only; set once instead of on every call
+
+    def custom_act(self, obs: mjx.Observation) -> mjx.Action:
+        """Choose the action to play for the given observation.
+
+        Args:
+            obs (mjx.Observation): board state and available actions (obs.legal_actions())
+
+        Returns:
+            mjx.Action: the action to take
+        """
+        # // 0~33: Discard m1~rd
+        # // 34,35,36: Discard m5(red), p5(red), s5(red)
+        # // 37~70: Tsumogiri m1~rd
+        # // 71,72,73: Tsumogiri m5(red), p5(red), s5(red)
+        # // 74~94: Chi m1m2m3 ~ s7s8s9
+        # // 95,96,97: Chi m3m4m5(red), m4m5(red)m6, m5(red)m6m7
+        # // 98,99,100: Chi p3p4p5(red), p4p5(red)p6, p5(red)p6p7
+        # // 101,102,103: Chi s3s4s5(red), s4s5(red)s6, s5(red)s6s7
+        # // 104~137: Pon m1~rd
+        # // 138,139,140: Pon m5(w/ red), s5(w/ red), p5(w/ red)
+        # // 141~174: Kan m1~rd
+        # // 175: Tsumo
+        # // 176: Ron
+        # // 177: Riichi
+        # // 178: Kyuushu
+        # // 179: No
+        # // 180: Dummy
+
+        # --- earlier rule-based experiments, kept commented out for reference ---
+        # def find_action(legal_action_idx):
+        #     legal_actions_idx = [element.to_idx() for element in obs.legal_actions()]
+        #     if legal_action_idx in legal_actions_idx:
+        #         action_index = legal_actions_idx.index(legal_action_idx)
+        #     return obs.legal_actions()[action_index]
+
+        # return_action = find_action(175)
+        # legal_actions = obs.legal_actions()
+        # legal_actions_idx = [element.to_idx() for element in obs.legal_actions()]
+        # print(obs.action_mask())
+        # print(legal_actions_idx)
+
+        # if 175 in legal_actions_idx:  # Tsumo
+        #     action_index = legal_actions_idx.index(175)
+        #     print("tsumo")
+        #     print(obs.legal_actions()[action_index].type())
+        #     return obs.legal_actions()[action_index]
+        # elif 176 in legal_actions_idx:  # Ron
+        #     action_index = legal_actions_idx.index(176)
+        #     print("ron")
+        #     print(obs.legal_actions()[action_index].type())
+        #     return obs.legal_actions()[action_index]
+        # elif 177 in legal_actions_idx:  # Riichi
+        #     action_index = legal_actions_idx.index(177)
+        #     print("riichi")
+        #     print(obs.legal_actions()[action_index].type())
+        #     return obs.legal_actions()[action_index]
+        # elif not set(legal_actions_idx).isdisjoint(set(range(74, 175))):
+        #     legal_actions = [element for element in legal_actions if element.to_idx() not in range(74, 175)]
+        #     return_action = random.choice(legal_actions)
+        #     # print("excluded")
+        #     # print(return_action.type())
+        #     return return_action
+        # elif not set(legal_actions_idx).isdisjoint(set(range(0, 74))):
+        #     legal_actions = [element for element in legal_actions if element.to_idx() in range(0, 74)]
+        #     effective_discard_types = obs.curr_hand().effective_discard_types()
+        #     effective_discards = [
+        #         a for a in legal_actions if a.tile().type() in effective_discard_types
+        #     ]
+        #     return_action = random.choice(effective_discards)
+        #     # print("excluded")
+        #     # print(return_action.type())
+        #     return return_action
+
+        # print(obs.legal_actions()[0].to_json())
+        # print(obs.legal_actions())
+        # print([element.to_json() for element in obs.legal_actions()])
+        # print([element.tile().type() for element in obs.legal_actions()])
+        # print([element.tile().id() for element in obs.legal_actions()])
+        # print([element.type() for element in obs.legal_actions()])
+        # print([element.to_idx() for element in obs.legal_actions()])
+        # # print([element.to_proto() for element in obs.legal_actions()])
+        # # print(obs.to_features("mjx-large-v0"))
+        # # print(obs.MjxLargeV0().current_hand(obs))
+        # print()
+        # print()
+        # legal_actions = observation.legal_actions()
+        # if len(legal_actions) == 1:
+        #     return legal_actions[0]
+
+        # # if it can win, just win
+        # win_actions = [a for a in legal_actions if a.type() in [ActionType.TSUMO, ActionType.RON]]
+        # if len(win_actions) >= 1:
+        #     assert len(win_actions) == 1
+        #     return win_actions[0]
+
+        # # if it can declare riichi, just declar riichi
+        # riichi_actions = [a for a in legal_actions if a.type() == ActionType.RIICHI]
+        # if len(riichi_actions) >= 1:
+        #     assert len(riichi_actions) == 1
+        #     return riichi_actions[0]
+
+        # # if it can apply chi/pon/open-kan, choose randomly
+        # steal_actions = [
+        #     a
+        #     for a in legal_actions
+        #     if a.type() in [ActionType.CHI, ActionType.PON, ActionType, ActionType.OPEN_KAN]
+        # ]
+        # if len(steal_actions) >= 1:
+        #     return random.choice(steal_actions)
+
+        # # if it can apply closed-kan/added-kan, choose randomly
+        # kan_actions = [
+        #     a for a in legal_actions if a.type() in [ActionType.CLOSED_KAN, ActionType.ADDED_KAN]
+        # ]
+        # if len(kan_actions) >= 1:
+        #     return random.choice(kan_actions)
+
+        # # discard an effective tile randomly
+        # legal_discards = [
+        #     a for a in legal_actions if a.type() in [ActionType.DISCARD, ActionType.TSUMOGIRI]
+        # ]
+        # effective_discard_types = observation.curr_hand().effective_discard_types()
+        # effective_discards = [
+        #     a for a in legal_discards if a.tile().type() in effective_discard_types
+        # ]
+        # if len(effective_discards) > 0:
+        #     return random.choice(effective_discards)
+
+        # # if no effective tile exists, discard randomly
+        # return random.choice(legal_discards)
+
+        legal_actions = obs.legal_actions()
+        if len(legal_actions) == 1:
+            return legal_actions[0]
+
+        feature = obs.to_features(feature_name="mjx-small-v0")
+        with torch.no_grad():
+            scores = self.model(torch.as_tensor(feature.ravel(), dtype=torch.float32))
+
+        # Exclude illegal actions with -inf rather than multiplying sigmoid outputs by
+        # the mask: a legal action whose sigmoid rounds to 0 would tie with illegal ones.
+        legal = torch.as_tensor(obs.action_mask(), dtype=torch.bool)
+        action_idx = int(scores.masked_fill(~legal, float("-inf")).argmax())
+        return mjx.Action.select_from(action_idx, legal_actions)
+
+
+def save_log(obs_dict, env, logs):
+    """Write one finished game to a new timestamped directory under ./logs.
+
+    Saves each player's observation as <player_id>.json, the string returned by
+    logs.get_url() as tenho.log, and the final state as finish.svg and env.json.
+    Returns the path of the directory that was created.
+    """
+    dir_path = os.path.join("logs", datetime.now().strftime('%Y%m%d%H%M%S%f'))
+    os.makedirs(dir_path)  # creates "logs" as well when it does not exist yet
+    for player_id, obs in obs_dict.items():
+        with open(os.path.join(dir_path, f"{player_id}.json"), "w") as f:
+            json.dump(json.loads(obs.to_json()), f)
+        # NOTE(review): rewritten on every iteration; could probably move out of the loop
+        with open(os.path.join(dir_path, "tenho.log"), "w") as f:
+            f.write(logs.get_url())
+    env.state().save_svg(os.path.join(dir_path, "finish.svg"))
+    with open(os.path.join(dir_path, "env.json"), "w") as f:
+        f.write(env.state().to_json())
+    return dir_path
+
+
+if __name__ == "__main__":
+    """引数
+    -n, --number (int): 何回対局するか
+    -l --log (flag): このオプションをつけると対局結果を保存する
+    """
+    parser = argparse.ArgumentParser()  # -n/--number: games to play; -l/--log: save results
+    parser.add_argument("-n", "--number", type=int, default=32,
+                        help="number of game iteration")
+    parser.add_argument("-l", "--log", action="store_true",
+                        help="whether log will be stored")
+    args = parser.parse_args()
+
+    logging = args.log
+    n_games = args.number
+
+    player_names_to_idx = {
+        "player_0": 0,
+        "player_1": 1,
+        "player_2": 2,
+        "player_3": 3,
+    }
+
+    agents = [
+        MyAgent(),                  # the custom agent defined above
+        mjx.agents.ShantenAgent(),  # agent shipped with mjx
+        mjx.agents.ShantenAgent(),  # agent shipped with mjx
+        mjx.agents.ShantenAgent(),  # agent shipped with mjx
+    ]
+
+    # Initialise the table (a fresh MjxEnv is now created per game inside the loop)
+    # env_ = mjx.MjxEnv()
+    # obs_dict = env_.reset()
+    dir_paths = []
+    logs = convert_log.ConvertLog()  # NOTE(review): one instance shared by all games - confirm that is intended
+    for _ in range(n_games):
+        env_ = mjx.MjxEnv()
+        obs_dict = env_.reset()
+        while not env_.done():
+            actions = {}
+            for player_id, obs in obs_dict.items():
+                actions[player_id] = agents[player_names_to_idx[player_id]].act(
+                    obs)
+            obs_dict = env_.step(actions)
+            if len(obs_dict.keys()) == 4:  # log only steps that return an observation for all four players
+                logs.add_log(obs_dict)
+        returns = env_.rewards()  # not used afterwards
+        if logging:
+            dir_path = save_log(obs_dict, env_, logs)
+            dir_paths.append(dir_path)
+    print("game has ended")
+    if logging:
+        custom_evaluate.evaluate_from_arg(dir_paths)  # print the final scores of the saved games
+
+    # from mjx import Action, Observation, State
+    # data_path = "json"
+    # with open(data_path) as f:
+    #     lines = f.readlines()
+
+    # for line in lines:
+    #     state = State(line)
+
+    #     for cpp_obs, cpp_act in state._cpp_obj.past_decisions():
+    #         obs = Observation._from_cpp_obj(cpp_obs)
+    #         feature = obs.to_features(feature_name="mjx-small-v0")
+
+    #         action = Action._from_cpp_obj(cpp_act)
+    #         action_idx = action.to_idx()
diff --git a/python/dataset/large.npy b/python/dataset/large.npy
new file mode 100644
index 0000000..b82ceb6
Binary files /dev/null and b/python/dataset/large.npy differ
diff --git a/python/models/fifth_model.pth b/python/models/fifth_model.pth
new file mode 100644
index 0000000..5b6a98b
Binary files /dev/null and b/python/models/fifth_model.pth differ
diff --git a/python/models/fourth_model.pth b/python/models/fourth_model.pth
new file mode 100644
index 0000000..d5e74be
Binary files /dev/null and b/python/models/fourth_model.pth differ
diff --git a/python/models/third_model.pth b/python/models/third_model.pth
new file mode 100644
index 0000000..c5d5dc6
Binary files /dev/null and b/python/models/third_model.pth differ
diff --git a/python/sample_trial.py b/python/sample_trial.py
index 5fa95d3..526354b 100644
--- a/python/sample_trial.py
+++ b/python/sample_trial.py
@@ -2,16 +2,15 @@
 """
 
 import argparse
-import os
-from datetime import datetime
 import json
+import os
 import random
+from datetime import datetime
 
 import mjx
 import mjx.agents
-
-from server import convert_log
 from client.agent import CustomAgentBase
+from server import convert_log
 
 
 # CustomAgentBase を継承して，
@@ -66,7 +65,7 @@ def save_log(obs_dict, env, logs):
     logging = args.log
     n_games = args.number
 
-    player_names_to_idx ={
+    player_names_to_idx = {
         "player_0": 0,
         "player_1": 1,
         "player_2": 2,
@@ -78,7 +77,7 @@ def save_log(obs_dict, env, logs):
         mjx.agents.ShantenAgent(),  # mjxに実装されているAgent
         mjx.agents.ShantenAgent(),  # mjxに実装されているAgent
         mjx.agents.ShantenAgent(),  # mjxに実装されているAgent
-        ]
+    ]
 
     # 卓の初期化
     env_ = mjx.MjxEnv()
@@ -89,12 +88,12 @@ def save_log(obs_dict, env, logs):
         while not env_.done():
             actions = {}
             for player_id, obs in obs_dict.items():
-                actions[player_id] = agents[player_names_to_idx[player_id]].act(obs)
+                actions[player_id] = agents[player_names_to_idx[player_id]].act(
+                    obs)
             obs_dict = env_.step(actions)
-            if len(obs_dict.keys())==4:
+            if len(obs_dict.keys()) == 4:
                 logs.add_log(obs_dict)
         returns = env_.rewards()
         if logging:
             save_log(obs_dict, env_, logs)
     print("game has ended")
-