-
Notifications
You must be signed in to change notification settings - Fork 38
Open
Description
Hello, when I am using MineCLIP, I don't understand why the results obtained from running the same text and video twice are different, and the numerical difference is huge. In one of my tests, it directly changed from 0.9 to -0.5. Here is my test script, and I have also confirmed the random seed. This result appears to be purely random.
import torch
import hydra
from omegaconf import OmegaConf
from mineclip import MineCLIP
from PIL import Image
from torchvision import transforms
import cv2
import numpy as np
import random
@torch.no_grad()
@hydra.main(config_name="conf", config_path=".", version_base="1.1")
def main(cfg):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
OmegaConf.set_struct(cfg, False)
cfg.pop("ckpt", None)
OmegaConf.set_struct(cfg, True)
model = MineCLIP(**cfg).to(device)
cap = cv2.VideoCapture("obs_0.mp4")
frames_list = []
while(cap.isOpened()):
ret,frame = cap.read()
if not ret:
break
else:
frame = cv2.resize(frame,(160,256))
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frames_list.append(frame)
cap.release()
result_frames = torch.as_tensor(np.stack(frames_list))
result_frames = result_frames.permute(0,3,1,2)
result_frames = result_frames.reshape(10,3,160,256)
video = result_frames.unsqueeze(0)
video = video.to(device)
image_feats = model.forward_image_features(video)
video_feats = model.forward_video_features(image_feats)
text = ["harvest 1 coal with stone pickaxe"]
text_feats = model.encode_text(text)
reward_scores, _ = model.forward_reward_head(video_feats, text_tokens=text_feats)
print("Reward score between the image and text:", reward_scores.item())
if __name__ == "__main__":
random.seed(0)
main()
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels