Skip to content

Commit dae7abe

Browse files
author
Your Name
committed
change data structure and video loader
1 parent 25174e4 commit dae7abe

5 files changed

Lines changed: 164 additions & 148 deletions

File tree

SyncNetInstance.py

Lines changed: 28 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -4,13 +4,14 @@
44

55
import torch
66
import numpy
7-
import time, pdb, argparse, subprocess, os
7+
import time, pdb, argparse, subprocess, os, math, glob
88
import cv2
99
import python_speech_features
1010

1111
from scipy import signal
1212
from scipy.io import wavfile
1313
from SyncNetModel import *
14+
from shutil import rmtree
1415

1516

1617
# ==================== Get OFFSET ====================
@@ -41,21 +42,33 @@ def __init__(self, dropout = 0, num_layers_in_fc_layers = 1024):
4142
def evaluate(self, opt, videofile):
4243

4344
self.__S__.eval();
45+
46+
# ========== ==========
47+
# Convert files
48+
# ========== ==========
49+
50+
if os.path.exists(os.path.join(opt.tmp_dir,opt.reference)):
51+
rmtree(os.path.join(opt.tmp_dir,opt.reference))
52+
53+
os.makedirs(os.path.join(opt.tmp_dir,opt.reference))
54+
55+
command = ("ffmpeg -y -i %s -threads 1 -f image2 %s" % (videofile,os.path.join(opt.tmp_dir,opt.reference,'%06d.jpg')))
56+
output = subprocess.call(command, shell=True, stdout=None)
57+
58+
command = ("ffmpeg -y -i %s -async 1 -ac 1 -vn -acodec pcm_s16le -ar 16000 %s" % (videofile,os.path.join(opt.tmp_dir,opt.reference,'audio.wav')))
59+
output = subprocess.call(command, shell=True, stdout=None)
4460

4561
# ========== ==========
4662
# Load video
4763
# ========== ==========
48-
cap = cv2.VideoCapture(videofile)
4964

50-
frame_num = 1;
5165
images = []
52-
while frame_num:
53-
frame_num += 1
54-
ret, image = cap.read()
55-
if ret == 0:
56-
break
66+
67+
flist = glob.glob(os.path.join(opt.tmp_dir,opt.reference,'*.jpg'))
68+
flist.sort()
5769

58-
images.append(image)
70+
for fname in flist:
71+
images.append(cv2.imread(fname))
5972

6073
im = numpy.stack(images,axis=3)
6174
im = numpy.expand_dims(im,axis=0)
@@ -67,12 +80,7 @@ def evaluate(self, opt, videofile):
6780
# Load audio
6881
# ========== ==========
6982

70-
audiotmp = os.path.join(opt.tmp_dir,'audio.wav')
71-
72-
command = ("ffmpeg -y -i %s -async 1 -ac 1 -vn -acodec pcm_s16le -ar 16000 %s" % (videofile,audiotmp))
73-
output = subprocess.call(command, shell=True, stdout=None)
74-
75-
sample_rate, audio = wavfile.read(audiotmp)
83+
sample_rate, audio = wavfile.read(os.path.join(opt.tmp_dir,opt.reference,'audio.wav'))
7684
mfcc = zip(*python_speech_features.mfcc(audio,sample_rate))
7785
mfcc = numpy.stack([numpy.array(i) for i in mfcc])
7886

@@ -83,15 +91,16 @@ def evaluate(self, opt, videofile):
8391
# Check audio and video input length
8492
# ========== ==========
8593

86-
if (float(len(audio))/16000) < (float(len(images))/25) :
87-
print(" *** WARNING: The audio (%.4fs) is shorter than the video (%.4fs). Type 'cont' to continue. *** "%(float(len(audio))/16000,float(len(images))/25))
88-
pdb.set_trace()
94+
if (float(len(audio))/16000) != (float(len(images))/25) :
95+
print("WARNING: Audio (%.4fs) and video (%.4fs) lengths are different."%(float(len(audio))/16000,float(len(images))/25))
96+
97+
min_length = min(len(images),math.floor(len(audio)/640))
8998

9099
# ========== ==========
91100
# Generate video and audio feats
92101
# ========== ==========
93102

94-
lastframe = len(images)-5
103+
lastframe = min_length-5
95104
im_feat = []
96105
cc_feat = []
97106

demo_syncnet.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@
1414
parser.add_argument('--batch_size', type=int, default='20', help='');
1515
parser.add_argument('--vshift', type=int, default='15', help='');
1616
parser.add_argument('--videofile', type=str, default="data/example.avi", help='');
17-
parser.add_argument('--tmp_dir', type=str, default="data", help='');
17+
parser.add_argument('--tmp_dir', type=str, default="data/work/pytmp", help='');
18+
parser.add_argument('--reference', type=str, default="demo", help='');
1819

1920
opt = parser.parse_args();
2021

0 commit comments

Comments
 (0)