initial commit
This commit is contained in:
153
miapia_own/pieinfer.py
Normal file
153
miapia_own/pieinfer.py
Normal file
@@ -0,0 +1,153 @@
|
||||
import librosa
|
||||
import numpy as np
|
||||
import argparse
|
||||
|
||||
from parse import parse
|
||||
from scipy.signal import savgol_filter
|
||||
import torch
|
||||
from model import EmoTalk
|
||||
import random
|
||||
import os, subprocess
|
||||
import shlex
|
||||
from munch import Munch
|
||||
|
||||
|
||||
@torch.no_grad()
def test(model, speech_array, sampling_rate):
    """Predict blendshape curves for an audio clip and post-process them.

    Args:
        model: Object exposing ``predict(audio, level, person)``. Its result
            is squeezed and converted to a NumPy array that is indexed as
            ``[frame, channel]`` below.
        speech_array: Audio samples; wrapped in a ``torch.FloatTensor`` and
            given a leading batch axis before being passed to the model.
        sampling_rate: Unused. Kept so existing callers keep working.

    Returns:
        With post-processing enabled (the hard-coded default), a float64
        array in which every column has been smoothed along axis 0 with a
        Savitzky-Golay filter and columns 8 and 9 have been replaced by
        randomly placed 7-frame curves. Otherwise the raw squeezed prediction.
    """
    # Mirrors the configuration used by PieInfer; only ``device`` and
    # ``post_processing`` are read in this function.
    args = Munch(
        bs_dim=52,
        feature_dim=832,
        period=30,
        device="cuda",
        model_path="./pretrain_model/EmoTalk.pth",
        max_seq_len=5000,
        num_workers=0,
        batch_size=1,
        post_processing=True,
        blender_path="./blender/blender")

    # Four 7-frame curves written into columns 8 and 9 (presumably the
    # eye-blink channels, going by the original eye1..eye4 names).
    eye_curves = (
        np.array([0.36537236, 0.950235724, 0.95593375, 0.916715622, 0.367256105, 0.119113259, 0.025357503]),
        np.array([0.234776169, 0.909951985, 0.944758058, 0.777862132, 0.191071674, 0.235437036, 0.089163929]),
        np.array([0.870040774, 0.949833691, 0.949418545, 0.695911646, 0.191071674, 0.072576277, 0.007108896]),
        np.array([0.000307991, 0.556701422, 0.952656746, 0.942345619, 0.425857186, 0.148335218, 0.017659493]),
    )
    curve_len = 7
    window_length = 5

    # speech_array, sampling_rate = librosa.load(os.path.join(wav_path), sr=16000)
    audio = torch.FloatTensor(speech_array).unsqueeze(0).to(args.device)
    level = torch.tensor([1]).to(args.device)
    person = torch.tensor([0]).to(args.device)
    prediction = model.predict(audio, level, person)
    prediction = prediction.squeeze().detach().cpu().numpy()

    if not args.post_processing:
        return prediction

    if prediction.ndim == 1:
        # NOTE(review): a 1-D result presumably means squeeze() also removed
        # a length-1 frame axis; restore it so the [frame, channel] indexing
        # below works instead of raising IndexError. Confirm against the
        # model's output shape.
        prediction = prediction[np.newaxis, :]

    if prediction.shape[0] >= window_length:
        # One call along the frame axis replaces the per-column loop.
        output = savgol_filter(prediction, window_length, 2, axis=0).astype(np.float64)
    else:
        # savgol_filter raises ValueError when the window is longer than the
        # signal, so very short clips are passed through unsmoothed.
        output = prediction.astype(np.float64)

    output[:, 8] = 0
    output[:, 9] = 0

    # Same random-call sequence as before: start offset, then per event a
    # curve pick (1..4) followed by the gap to the next event.
    i = random.randint(0, 60)
    while i < output.shape[0] - curve_len:
        curve = eye_curves[random.randint(1, 4) - 1]
        output[i:i + curve_len, 8] = curve
        output[i:i + curve_len, 9] = curve
        i += random.randint(60, 180)
    return output
|
||||
|
||||
|
||||
def render_video(wav_name, model_name):
    """Run Blender on ``<model_name>.blend`` and yield each saved frame path.

    This is a generator: nothing runs until it is first iterated. It creates
    ``./audio/<wav_name>``, launches Blender with ``./<model_name>.py`` as the
    script, and forwards ``"./audio/"`` and ``wav_name`` after ``--``
    (presumably consumed by that script).

    Args:
        wav_name: Clip name; used for the output directory and passed to the
            Blender script.
        model_name: Base name of the ``.py`` / ``.blend`` pair to use.

    Yields:
        The path captured from every output line of the form
        ``Saved: '<path>'``. All other output lines are printed.
    """
    # Mirrors the configuration used by PieInfer; only ``blender_path`` is
    # read in this function.
    args = Munch(
        bs_dim=52,
        feature_dim=832,
        period=30,
        device="cuda",
        model_path="./pretrain_model/EmoTalk.pth",
        max_seq_len=5000,
        num_workers=0,
        batch_size=1,
        post_processing=True,
        blender_path="./blender/blender")

    image_path = os.path.join("./audio", wav_name)
    os.makedirs(image_path, exist_ok=True)

    python_path = f"./{model_name}.py"
    blend_path = f"./{model_name}.blend"
    print(python_path, blend_path)

    # Build argv directly instead of formatting a string and shlex-splitting
    # it, so names containing spaces or quotes reach Blender unchanged.
    cmd = [
        args.blender_path,
        "-t", "64",
        "-b", blend_path,
        "-P", python_path,
        "--",
        "./audio/",
        wav_name,
    ]

    # The context manager closes the pipe and reaps the child even when the
    # consumer stops iterating early.
    with subprocess.Popen(cmd,
                          shell=False,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT) as p:
        # Iterating to EOF (rather than polling) keeps the lines that are
        # still buffered in the pipe when the process exits.
        for raw_line in p.stdout:
            line = raw_line.decode('utf-8', errors='replace').strip()
            if line.startswith('Saved: '):
                match = parse("Saved: '{}'", line)
                if match is not None:
                    yield match.fixed[0]
                    continue
            print(line)

    if p.returncode == 0:
        print('Subprogram success')
    else:
        print('Subprogram failed')
|
||||
|
||||
|
||||
def construct_video(wav_name):
    """Combine rendered frames and audio into an MP4, then delete the frames.

    Reads ``./audio/<wav_name>/%d.png`` and ``./audio/<wav_name>.wav``, writes
    ``./audio/<wav_name>.mp4`` with ffmpeg, and removes the frame directory
    afterwards regardless of whether ffmpeg succeeded.

    Args:
        wav_name: Clip name shared by the frame directory, the ``.wav`` input
            and the ``.mp4`` output.
    """
    # Function-scope import: shutil is not among the module-level imports.
    import shutil

    image_path = os.path.join("./audio", wav_name)
    os.makedirs(image_path, exist_ok=True)
    image_temp = image_path + "/%d.png"
    output_path = os.path.join("./audio", wav_name + ".mp4")

    # Pass an argv list without a shell so wav_name is never interpreted as
    # shell syntax (quotes, `$(...)`, `;` and so on).
    cmd = [
        "ffmpeg",
        "-r", "30",
        "-i", image_temp,
        "-i", f"./audio/{wav_name}.wav",
        "-pix_fmt", "yuv420p",
        "-s", "512x768",
        output_path,
        "-y",
    ]
    try:
        subprocess.call(cmd)
    except FileNotFoundError:
        # The shell-based version only reported a missing binary on stderr;
        # keep that best-effort behaviour instead of raising.
        print("ffmpeg executable not found; no video was written")

    # Equivalent of `rm -rf`: remove the directory tree, ignoring failures.
    shutil.rmtree(image_path, ignore_errors=True)
|
||||
|
||||
|
||||
class PieInfer:
    """Callable that loads the EmoTalk checkpoint once and reuses it.

    Construction builds the model from a fixed configuration, loads the
    weights from ``./pretrain_model/EmoTalk.pth`` and moves the model to the
    configured device. Calling the instance delegates to ``test``.
    """

    def __init__(self):
        config = Munch(
            bs_dim=52,
            feature_dim=832,
            period=30,
            device="cuda",
            model_path="./pretrain_model/EmoTalk.pth",
            max_seq_len=5000,
            num_workers=0,
            batch_size=1,
            post_processing=True,
            blender_path="./blender/blender",
        )

        network = EmoTalk(config)
        # strict=False: the checkpoint's keys need not match the model's
        # exactly.
        weights = torch.load(config.model_path,
                             map_location=torch.device(config.device))
        network.load_state_dict(weights, strict=False)
        network = network.to(config.device)
        network.eval()

        self.model = network

    def __call__(self, speech_array, sampling_rate):
        """Run ``test`` on the stored model and return its result.

        Args:
            speech_array: Audio samples forwarded to ``test``.
            sampling_rate: Forwarded to ``test`` unchanged.
        """
        return test(self.model, speech_array, sampling_rate)
|
||||
Reference in New Issue
Block a user