first commit

George Kasparyants
2024-06-14 00:47:32 +03:00
commit 7591784e34
31 changed files with 3029 additions and 0 deletions


@@ -0,0 +1,164 @@
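# Blender batch-render script, run headless as:
#   blender -b <scene>.blend -P <this file> -- <root_dir> <filename>
# It assigns a random value to every shape key of the 'MFA_body' object and renders 10 PNG
# frames (512x768, Workbench/MATCAP) into <root_dir><filename>/. The companion <filename>.npy
# is loaded but not actually used in this variant.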
import random
import shutil
model_bsList = ["browDownLeft",
"browDownRight",
"browInnerUp",
"browOuterUpLeft",
"browOuterUpRight",
"cheekPuff",
"cheekSquintLeft",
"cheekSquintRight",
"eyeBlinkLeft",
"eyeBlinkRight",
"eyeLookDownLeft",
"eyeLookDownRight",
"eyeLookInLeft",
"eyeLookInRight",
"eyeLookOutLeft",
"eyeLookOutRight",
"eyeLookUpLeft",
"eyeLookUpRight",
"eyeSquintLeft",
"eyeSquintRight",
"eyeWideLeft",
"eyeWideRight",
"jawForward",
"jawLeft",
"jawOpen",
"jawRight",
"mouthClose",
"mouthDimpleLeft",
"mouthDimpleRight",
"mouthFrownLeft",
"mouthFrownRight",
"mouthFunnel",
"mouthLeft",
"mouthLowerDownLeft",
"mouthLowerDownRight",
"mouthPressLeft",
"mouthPressRight",
"mouthPucker",
"mouthRight",
"mouthRollLower",
"mouthRollUpper",
"mouthShrugLower",
"mouthShrugUpper",
"mouthSmileLeft",
"mouthSmileRight",
"mouthStretchLeft",
"mouthStretchRight",
"mouthUpperUpLeft",
"mouthUpperUpRight",
"noseSneerLeft",
"noseSneerRight",
"tongueOut"]
import bpy
import os
import numpy as np
import sys
filename = str(sys.argv[-1])
root_dir = str(sys.argv[-2])
object_name = "MFA_body"
obj = bpy.data.objects[object_name]
bpy.context.scene.render.engine = 'BLENDER_WORKBENCH'
bpy.context.scene.display.shading.light = 'MATCAP'
bpy.context.scene.display.render_aa = 'FXAA'
bpy.context.scene.render.resolution_x = int(512)
bpy.context.scene.render.resolution_y = int(768)
bpy.context.scene.render.fps = 30
bpy.context.scene.render.image_settings.file_format = 'PNG'
cam = bpy.data.objects['0Camera']
cam.scale = [2, 2, 2]
bpy.context.scene.camera = cam
"""
model_bsList = ['Basis',
'0',
'X_postrig',
'X_neck',
'X_head',
'X_eyesfix',
'X_breast',
'X_nails',
'X_pus_conf.1',
'X_pus_assym', 'X_jadafication',
'X_facetweak', 'X_eyeshape',
'A_nipple_in', 'A_nailsmax',
'A_pregnant', 'PAD_breathe',
'PAD_swallow', 'Head',
'cr_neck1', 'cr_neck2',
'cr_neck3.R', 'cr_neck3.L',
'cr_neck4.L', 'cr_neck4.R', 'cr_jaw1', 'cr_jaw2', 'sqz_jaw3', 'cr_brows_dwn', 'cr_brows_up',
'cr_eye_lookdown', 'cr_eye_open',
'cr_eye_look.L', 'cr_eye_look.R', 'cr_mouthmax.L', 'cr_mouthmax.R', 'cr_cheekin.L', 'cr_cheekin.R', 'Body', 'cr_spine',
'cr_spine2', 'cr_spine3', 'cr_spine2.L',
'cr_spine2.R', 'cr_spine4.L', 'cr_spine4.R',
'cr_spine5.L', 'cr_spine5.R', 'cr_lowerspine.bcw',
'cr_lowerspine.fwd', 'size_breastXL.L', 'size_breastXL.R',
'size_breastXS.L', 'size_breastXS.R', 'size_oreola.L',
'size_oreola.R', 'Legs', 'cr_hipout.L', 'cr_hipout.R',
'cr_hipin.L', 'cr_hipin.R', 'cr_pussyflattern',
'cr_hip0.L', 'cr_hip0.R', 'cr_hip1.L', 'cr_hip1.R',
'cr_hip45.L', 'cr_hip45.R', 'sqz_hip1max.L',
'sqz_hip1max.R', 'sqz_hip1vol.L', 'sqz_hip1vol.R',
'sqz_hip1squeeze.L', 'sqz_hip1squeeze.R', 'cr_hip2.L',
'cr_hip2.R', 'sqz_hip2.L', 'sqz_hip2.R', 'cr_hip3.L',
'cr_hip3.R', 'sqz_buttrest.L', 'sqz_buttrest.R',
'cr_knee45.L', 'cr_knee45.R', 'cr_knee.L', 'cr_knee.R',
'sqz_knee.L', 'sqz_knee.R', 'sqz_stance.L', 'sqz_stance.R',
'cr_buttheart.L', 'cr_buttheart.R', 'rest_buttcheek.L',
'rest_buttcheek.R', 'rest_knee.L', 'rest_knee.R', 'rest_knee_fat.L',
'rest_knee_fat.R', 'rest_hip.L', 'rest_hip.R', 'vol_butt.L',
'vol_butt.R', 'Feet', 'cr_feet1.L', 'cr_feet1.R', 'cr_feet2.L',
'cr_feet2.R', 'cr_feet3.L', 'cr_feet3.R', 'cr_toe1.L', 'cr_toe1.R',
'cr_toe2.L', 'cr_toe2.R', 'Arms', 'cr_arm-up.L', 'cr_arm-up.R',
'cr_arm-fwd.L', 'cr_arm-fwd.R', 'cr_arm-dwn.L', 'cr_arm-dwn.R',
'sqz_arm-fwd.L', 'sqz_arm-fwd.R', 'sqz_armpit.L', 'sqz_armpit.R',
'sqz_arm-bcw.L', 'sqz_arm-bcw.R', 'sqz_arm-bcw_max.L',
'sqz_arm-bcw_max.R', 'cr_arm-trc.L', 'cr_arm-trc.R',
'D_cr_elbow.L', 'U_cr_elbow.L', 'D_cr_elbow.R', 'U_cr_elbow.R',
'D_sqz_elbowMax.L', 'U_sqz_elbowMax.L', 'D_sqz_elbowMax.R',
'U_sqz_elbowMax.R', 'cr_armrest.L', 'cr_armrest.R',
'cr_shoulder_fwd.L', 'cr_shoulder_fwd.R', 'cr_shoulder_bcw.L',
'cr_shoulder_bcw.R', 'cr_shoulder_dwn.L', 'cr_shoulder_dwn.R',
'cr_shoulder_up.L', 'cr_shoulder_up.R', 'rest_elbow.L', 'rest_elbow.R',
'Hands', 'cr_hand1.L', 'cr_hand1.R',
'cr_hand2.L', 'cr_hand2.R', 'cr_handtwistU.L', 'cr_handtwistU.R',
'cr_handtwistD.L',
'cr_handtwistD.R',
'cr_thumb.01.L', 'cr_thumb.01.R',
'cr_f_index.01.L', 'cr_f_index.01.R', 'cr_f_index.02.L',
'cr_f_index.02.R',
'cr_f_middle.01.L', 'cr_f_middle.01.R', 'cr_f_middle.02.L',
'cr_f_middle.02.R', 'cr_f_ring.01.L', 'cr_f_ring.01.R',
'cr_f_ring.02.L', 'cr_f_ring.02.R', 'cr_f_pinky.01.L',
'cr_f_pinky.01.R', 'cr_f_pinky.02.L', 'cr_f_pinky.02.R', 'EM',
'em_eye_close.L', 'em_eye_close.R', 'em_eye_half.L', 'em_eye_half.R',
'em_smile_open', 'em_smile_close', 'em_kiss', 'em_disg', 'em_blow',
'em_surprise', 'em_sad', 'em_frown', 'PH', 'ph_+', 'ph_bpm',
'ph_fv', 'ph_ou',
'ph_e', 'ph_r', 'ph_ch', 'ph_th', 'ph_a']"""
model_bsList = list(obj.data.shape_keys.key_blocks.keys())
# print(obj.data.shape_keys.key_blocks.keys())
output_dir = root_dir + filename
blendshape_path = root_dir + filename + '.npy'
result = []
bs = np.load(blendshape_path)
for i in range(10):
    for kp_name in model_bsList:
        obj.data.shape_keys.key_blocks[kp_name].value = random.random()
    bpy.context.scene.render.filepath = os.path.join(output_dir,
                                                     '{}.png'.format(i))
    bpy.ops.render.render(write_still=True)

0
miapia_own/__init__.py Normal file

57
miapia_own/a.py Normal file

@@ -0,0 +1,57 @@
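# Debug render script: same scene/camera setup as the script above, but instead of driving all
# shape keys it only sweeps the 'cr_eye_open' key from 0 to 1 across bs.shape[0] frames.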
import bpy
import os
import numpy as np
import sys
filename = str(sys.argv[-1])
root_dir = str(sys.argv[-2])
object_name = "MFA_body"
obj = bpy.data.objects[object_name]
bpy.context.scene.render.engine = 'BLENDER_WORKBENCH'
bpy.context.scene.display.shading.light = 'MATCAP'
bpy.context.scene.display.render_aa = 'FXAA'
bpy.context.scene.render.resolution_x = int(512)
bpy.context.scene.render.resolution_y = int(768)
bpy.context.scene.render.fps = 30
bpy.context.scene.render.image_settings.file_format = 'PNG'
cam = bpy.data.objects['0Camera']
cam.scale = [2, 2, 2]
bpy.context.scene.camera = cam
model_bsList = ['Basis',
'0',
'X_postrig',
'X_neck',
'X_head',
'X_eyesfix',
'X_breast',
'X_nails',
'X_pus_conf.1',
'X_pus_assym', 'X_jadafication',
'X_facetweak', 'X_eyeshape',
'A_nipple_in', 'A_nailsmax',
'A_pregnant', 'PAD_breathe',
'PAD_swallow', 'Head',
'cr_neck1', 'cr_neck2',
'cr_neck3.R', 'cr_neck3.L',
'cr_neck4.L', 'cr_neck4.R', 'cr_jaw1', 'cr_jaw2', 'sqz_jaw3', 'cr_brows_dwn', 'cr_brows_up',
'cr_eye_lookdown', 'cr_eye_open',
'cr_eye_look.L', 'cr_eye_look.R', 'cr_mouthmax.L', 'cr_mouthmax.R', 'cr_cheekin.L', 'cr_cheekin.R', 'Body', 'cr_spine', 'cr_spine2', 'cr_spine3', 'cr_spine2.L', 'cr_spine2.R', 'cr_spine4.L', 'cr_spine4.R', 'cr_spine5.L', 'cr_spine5.R', 'cr_lowerspine.bcw', 'cr_lowerspine.fwd', 'size_breastXL.L', 'size_breastXL.R', 'size_breastXS.L', 'size_breastXS.R', 'size_oreola.L', 'size_oreola.R', 'Legs', 'cr_hipout.L', 'cr_hipout.R', 'cr_hipin.L', 'cr_hipin.R', 'cr_pussyflattern', 'cr_hip0.L', 'cr_hip0.R', 'cr_hip1.L', 'cr_hip1.R', 'cr_hip45.L', 'cr_hip45.R', 'sqz_hip1max.L', 'sqz_hip1max.R', 'sqz_hip1vol.L', 'sqz_hip1vol.R', 'sqz_hip1squeeze.L', 'sqz_hip1squeeze.R', 'cr_hip2.L', 'cr_hip2.R', 'sqz_hip2.L', 'sqz_hip2.R', 'cr_hip3.L', 'cr_hip3.R', 'sqz_buttrest.L', 'sqz_buttrest.R', 'cr_knee45.L', 'cr_knee45.R', 'cr_knee.L', 'cr_knee.R', 'sqz_knee.L', 'sqz_knee.R', 'sqz_stance.L', 'sqz_stance.R', 'cr_buttheart.L', 'cr_buttheart.R', 'rest_buttcheek.L', 'rest_buttcheek.R', 'rest_knee.L', 'rest_knee.R', 'rest_knee_fat.L', 'rest_knee_fat.R', 'rest_hip.L', 'rest_hip.R', 'vol_butt.L', 'vol_butt.R', 'Feet', 'cr_feet1.L', 'cr_feet1.R', 'cr_feet2.L', 'cr_feet2.R', 'cr_feet3.L', 'cr_feet3.R', 'cr_toe1.L', 'cr_toe1.R', 'cr_toe2.L', 'cr_toe2.R', 'Arms', 'cr_arm-up.L', 'cr_arm-up.R', 'cr_arm-fwd.L', 'cr_arm-fwd.R', 'cr_arm-dwn.L', 'cr_arm-dwn.R', 'sqz_arm-fwd.L', 'sqz_arm-fwd.R', 'sqz_armpit.L', 'sqz_armpit.R', 'sqz_arm-bcw.L', 'sqz_arm-bcw.R', 'sqz_arm-bcw_max.L', 'sqz_arm-bcw_max.R', 'cr_arm-trc.L', 'cr_arm-trc.R', 'D_cr_elbow.L', 'U_cr_elbow.L', 'D_cr_elbow.R', 'U_cr_elbow.R', 'D_sqz_elbowMax.L', 'U_sqz_elbowMax.L', 'D_sqz_elbowMax.R', 'U_sqz_elbowMax.R', 'cr_armrest.L', 'cr_armrest.R', 'cr_shoulder_fwd.L', 'cr_shoulder_fwd.R', 'cr_shoulder_bcw.L', 'cr_shoulder_bcw.R', 'cr_shoulder_dwn.L', 'cr_shoulder_dwn.R', 'cr_shoulder_up.L', 'cr_shoulder_up.R', 'rest_elbow.L', 'rest_elbow.R', 'Hands', 'cr_hand1.L', 'cr_hand1.R', 'cr_hand2.L', 'cr_hand2.R', 'cr_handtwistU.L', 'cr_handtwistU.R', 'cr_handtwistD.L', 'cr_handtwistD.R', 'cr_thumb.01.L', 'cr_thumb.01.R', 'cr_f_index.01.L', 'cr_f_index.01.R', 'cr_f_index.02.L', 'cr_f_index.02.R', 'cr_f_middle.01.L', 'cr_f_middle.01.R', 'cr_f_middle.02.L', 'cr_f_middle.02.R', 'cr_f_ring.01.L', 'cr_f_ring.01.R', 'cr_f_ring.02.L', 'cr_f_ring.02.R', 'cr_f_pinky.01.L', 'cr_f_pinky.01.R', 'cr_f_pinky.02.L', 'cr_f_pinky.02.R', 'EM', 'em_eye_close.L', 'em_eye_close.R', 'em_eye_half.L', 'em_eye_half.R', 'em_smile_open', 'em_smile_close', 'em_kiss', 'em_disg', 'em_blow', 'em_surprise', 'em_sad', 'em_frown', 'PH', 'ph_+', 'ph_bpm', 'ph_fv', 'ph_ou', 'ph_e', 'ph_r', 'ph_ch', 'ph_th', 'ph_a']
# print(obj.data.shape_keys.key_blocks.keys())
output_dir = root_dir + filename
blendshape_path = root_dir + filename + '.npy'
result = []
bs = np.load(blendshape_path)
for i in range(bs.shape[0]):
    obj.data.shape_keys.key_blocks['cr_eye_open'].value = i / bs.shape[0]
    bpy.context.scene.render.filepath = os.path.join(output_dir,
                                                     '{}.png'.format(i))
    bpy.ops.render.render(write_still=True)

36
miapia_own/aihandler.py Normal file

@@ -0,0 +1,36 @@
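# Blocking chat backend: posts the user's message to the fast-pia chat-completion endpoint and
# returns the reply text together with an emotion label (consumed by MainPage.message_sent).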
import requests
class AIHandler(object):
    def __init__(self):
        pass

    def __call__(self, text):
        resp = requests.post("https://fast-pia.avemio.technology/chat-completion",
                             json={
                                 "session-id": "chatcmpl",
                                 "user-location": "Zweibrücken",
                                 "wheel-of-life": [
                                     {
                                         "personal_growth": 10,
                                         "health_exercise": 5,
                                         "familiy_friends": 5,
                                         "romance_relationship": 5,
                                         "career_work": 5,
                                         "finances": 5,
                                         "recreation_fun": 5,
                                         "living_situation": 5}
                                 ],
                                 "messages": [
                                     {
                                         "role": "user",
                                         "content": text
                                     }
                                 ]
                             })
        resp = resp.json()
        return {
            "text": resp[0]['text'],
            "emotion": resp[0]['emotion']
        }

36
miapia_own/aihandler_stream.py Normal file

@@ -0,0 +1,36 @@
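# Streaming chat backend: streams a gpt-3.5-turbo completion and yields it in sentence-sized
# chunks (split on . ! , ?) so speech synthesis can start before the full reply has arrived.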
from time import time

import openai
import requests

openai.api_key = "sk-proj-PdTVVVvYzcd6vs2qcRxpT3BlbkFJtq78XfSrzwEK2fqyOVHE"


class AIHandlerStream(object):
    def __init__(self):
        self.ai = openai.OpenAI(api_key="sk-proj-GaouEG2QuAcfAr1an2uBT3BlbkFJaIh0XVFXWrYQpJazlbeO")

    def __call__(self, text):
        out = ""
        for chunk in self.ai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are PIA. You talk with short sentences. And help people."},
                    {"role": "user", "content": text}
                ], stream=True
        ):
            delta = chunk.choices[0].delta.content
            if delta is None:
                continue
            out += delta
            if len(out) > 0 and out[-1] in ['.', '!', ',', '?']:
                yield out
                out = ""
        if len(out) > 0:
            yield out


if __name__ == "__main__":
    aihandler = AIHandlerStream()
    t1 = time()
    for text in aihandler("Hello, how are you, what is your name?"):
        print(time() - t1)
        print(text)

571
miapia_own/main.py Normal file

@@ -0,0 +1,571 @@
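# Web server for the MIA PIA demo. Two paths drive the avatar:
#  * message_sent(): browser chat -> AIHandler -> XTTS speech -> PieInfer blendshapes ->
#    Blender render_video()/construct_video() -> MP4 pushed back into the page.
#  * io_set_text plus the /api/* endpoints: iPhone client -> AIHandlerStream -> audio and
#    per-frame blendshape coefficients streamed over Socket.IO (no Blender render).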
import argparse
import base64
import io
import json
import logging
import os
import random
import re
import shlex
import subprocess
import sys
import uuid
from collections import defaultdict
from time import sleep, time

import librosa
import numpy as np
import pandas as pd
import redis
import torch
from flask import redirect, send_file, Response, request, jsonify
from flask_socketio import emit
from scipy.signal import savgol_filter
from tqdm import tqdm
from TTS.api import TTS

from piedemo.fields.ajax_group import AjaxChatField, AjaxGroup
from piedemo.fields.grid import VStack, HStack, SpaceField
from piedemo.fields.inputs.chat import ChatField
from piedemo.fields.inputs.hidden import InputHiddenField
from piedemo.fields.inputs.int_list import InputIntListField
from piedemo.fields.navigation import Navigation
from piedemo.fields.outputs.colored_text import ptext, OutputColoredTextField
from piedemo.fields.outputs.json import OutputJSONField
from piedemo.fields.outputs.progress import ProgressField
from piedemo.fields.outputs.table import OutputTableField
from piedemo.fields.outputs.video import OutputVideoField
from piedemo.hub.svgpil import SVGImage
from piedemo.hub.swagger_utils.method import describe, check_missing_keys
from piedemo.page import Page
from piedemo.web import Web

from aihandler import AIHandler
from aihandler_stream import AIHandlerStream
from pieinfer import PieInfer, render_video, construct_video

logging.getLogger('socketio').setLevel(logging.ERROR)
logging.getLogger('engineio').setLevel(logging.ERROR)
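# Mouth/smile blendshapes checked in perform_on_text(): frames where any of these exceeds 0.5
# get their full coefficient list dumped to ./blendshapes for inspection.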
target_names = [
"mouthSmileLeft",
"mouthSmileRight",
"mouthStretchLeft",
"mouthStretchRight",
"mouthUpperUpLeft",
"mouthUpperUpRight",
]
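# The 52 ARKit blendshape names in the column order produced by EmoTalk (bs_dim=52);
# get_event()/get_value() translate a name into the integer "index" sent to the client.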
model_bsList = ["browDownLeft",
"browDownRight",
"browInnerUp",
"browOuterUpLeft",
"browOuterUpRight",
"cheekPuff",
"cheekSquintLeft",
"cheekSquintRight",
"eyeBlinkLeft",
"eyeBlinkRight",
"eyeLookDownLeft",
"eyeLookDownRight",
"eyeLookInLeft",
"eyeLookInRight",
"eyeLookOutLeft",
"eyeLookOutRight",
"eyeLookUpLeft",
"eyeLookUpRight",
"eyeSquintLeft",
"eyeSquintRight",
"eyeWideLeft",
"eyeWideRight",
"jawForward",
"jawLeft",
"jawOpen",
"jawRight",
"mouthClose",
"mouthDimpleLeft",
"mouthDimpleRight",
"mouthFrownLeft",
"mouthFrownRight",
"mouthFunnel",
"mouthLeft",
"mouthLowerDownLeft",
"mouthLowerDownRight",
"mouthPressLeft",
"mouthPressRight",
"mouthPucker",
"mouthRight",
"mouthRollLower",
"mouthRollUpper",
"mouthShrugLower",
"mouthShrugUpper",
"mouthSmileLeft",
"mouthSmileRight",
"mouthStretchLeft",
"mouthStretchRight",
"mouthUpperUpLeft",
"mouthUpperUpRight",
"noseSneerLeft",
"noseSneerRight",
"tongueOut"]
# Get device
device = "cuda" if torch.cuda.is_available() else "cpu"
blendshapes_path = "./blendshapes"
def get_asset(fname):
    return SVGImage.open(os.path.join(os.path.dirname(__file__),
                                      "assets",
                                      fname)).svg_content


class MainPage(Page):
    def __init__(self, model_name: str):
        super(MainPage, self).__init__()
        self.infer = PieInfer()
        self.tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
        self.r = redis.Redis(host='localhost', port=6379, decode_responses=True)
        self.aihandler = AIHandler()
        self.aihandler_stream = AIHandlerStream()
        self.fields = Navigation(AjaxGroup("ChatGroup", VStack([
            HStack([
                AjaxChatField("Chat",
                              self.register_ajax(f"/refresh_{model_name}",
                                                 self.message_sent),
                              deps_names=["sid",
                                          "session_id",
                                          "Chat",
                                          "Chat__piedemo__file"],
                              use_socketio_support=True,
                              nopie=True,
                              style={
                                  "height": "100%"
                              }),
                OutputColoredTextField("video",
                                       nopie=True,
                                       use_socketio_support=True),
            ], xs=[8, 4]),
            ProgressField("progress",
                          nopie=True,
                          use_socketio_support=True),
            InputHiddenField("session_id", None),
        ]), no_return=True), no_submit=True, page_title="MIA PIA", page_style={
        })
        self.fields.add_link("SIMPLE",
                             "/simple",
                             active=model_name == "render")
        self.fields.add_link("MIA PIA",
                             "/nice",
                             active=model_name != "render")
        self.model_name = model_name

    def get_content(self, **kwargs):
        fields = self.fields.copy()
        fields.child_loc["Chat"].set_default_options(["Hello! What is your name?", "Say one word and stop."])
        """
        fields.child_loc["Chat"].set_avatars({
            "self": get_asset("avatar.svg"),
            "ChatGPT": get_asset("dog.svg"),
        })
        """
        session_id = str(uuid.uuid4())
        return self.fill(fields, {
            "video": f"""
            """,
            "session_id": session_id,
        })

    def message_sent(self, **data):
        sid = data['sid']
        self.emit(self.fields.child_loc["Chat"].clear_input(),
                  to=sid)
        self.emit(self.fields.child_loc["video"].update(f"""
        """))
        data = self.parse(self.fields, data)
        session_id = data['session_id']
        messages_map = self.r.hgetall(f'user-session:{session_id}')
        messages = [self.fields.child_loc["Chat"].format_message("self" if i % 2 == 0 else "ChatGPT",
                                                                 messages_map[f"message_{i}"])
                    for i in range(len(messages_map))]
        print("history: ", messages)
        text = data['Chat']['text']
        self.emit(self.fields.child_loc["Chat"].update(messages + [
            self.fields.child_loc["Chat"].format_message("self", text),
            self.fields.child_loc["Chat"].format_message("ChatGPT", "Generating text..."),
        ]), to=sid)
        output = self.aihandler(text)
        output_text = output['text']
        output_emotion = output['emotion']
        messages_map[f"message_{len(messages)}"] = text
        messages_map[f"message_{len(messages) + 1}"] = output_text
        self.r.hset(f'user-session:{session_id}', mapping=messages_map)
        self.emit(self.fields.child_loc["Chat"].update(messages + [
            self.fields.child_loc["Chat"].format_message("self", text),
            self.fields.child_loc["Chat"].format_message("ChatGPT", "Generating audio..."),
        ]), to=sid)
        self.tts.tts_to_file(text=output_text,
                             speaker_wav="/home/ubuntu/repo/of_couse_here.wav",
                             language="en",
                             emotion=output_emotion,
                             file_path=f"./audio/{session_id}.wav")
        speech_array, sampling_rate = librosa.load(f"./audio/{session_id}.wav",
                                                   sr=16000)
        output = self.infer(speech_array, sampling_rate)
        np.save(os.path.join("./audio", "{}.npy".format(session_id)),
                output)
        self.emit(self.fields.child_loc["Chat"].update(messages + [
            self.fields.child_loc["Chat"].format_message("self", text),
            self.fields.child_loc["Chat"].format_message("ChatGPT", "Rendering..."),
        ]), to=sid)
        n = output.shape[0]
        for i, fname in enumerate(tqdm(render_video(f"{session_id}",
                                                    model_name=self.model_name),
                                       total=n)):
            print("Got frame: ", fname, file=sys.stderr)
            self.emit(self.fields.child_loc["progress"].update(100 * i // n),
                      to=sid)
        construct_video(session_id)
        self.emit(self.fields.child_loc["video"].update(f"""
        <video controls="1" autoplay="1" name="media" style="border-radius: 12px; height: 80%">
            <source src="/api/video/{session_id}" type="video/mp4">
        </video>
        """), to=sid)
        '''self.emit(self.fields.child_loc["video"].update(f"""
        <img name="media" style="border-radius: 12px; height: 80%" src="/api/video/stream/{session_id}"></img>
        """))'''
        self.emit(self.fields.child_loc["Chat"].update(messages + [
            self.fields.child_loc["Chat"].format_message("self", text),
            self.fields.child_loc["Chat"].format_message("ChatGPT", output_text),
        ]), to=sid)
page = MainPage("render")
web = Web({
"": "simple",
"simple": page,
"nice": page,
}, use_socketio_support=True)
host = '0.0.0.0'
port = 8011
debug = False
app = web.get_app()
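# REST endpoints below serve the iPhone client (speaker/emotion selection and plain-text chat);
# the browser chat above goes through Socket.IO instead.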
@app.route("/api/video/<session_id>", methods=["GET"])
def get_video(session_id):
    return send_file("./audio/{}.mp4".format(session_id))


def gen(session_id):
    for image_path in render_video(f"{session_id}"):
        with open(image_path, 'rb') as f:
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + f.read() + b'\r\n')
    construct_video(session_id)


@app.route("/api/video/stream/<session_id>", methods=["GET"])
def get_video_async(session_id):
    return Response(gen(session_id),
                    mimetype='multipart/x-mixed-replace; boundary=frame')
speaker_path = "/home/ubuntu/repo/female.wav"


@app.route("/api/set_speaker", methods=["POST"])
@describe(["3dmodel"],
          name="Set speaker for 3D model",
          description="""Set speaker for 3D model""",
          inputs={
              "user_id": "This ID from article Unique Identifier for iPHONE",
              "speaker": "voice1 or voice2"
          },
          outputs={
              "status": "ok"
          })
@check_missing_keys([
    ("user_id", {"status": "error", "status_code": "missing_user_id_error"}),
    ("speaker", {"status": "error", "status_code": "missing_emotion_error"}),
])
def set_speaker():
    speaker = request.json.get("speaker")
    user_id = request.json.get("user_id")
    SPEAKER[user_id] = speaker
    return jsonify({
        'status': 'ok'
    })
@app.route("/api/set_emotion", methods=["POST"])
@describe(["3dmodel"],
          name="Set emotion for 3D model",
          description="""Set emotion for 3D model""",
          inputs={
              "user_id": "This ID from article Unique Identifier for iPHONE",
              "emotion": "sad"
          },
          outputs={
              "status": "ok"
          })
@check_missing_keys([
    ("user_id", {"status": "error", "status_code": "missing_user_id_error"}),
    ("emotion", {"status": "error", "status_code": "missing_emotion_error"}),
])
def set_emotion():
    emotion = request.json.get("emotion")
    user_id = request.json.get("user_id")
    EMOTIONS[user_id] = emotion
    return jsonify({
        'status': 'ok'
    })


@app.route("/api/get_texts", methods=["POST"])
@describe(["text"],
          name="Get texts for user_id",
          description="""This endpoint gets all texts for the current iPhone""",
          inputs={
              "user_id": "This ID from article Unique Identifier for iPHONE"
          },
          outputs={
              "text": "Output",
              "id": "bot or user",
          })
@check_missing_keys([
    ("user_id", {"status": "error", "status_code": "missing_user_id_error"}),
])
def get_texts():
    user_id = request.json.get("user_id")
    return jsonify(TEXTS[user_id])


@app.route("/api/send_text", methods=["POST"])
@describe(["text"],
          name="Send text to miapia",
          description="""This endpoint sends texts to the client""",
          inputs={
              "text": "Hello, MIAPIA",
              "user_id": "This ID from article Unique Identifier for iPHONE"
          },
          outputs={
              "status": "ok"
          })
@check_missing_keys([
    ("text", {"status": "error", "status_code": "missing_text_error"}),
    ("user_id", {"status": "error", "status_code": "missing_user_id_error"}),
])
def send_text():
    user_id = request.json.get("user_id")
    text = request.json.get("text", "")
    TEXTS[user_id].append({
        "id": 'user',
        "text": text
    })
    output_texts = page.aihandler_stream(text)
    bot_text = ""
    for output_text in output_texts:
        bot_text += " " + output_text
    TEXTS[user_id].append({
        "id": 'bot',
        "text": bot_text
    })
    return jsonify({
        "status": "ok",
        "messages": TEXTS[user_id]
    })
io = web.get_socketio(app,
engineio_logger=False)
head_memories = {}
TEXTS = defaultdict(list)
EMOTIONS = {}
SPEAKER = {}
def get_event(name, value, timestamp):
    return {
        "index": model_bsList.index(name),
        "value": value,
        "timestamp": timestamp
    }


def get_value(events, name):
    index = model_bsList.index(name)
    events = [event for event in events
              if event['index'] == index]
    if len(events) == 0:
        return None
    return events[-1]['value']
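# Head motion: ids 100-110 address channels beyond the 52 blendshapes (presumably head/neck
# controls on the client). Each memory entry is [current value, lower bound, upper bound] and
# is nudged by a small random walk in get_head_rotations() on every frame.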
def get_head_memory():
    ids = [100, 101, 103, 104, 106, 107, 109, 110]
    return [[0, 0, 1] for _ in range(len(ids))]


def get_head_rotations(alpha, duration, memory, sign):
    ids = [100, 101, 103, 104, 106, 107, 109, 110]
    for _ in range(3):
        index = ids.index(random.choice(ids))
        step = 0.01 * sign[index]
        memory[index][0] += step
        memory[index][0] = min(memory[index][0], memory[index][2])
        memory[index][0] = max(memory[index][0], memory[index][1])
    print(memory)
    return [{
        "index": j,
        "value": memory[i][0],
        "timestamp": float(duration * alpha)
    } for i, j in enumerate(ids)], memory
def perform_on_text(output_text, sid, head_memory, sign, voice):
    session_id = str(uuid.uuid4())
    page.tts.tts_to_file(text=output_text,
                         speaker_wav="/home/ubuntu/repo/female.wav" if voice == "voice1" else "/home/ubuntu/repo/indian.wav",
                         language="en",
                         emotion="Happy",
                         file_path=f"./audio/{session_id}.wav")
    audio_path = f"./audio/{session_id}.wav"
    with open(audio_path, 'rb') as f:
        audio_content = f.read()
    encode_string = base64.b64encode(audio_content).decode('utf-8')
    speech_array, sampling_rate = librosa.load(audio_path,
                                               sr=16000)
    duration = librosa.get_duration(y=speech_array,
                                    sr=sampling_rate)
    output = page.infer(speech_array, sampling_rate)
    emit("io_push_audio_blob", {
        "dataURL": f"base64,{encode_string}"
    }, to=sid)
    print("Sent audio.")
    emit("io_set_size", {
        "size": output.shape[0],
    }, to=sid)
    t1 = time()
    for i in tqdm(range(output.shape[0])):
        rots, head_memory = get_head_rotations((i / output.shape[0]), duration, head_memory, sign)
        blendshapes_i = [{
            "index": j,
            "value": output[i, j],
            "timestamp": float(duration * (i / output.shape[0]))
        } for j in range(output.shape[1])] + rots
        if max([get_value(blendshapes_i, target_name)
                for target_name in target_names]) > 0.5:
            os.makedirs(blendshapes_path,
                        exist_ok=True)
            save_blendshapes_i = os.path.join(blendshapes_path,
                                              str(uuid.uuid4()) + '.json')
            with open(save_blendshapes_i, 'w') as f:
                json.dump(blendshapes_i, f)
        emit("io_set_coef", blendshapes_i, to=sid)
        # sleep(0.1 * duration / output.shape[0])
    t2 = time()
    sleep(max(0., duration - (t2 - t1)))
    return head_memory
def perform_surgery(sid, duration=5):
    with open("../5-seconds-of-silence.wav", 'rb') as f:
        audio_content = f.read()
    encode_string = base64.b64encode(audio_content).decode('utf-8')
    fps = 20
    emit("io_push_audio_blob", {
        "dataURL": f"base64,{encode_string}"
    }, to=sid)
    print("Sent audio.")
    emit("io_set_size", {
        "size": (fps * duration)
    }, to=sid)
    t1 = time()
    for i in tqdm(range(fps * duration)):
        alpha = float(i / (fps * duration))
        emit("io_set_coef", [
            get_event("eyeWideLeft",
                      0.3 - 0.3 * alpha,
                      float(duration * alpha)),
            get_event("eyeWideRight",
                      0.3 - 0.3 * alpha,
                      float(duration * alpha))
        ], to=sid)
    t2 = time()
    sleep(max(0., duration - (t2 - t1)))
@io.on("io_set_text")
def io_set_text(data):
    data = json.loads(data)
    data = data[0]
    sid = None
    print(data, file=sys.stderr)
    if "text" not in data:
        emit("io_error", {"message": "Text not found"},
             to=sid)
        return
    text = data["text"]
    """if "user_id" not in data:
        emit("io_error", {"message": "User not found"},
             to=sid)
        return"""
    user_id = data.get('user_id')
    print(user_id)
    TEXTS[user_id].append({
        "id": "user",
        "text": text
    })
    voice = SPEAKER.get(user_id, "voice1")
    if sid not in head_memories:
        head_memories[sid] = get_head_memory()
    head_memory = head_memories[sid]
    # output_texts = [page.aihandler(text)['text']]
    output_texts = page.aihandler_stream(text)
    bot_text = ""
    for output_text in output_texts:
        sign = [2 * (random.random() > 0.5) - 1
                for _ in range(8)]
        head_memory = perform_on_text(output_text, sid, head_memory,
                                      sign=sign,
                                      voice=voice)
        bot_text += " " + output_text
    print("SURGERY STARTED!")
    # perform_surgery(sid)
    print("SURGERY ENDED!")
    TEXTS[user_id].append({
        "id": "bot",
        "text": bot_text
    })
    emit("io_finish", {}, to=sid)
io.run(app,
host=host, port=port, debug=debug,
allow_unsafe_werkzeug=True)

154
miapia_own/pieinfer.py Normal file

@@ -0,0 +1,154 @@
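# Inference and rendering glue around the EmoTalk model:
#   test() / PieInfer  - 16 kHz waveform -> (n_frames, 52) ARKit blendshape coefficients
#   render_video()     - runs Blender headless on <model_name>.blend / <model_name>.py and
#                        yields every saved frame path parsed from Blender's stdout
#   construct_video()  - muxes the PNG sequence with the wav into ./audio/<wav_name>.mp4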
import librosa
import numpy as np
import argparse
from torch import cuda
from parse import parse
from scipy.signal import savgol_filter
import torch
from model import EmoTalk
import random
import os, subprocess
import shlex
from munch import Munch
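# Post-processing in test(): per-blendshape Savitzky-Golay smoothing, then columns 8 and 9
# (eyeBlinkLeft / eyeBlinkRight in the ARKit order) are overwritten with one of four 7-frame
# blink curves at random intervals of 60-180 frames.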
@torch.no_grad()
def test(model, speech_array, sampling_rate):
    args = Munch(
        bs_dim=52,
        feature_dim=832,
        period=30,
        device="cuda",
        model_path="./pretrain_model/EmoTalk.pth",
        max_seq_len=5000,
        num_workers=0,
        batch_size=1,
        post_processing=True,
        blender_path="./blender/blender")
    eye1 = np.array([0.36537236, 0.950235724, 0.95593375, 0.916715622, 0.367256105, 0.119113259, 0.025357503])
    eye2 = np.array([0.234776169, 0.909951985, 0.944758058, 0.777862132, 0.191071674, 0.235437036, 0.089163929])
    eye3 = np.array([0.870040774, 0.949833691, 0.949418545, 0.695911646, 0.191071674, 0.072576277, 0.007108896])
    eye4 = np.array([0.000307991, 0.556701422, 0.952656746, 0.942345619, 0.425857186, 0.148335218, 0.017659493])
    # speech_array, sampling_rate = librosa.load(os.path.join(wav_path), sr=16000)
    audio = torch.FloatTensor(speech_array).unsqueeze(0).to(args.device)
    level = torch.tensor([1]).to(args.device)
    person = torch.tensor([0]).to(args.device)
    prediction = model.predict(audio, level, person)
    prediction = prediction.squeeze().detach().cpu().numpy()
    if args.post_processing:
        output = np.zeros((prediction.shape[0], prediction.shape[1]))
        for i in range(prediction.shape[1]):
            output[:, i] = savgol_filter(prediction[:, i], 5, 2)
        output[:, 8] = 0
        output[:, 9] = 0
        i = random.randint(0, 60)
        while i < output.shape[0] - 7:
            eye_num = random.randint(1, 4)
            if eye_num == 1:
                output[i:i + 7, 8] = eye1
                output[i:i + 7, 9] = eye1
            elif eye_num == 2:
                output[i:i + 7, 8] = eye2
                output[i:i + 7, 9] = eye2
            elif eye_num == 3:
                output[i:i + 7, 8] = eye3
                output[i:i + 7, 9] = eye3
            else:
                output[i:i + 7, 8] = eye4
                output[i:i + 7, 9] = eye4
            time1 = random.randint(60, 180)
            i = i + time1
        return output
    else:
        return prediction
def render_video(wav_name, model_name):
    args = Munch(
        bs_dim=52,
        feature_dim=832,
        period=30,
        device="cuda",
        model_path="./pretrain_model/EmoTalk.pth",
        max_seq_len=5000,
        num_workers=0,
        batch_size=1,
        post_processing=True,
        blender_path="./blender/blender")
    # wav_name = args.wav_path.split('/')[-1].split('.')[0]
    image_path = os.path.join("./audio", wav_name)
    os.makedirs(image_path, exist_ok=True)
    blender_path = args.blender_path
    python_path = f"./{model_name}.py"
    blend_path = f"./{model_name}.blend"
    print(python_path, blend_path)
    # python_path = "./render.py"
    # blend_path = "./render.blend"
    cmd = '{} -t 64 -b {} -P {} -- "{}" "{}" '.format(blender_path,
                                                      blend_path,
                                                      python_path,
                                                      "./audio/",
                                                      wav_name)
    cmd = shlex.split(cmd)
    p = subprocess.Popen(cmd,
                         shell=False,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    while p.poll() is None:
        line = p.stdout.readline().decode('utf-8')
        line = line.strip()
        if line and line.startswith('Saved: '):
            fname = parse("Saved: '{}'", line).fixed[0]
            yield fname
        else:
            print(line)
    if p.returncode == 0:
        print('Subprogram success')
    else:
        print('Subprogram failed')


def construct_video(wav_name):
    image_path = os.path.join("./audio", wav_name)
    os.makedirs(image_path, exist_ok=True)
    image_temp = image_path + "/%d.png"
    output_path = os.path.join("./audio", wav_name + ".mp4")
    cmd = 'ffmpeg -r 30 -i "{}" -i "{}" -pix_fmt yuv420p -s 512x768 "{}" -y'.format(image_temp,
                                                                                    f"./audio/{wav_name}.wav",
                                                                                    output_path)
    subprocess.call(cmd, shell=True)
    cmd = 'rm -rf "{}"'.format(image_path)
    subprocess.call(cmd, shell=True)
class PieInfer(object):
    def __init__(self):
        args = Munch(
            bs_dim=52,
            feature_dim=832,
            period=30,
            device="cuda" if cuda.is_available() else "cpu",
            model_path="./pretrain_model/EmoTalk.pth",
            max_seq_len=5000,
            num_workers=0,
            batch_size=1,
            post_processing=True,
            blender_path="./blender/blender")
        #"""
        model = EmoTalk(args)
        model.load_state_dict(torch.load(args.model_path, map_location=torch.device(args.device)), strict=False)
        model = model.to(args.device)
        model.eval()
        #"""
        # model = None
        self.model = model

    def __call__(self,
                 speech_array,
                 sampling_rate):
        return test(self.model, speech_array, sampling_rate)


@@ -0,0 +1,78 @@
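# Minimal stand-alone variant of the server: no TTS and no EmoTalk inference; io_set_text just
# pushes a canned wav and ten random coefficient updates so the client-side avatar plumbing can
# be exercised.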
import sys
import pandas as pd
import argparse
import base64
from flask import send_file, Response
from flask_socketio import emit
from piedemo.fields.ajax_group import AjaxChatField, AjaxGroup
from piedemo.fields.grid import VStack, HStack, SpaceField
from piedemo.fields.inputs.hidden import InputHiddenField
from piedemo.fields.outputs.colored_text import ptext, OutputColoredTextField
from piedemo.fields.outputs.json import OutputJSONField
from piedemo.fields.outputs.progress import ProgressField
from piedemo.fields.outputs.video import OutputVideoField
from piedemo.web import Web
from piedemo.page import Page
from piedemo.hub.svgpil import SVGImage
from piedemo.fields.outputs.table import OutputTableField
from piedemo.fields.inputs.int_list import InputIntListField
from piedemo.fields.navigation import Navigation
from piedemo.fields.inputs.chat import ChatField
import librosa
import uuid
import numpy as np
import redis
import argparse
from scipy.signal import savgol_filter
import torch
import random
import os, subprocess
import shlex
from tqdm import tqdm
class MainPage(Page):
    def __init__(self, model_name: str):
        super(MainPage, self).__init__()


web = Web({
    "": "simple",
    "simple": MainPage("render"),
    # "nice": MainPage("FemAdv_b350_V2_050523"),
}, use_socketio_support=True)
host = '0.0.0.0'
port = 8011
debug = False
app = web.get_app()
io = web.get_socketio(app)


@io.on("io_set_text")
def io_set_text(data):
    sid = None
    if "text" not in data:
        emit("io_error", {"message": "Text not found"},
             to=sid)
    encode_string = base64.b64encode(open("../feeling_good.wav", "rb").read())
    for i in range(10):
        j = random.randint(0, 2)
        emit("io_set_coef", [{
            "index": j,
            "value": i / 10,
        }], to=sid)
    emit("io_push_audio_blob", {
        "dataURL": f"base64,{encode_string}"
    }, to=sid)
    emit("io_finish", {}, to=sid)


io.run(app,
       host=host, port=port, debug=debug,
       allow_unsafe_werkzeug=True)