first commit

2024-06-14 00:47:32 +03:00
commit 7591784e34
31 changed files with 3029 additions and 0 deletions
--- a/audio2face.py
+++ b/audio2face.py
@@ -0,0 +1,50 @@
+# speech to Audio2Face module utilizing the gRPC protocal from audio2face_streaming_utils
+import io
+from pydub import AudioSegment
+from scipy.io.wavfile import read
+import numpy as np
+from audio2face_streaming_utils import push_audio_track
+
+
+class Audio2FaceService:
+    def __init__(self, sample_rate=44100):
+        """
+        :param sample_rate: sample rate
+        """
+        self.a2f_url = 'localhost:50051'   # Set it to the port of your local host 
+        self.sample_rate = 44100
+        self.avatar_instance = '/World/audio2face/PlayerStreaming'   # Set it to the name of your Audio2Face Streaming Instance
+
+    def tts_to_wav(self, tts_byte, framerate=22050) -> str:
+        """
+        :param tts_byte: tts data in byte
+        :param framerate: framerate
+        :return: wav byte
+        """
+        seg = AudioSegment.from_raw(io.BytesIO(tts_byte), sample_width=2, frame_rate=22050, channels=1)
+        wavIO = io.BytesIO()
+        seg.export(wavIO, format="wav")
+        rate, wav = read(io.BytesIO(wavIO.getvalue()))
+        return wav
+
+    def wav_to_numpy_float32(self, wav_byte) -> float:
+        """
+        :param wav_byte: wav byte
+        :return: float32
+        """
+        return wav_byte.astype(np.float32, order='C') / 32768.0
+
+    def get_tts_numpy_audio(self, audio) -> float:
+        """
+        :param audio: audio from tts_to_wav
+        :return: float32 of the audio
+        """
+        wav_byte = self.tts_to_wav(audio)
+        return self.wav_to_numpy_float32(wav_byte)
+
+    def make_avatar_speaks(self, audio) -> None:
+        """
+        :param audio: tts audio
+        :return: None
+        """
+        push_audio_track(self.a2f_url, self.get_tts_numpy_audio(audio), self.sample_rate, self.avatar_instance)