o
    0iM=                     @  s   d Z ddlmZ ddlZddlZddlmZ ddlmZm	Z	 ddl
ZddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ dd	lmZmZmZm Z m!Z! dd
l"m#Z# e$e%Z&G dd dZ'dS )zHigh-level TTS interface for Supertonic.

This module provides the main TTS class for easy text-to-speech synthesis
with automatic model loading and voice style management.
    )annotationsN)Path)OptionalUnion   )AVAILABLE_LANGUAGESAVAILABLE_MODELSDEFAULT_LANGUAGEDEFAULT_MAX_CHUNK_LENGTHDEFAULT_MAX_CHUNK_LENGTH_KODEFAULT_MODELDEFAULT_SILENCE_DURATIONDEFAULT_SPEEDDEFAULT_TOTAL_STEPSMAX_TEXT_LENGTHMAX_TOTAL_STEPSMIN_TOTAL_STEPSis_multilingual_model)Style)get_cache_dir list_available_voice_style_names
load_modelload_voice_style_from_json_fileload_voice_style_from_name)
chunk_textc                   @  st   e Zd ZdZeddddfd.ddZd/ddZd0ddZee	de
edfd1d$d%Zd2d*d+Zee	de
edfd1d,d-ZdS )3TTSa  High-level interface for Supertonic text-to-speech synthesis.

    Args:
        model: Model name to use ("supertonic" or "supertonic-2").
            Default is "supertonic-2" (multilingual support).
        model_dir: Directory containing model files. If None, uses default cache
            directory based on model name.
        auto_download: If True, automatically downloads model files from
            HuggingFace Hub if they're missing
        intra_op_num_threads: Number of threads for intra-op parallelism.
            If None (default), ONNX Runtime automatically determines optimal value based on your system.
            Can also be set via SUPERTONIC_INTRA_OP_THREADS environment variable
        inter_op_num_threads: Number of threads for inter-op parallelism.
            If None (default), ONNX Runtime automatically determines optimal value based on your system.
            Can also be set via SUPERTONIC_INTER_OP_THREADS environment variable

    Attributes:
        model (supertonic.core.Supertonic): The underlying Supertonic engine
        model_name (str): Name of the loaded model
        model_dir (pathlib.Path): Path to the model directory
        sample_rate (int): Audio sample rate in Hz
        voice_style_names (list[str]): List of available voice style names
        is_multilingual (bool): Whether the model supports multiple languages

    Example:
        ```python
        from supertonic import TTS

        # Use default model (supertonic-2 with multilingual support)
        tts = TTS()
        style = tts.get_voice_style("M1")
        wav, dur = tts.synthesize("Hello!", voice_style=style, lang="en")

        # Use specific model version
        tts_v1 = TTS(model="supertonic")  # English only
        tts_v2 = TTS(model="supertonic-2")  # Multilingual
        ```
    NTmodelstr	model_dirOptional[Union[Path, str]]auto_downloadboolintra_op_num_threadsOptional[int]inter_op_num_threadsc                 C  s   |t vrtd| ddt  || _t|| _|du r!t|}t|ts*t|}t	|||||| _
|| _| j
j| _t|| _dS )a  Initialize the TTS engine.

        Args:
            model: Model name ("supertonic" or "supertonic-2"). Default: "supertonic-2"
            model_dir (Union[Path, str]): Directory containing model files. If None, uses default
                cache directory based on model name
            auto_download: If True, automatically downloads missing model files
            intra_op_num_threads: Number of threads for intra-op parallelism.
                If None (default), ONNX Runtime automatically determines optimal value based on your system.
                Can also be set via SUPERTONIC_INTRA_OP_THREADS environment variable
            inter_op_num_threads: Number of threads for inter-op parallelism.
                If None (default), ONNX Runtime automatically determines optimal value based on your system.
                Can also be set via SUPERTONIC_INTER_OP_THREADS environment variable
        zInvalid model: 'z'. Available models: , N)r   
ValueErrorjoin
model_namer   is_multilingualr   
isinstancer   r   r   r   sample_rater   Zvoice_style_names)selfr   r   r    r"   r$    r-   M/home/kim/smarthome/.venv/lib/python3.10/site-packages/supertonic/pipeline.py__init__T   s    



zTTS.__init__
voice_namereturnr   c                 C  s   t | j|S )a-  Load a voice style by name. Avaliable voice style names can be listed with
            `list_available_voice_style_names()`.

        Args:
            voice_name: Name of the voice style (e.g., 'M1', 'F1', 'M2', 'F2')

        Returns:
            Style object containing voice style vectors
        )r   r   )r,   r0   r-   r-   r.   get_voice_style   s   
zTTS.get_voice_stylevoice_style_pathUnion[Path, str]c                 C  s   t |S )zLoad a voice style from a JSON file path.

        Args:
            voice_style_path: Path to the voice style JSON file (str or Path)

        Returns:
            Style object containing voice style vectors
        )r   )r,   r3   r-   r-   r.   get_voice_style_from_path   s   	zTTS.get_voice_style_from_pathFtextvoice_styletotal_stepsintspeedfloatmax_chunk_lengthsilence_durationlangverbosetuple[np.ndarray, np.ndarray]c	                 C  s  |r|  s
td| jr!|tvrtd| ddt |}	n|dkr3|r3td| j d| d d	}	|rUtd
t| d | jrLtd|  n	td| j d t|tkrhtdt| dt dt	|t
sxtdt|j d|tkr|tkstdt dt d| d|dk rtd| | jj|\}
}|
stdt| d| |d	u r|	dkrtnt}t||}|rtdt| d t|dkrt|d	d  D ]\}}td!|d  d"|d	d#  t|d#krd$nd%  qt|d krtd&t|d   d' td(| d)|d*d+| j d, g }g }t|D ]]\}}|r@td-|d  d.t| d/d%d0d1 td2|d  d.t|  | |g||||	\}}|ritd3|d d*d4 |jd dkrytd5|j || || q't|dkrt|dksJ d6tjdt|| j ftj d7}g }t|D ]\}}|| |t|d k r|| qtj!|dd8}t"|}|t|d  }|| }|r|jd }td9 td:|d d*d; td<|d= td>|j  ||fS )?u  Synthesize speech from text.

        This method automatically chunks long text into smaller segments
        and concatenates them with silence in between.

        Args:
            text: Text to synthesize
            voice_style: Voice style object
            total_steps: Number of synthesis steps (default: 5)
            speed: Speech speed multiplier (default: 1.05)
            max_chunk_length: Max characters per chunk. If None, automatically
                determined based on language (300 for most, 120 for Korean)
            silence_duration: Silence between chunks in seconds (default: 0.3)
            lang: Language code for synthesis. Supported languages:
                - "en": English (default)
                - "ko": Korean
                - "es": Spanish
                - "pt": Portuguese
                - "fr": French
            verbose: If True, print detailed progress information (default: False)

        Returns:
            Tuple of (waveform, duration):
                - waveform: Audio array of shape (1, num_samples)
                - duration: Total duration in seconds

        Example:
            ```python
            tts = TTS()
            style = tts.get_voice_style("M1")
            wav, dur = tts.synthesize("Hello, world!", voice_style=style, lang="en")
            wav_ko, dur_ko = tts.synthesize("안녕하세요!", voice_style=style, lang="ko")
            print(f"Generated {dur[0]:.2f}s of audio")
            ```
        zText cannot be emptyzInvalid language: 'z'. Supported languages: r%   enu   ⚠️  Model 'z"' is English-only. Ignoring lang='z'.Nu   📝 Input text length: z charactersu   🌐 Language: u   🌐 Model: z (English only)zText length (z") exceeds maximum allowed length (z.). Please split your text into smaller chunks.z(voice_style must be a Style object, got z(. Use get_voice_style() to load a style.ztotal_steps must be between z and z, got z,. Higher values = better quality but slower.r   z+silence_duration must be non-negative, got zFound z unsupported character(s): kozSplit into z	 chunk(s)r      zChunk z: <   z... z... and z more chunk(s)z&Synthesizing audio... Settings: steps=z, speed=z.2fzx, sample_rate=ZHzz   [/z] Processing chunk... T)endflushzProcessing chunk u   ✓ (zs)z%Expected wav shape (1, samples), got zNo audio generated)Zdtype)ZaxiszGeneration complete!zTotal duration: szTotal samples: ,zArray shape: )#stripr&   r)   r   r'   printr(   lenr   r*   r   	TypeErrortype__name__r   r   r   Ztext_processorZvalidate_textr   r
   r   	enumerater+   loggerdebugshapeRuntimeErrorappendnpZzerosr9   Zfloat32Zconcatenatesum)r,   r6   r7   r8   r:   r<   r=   r>   r?   Zeffective_langZis_validunsupportedZtext_chunksichunkZwav_listZdur_listZ
text_chunkwavZdur_onnxZsilenceZarrays_to_concatZwav_catZtotal_audio_durZtotal_silence_durZdur_catZtotal_samplesr-   r-   r.   
synthesize   s   /

6$
$


zTTS.synthesizer\   
np.ndarrayoutput_pathNonec              
   C  s   zddl }W n ty } z
td td|d}~ww t|}|jjddd t|jtj	s9t
d|j td|  |t|| | j td	 dS )
zSave synthesized audio to a WAV file.

        Args:
            wav: Audio waveform array from synthesize()
            output_path: Path where to save the WAV file
        r   Nzsoundfile not installedzSsoundfile library is required to save audio. Install it with: pip install soundfileT)parentsexist_okz#No write permission for directory: zSaving audio to zAudio saved successfully)Z	soundfileImportErrorrR   errorr   parentmkdirosaccessW_OKPermissionErrorinfowriter   Zsqueezer+   )r,   r\   r_   ZsfeZoutput_path_objr-   r-   r.   
save_audio>  s$   
zTTS.save_audioc	           	   
   C  s   | j ||||||||dS )u  Shorthand for synthesize(). Allows using tts(...) instead of tts.synthesize(...).

        Args:
            text: Text to synthesize
            voice_style: Voice style object
            total_steps: Number of synthesis steps (default: 5)
            speed: Speech speed multiplier (default: 1.05)
            max_chunk_length: Max characters per chunk. If None, automatically
                determined based on language (300 for most, 120 for Korean)
            silence_duration: Silence between chunks in seconds (default: 0.3)
            lang: Language code for synthesis (default: "en").
                Supported: "en", "ko", "es", "pt", "fr"
            verbose: If True, print detailed progress information (default: False)

        Returns:
            Tuple of (waveform, duration):
                - waveform: Audio array of shape (1, num_samples)
                - duration: Total duration in seconds

        Example:
            ```python
            tts = TTS()
            style = tts.get_voice_style("M1")
            wav, dur = tts("Hello, world!", voice_style=style, lang="en")
            wav_ko, dur_ko = tts("안녕하세요!", voice_style=style, lang="ko")
            print(f"Generated {dur[0]:.2f}s of audio")
            ```
        )r6   r7   r8   r:   r<   r=   r>   r?   )r]   )	r,   r6   r7   r8   r:   r<   r=   r>   r?   r-   r-   r.   __call___  s   'zTTS.__call__)
r   r   r   r   r    r!   r"   r#   r$   r#   )r0   r   r1   r   )r3   r4   r1   r   )r6   r   r7   r   r8   r9   r:   r;   r<   r#   r=   r;   r>   r   r?   r!   r1   r@   )r\   r^   r_   r   r1   r`   )rP   
__module____qualname____doc__r   r/   r2   r5   r   r   r   r	   r]   rn   ro   r-   r-   r-   r.   r   ,   s4    )
,
 
(%r   )(rr   
__future__r   loggingrg   pathlibr   typingr   r   numpyrW   configr   r   r	   r
   r   r   r   r   r   r   r   r   r   corer   loaderr   r   r   r   r   utilsr   	getLoggerrP   rR   r   r-   r-   r-   r.   <module>   s    <
