o
    0i_                     @  s   d Z ddlmZ ddlZddlZddlZddlZddlmZ e	e
ZdZd'd(ddZed)d*ddZd+ddZd,ddZd-ddZd.d!d"Zd/d0d%d&ZdS )1zUtility functions for Supertonic TTS.

This module provides various helper functions for text processing, file operations,
and timing operations used throughout the Supertonic TTS package.
    )annotationsN)contextmanagerz(?<!Mr\.)(?<!Mrs\.)(?<!Ms\.)(?<!Dr\.)(?<!Prof\.)(?<!Sr\.)(?<!Jr\.)(?<!Ph\.D\.)(?<!etc\.)(?<!e\.g\.)(?<!i\.e\.)(?<!vs\.)(?<!Inc\.)(?<!Ltd\.)(?<!Co\.)(?<!Corp\.)(?<!St\.)(?<!Ave\.)(?<!Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+2   textstrmax_lenintreturnc                 C  s   | d| }t dd|S )z
    Sanitize filename by replacing non-alphanumeric characters.

    Args:
        text: Input text to convert to filename
        max_len: Maximum length of filename

    Returns:
        Sanitized filename string
    Nz[^a-zA-Z0-9_-]_)resub)r   r   prefix r   J/home/kim/smarthome/.venv/lib/python3.10/site-packages/supertonic/utils.pysanitize_filename+   s   r   Tnameverboseboolc                 c  sT    |rt |  d t }dV  t | }|r(t |  d|dd dS dS )a(  
    Context manager for timing code execution.

    Args:
        name: Name of the operation being timed
        verbose: Whether to log timing information

    Example:
        ```python
        with timer("Processing"):
            # Your code here
            process_data()
        ```
    z...Nz completed in .2fs)loggerinfotime)r   r   startelapsedr   r   r   timer:   s   r   secondsfloatc                 C  sn   | dk r
| ddS | dk r!t | d }| d }| d|ddS t | d }t | d d }| d| dS )	z
    Format duration in seconds to human-readable string.

    Args:
        seconds: Duration in seconds

    Returns:
        Formatted string (e.g., "1.23s", "2m 30s")
    <   r   r   i  zm z.0fzh m)r   )r   minutesZsecshoursr   r   r   format_durationS   s   
r"   
wav_lengthsample_ratec                 C  s   | | S )z
    Calculate audio duration from waveform length.

    Args:
        wav_length: Number of samples in waveform
        sample_rate: Audio sample rate (Hz)

    Returns:
        Duration in seconds
    r   )r#   r$   r   r   r   get_audio_durationi   s   r%   pathc                 C  s   t j| dd t j| S )z
    Ensure directory exists, create if necessary.

    Args:
        path: Directory path

    Returns:
        Absolute path to directory
    T)exist_ok)osmakedirsr&   abspath)r&   r   r   r   
ensure_dirw   s   
r+   
style_datadictc                   sN   ddg}t  fdd|D sdS |D ]}d | vs!d | vr$ dS qdS )	z
    Validate voice style JSON format.

    Args:
        style_data: Voice style dictionary

    Returns:
        True if valid, False otherwise
    Z	style_ttlZstyle_dpc                 3  s    | ]}| v V  qd S )Nr   ).0keyr,   r   r   	<genexpr>   s    z.validate_voice_style_format.<locals>.<genexpr>FdimsdataT)all)r,   required_keysr/   r   r0   r   validate_voice_style_format   s   
r6   ,  	list[str]c           	      C  s   |dk rt d| ddd td|  D }g }|D ]L}| }|s&qtt|}d}|D ]+}| }|s9q0t|t| d |krP||rJd	nd| 7 }q0|rY||  |}q0|ri| ri||  q|S )
az  
    Split text into chunks by paragraphs and sentences.

    This function intelligently splits long text into smaller chunks suitable
    for TTS processing, respecting paragraph and sentence boundaries.

    Args:
        text: Input text to chunk
        max_len: Maximum length of each chunk in characters (default: 300)

    Returns:
        List of text chunks

    Example:
        ```python
        text = "This is a long paragraph. It has multiple sentences. " * 10
        chunks = chunk_text(text, max_len=100)
        for chunk in chunks:
            print(f"Chunk ({len(chunk)} chars): {chunk[:50]}...")
        ```
    
   z!max_len must be at least 10, got z4. Very small chunks may produce poor quality speech.c                 S  s   g | ]
}|  r|  qS r   )strip)r.   pr   r   r   
<listcomp>   s    zchunk_text.<locals>.<listcomp>z\n\s*\n+     )
ValueErrorr   splitr:   _COMMON_ABBREVIATIONS_PATTERNlenappend)	r   r   Z
paragraphschunksZ	paragraphZ	sentencesZcurrent_chunkZsentenceZsentence_strippedr   r   r   
chunk_text   s2   
rF   )r   )r   r   r   r   r	   r   )T)r   r   r   r   )r   r   r	   r   )r#   r   r$   r   r	   r   )r&   r   r	   r   )r,   r-   r	   r   )r7   )r   r   r   r   r	   r8   )__doc__
__future__r   loggingr(   r   r   
contextlibr   	getLogger__name__r   rB   r   r   r"   r%   r+   r6   rF   r   r   r   r   <module>   s$    




