a
     †‹gâ  ã                =   @   sT  d Z ddlZddlmZ ddlmZ g d¢Zg d¢Zg d¢Zg d¢Z	d	d
„ ee e e	 D ƒZ
e
 dd
„ eD ƒ¡ eedœdd„Zddddddddddddddddddd d!d"d#d$d%d"d&d&d'd(d)d#d$d%d"d&d&d'd(d)d*d+d,d-d.d/d0d1d1d2d-d.d/d0d3d3d4d5d6d7d8d9œ<Zd d&iZeee ¡ ƒee ¡ ƒ ƒZeed:d;Zeed<œd=d>„Zeed<œd?d@„ZdS )Az
Phonemes util
é    N)ÚTrie)Ú	Tokenizer))Úkr   zk^)õ   kÊ°Úkh)õ   Å‹Úngzng^)õ   tÉ•Úc)õ   tÉ•Ê°Úch)Úsr   )Újr   zj^)Údr   )ÚtÚyzt^)õ   tÊ°Úth)Únr   zn^)Úbr   )Úpr   zp^)õ   pÊ°Úph)Úfr   )Úmr   zm^)Úrr   )Úlr   )Úwr   zw^)Úhr   )ú?Úzzz^))Úir!   )Úer"   )õ   É›Úx)õ   É¤Úq)Úar'   )Úamzam^)Úajzaj^)Úawzaw^)Úur+   )Úor,   )õ   É”ú@)Úiir/   )Úeer0   )u   É›É›Úxx)u   É¯É¯Úvv)u   É¤É¤Zqq)Úaar3   )Úuur4   )Úoor5   )Ú z@@))Úiar7   )õ   É¯aÚva)Úuar:   )Úiiar;   )u   É¯É¯aZvva)Úuuar<   ))õ   Ë§Ú0)õ   Ë¨Ë©Ú1)õ   Ë¥Ë©Ú2)õ   Ë¦Ë¥Ú3)õ   Ë©Ë©Ë¦Ú4c                 C   s   i | ]}|d  |d “qS )é   r   © ©Ú.0r!   rH   rH   ú@/usr/local/lib/python3.9/dist-packages/pythainlp/util/phoneme.pyÚ
<dictcomp>L   s   ÿrL   c                 C   s&   i | ]}t |ƒd kr|d  |d “qS )é   r   )ÚlenrI   rH   rH   rK   rL   T   ó    )ÚpronunciationÚreturnc                 C   sH   |   d¡}g }|D ]*}|t ¡ v r2| t| ¡ q| |¡ qd |¡S )uõ  
    Convert NECTEC system to IPA system

    :param str pronunciation: NECTEC phoneme
    :return: IPA that is converted
    :rtype: str

    :Example:
    ::

        from pythainlp.util import nectec_to_ipa

        print(nectec_to_ipa("kl-uua-j^-2"))
        # output : 'kl uua j Ë¥Ë©'


    References
    ----------

    Pornpimon Palingoon, Sumonmas Thatphithakkul. Chapter 4 Speech processing         and Speech corpus. In: Handbook of Thai Electronic Corpus.         1st ed. p. 122â€“56.
    ú-ú )ÚsplitÚdict_nectec_to_ipaÚkeysÚappendÚjoin)rP   ÚpartsÚipaÚpartrH   rH   rK   Únectec_to_ipaX   s    
r\   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r6   r!   r'   r"   Zaer,   r+   ZueZoer7   r:   ZueaZaiZaoÚeoZaeoÚioZuiZoiZoeiZiaoZuaiZueaiÚ.)<r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   u   Ê”r   r'   r"   r#   r!   r,   r-   r+   u   É¯r%   u   aËu   eËu   É›Ëu   iËu   oËu   É”Ëu   uËu   É¯Ëu   É¤Ër7   r:   r8   r)   r*   Úewu   É›wÚiwu   É”jZuju   aËju   aËwu   eËwu   É›Ëwu   oËju   É”Ëju   É¤ËjZiawZuaju   É¯ajr_   Znewmm)Zcustom_dictZengine)rZ   rQ   c                 C   sš   g }t  | ¡}t|ƒD ]Z\}}|t|ƒd krJ|ttƒv rJ| t| ¡ q|ttƒv rf| t| ¡ q| |¡ qd |¡}t	 
d|¡ dd¡ d¡}|S )a  
    Convert IPA system to The Royal Thai General System of Transcription (RTGS)

    Docs: https://en.wikipedia.org/wiki/Help:IPA/Thai

    :param str ipa: IPA phoneme
    :return: The RTGS that is converted, according to rules listed in the Wikipedia page
    :rtype: str

    :Example:
    ::

        from pythainlp.util import ipa_to_rtgs

        print(ipa_to_rtgs("kluaj"))
        # output : 'kluai'

    rG   r6   ZNFKDÚasciiÚignorezutf-8)Úipa_cutZword_tokenizeÚ	enumeraterN   ÚlistÚdict_ipa_rtgs_finalrW   Údict_ipa_rtgsrX   ÚunicodedataÚ	normalizeÚencodeÚdecode)rZ   Z
rtgs_partsZ	ipa_partsr!   Zipa_partZrtgsrH   rH   rK   Úipa_to_rtgsÅ   s     

ÿþÿrm   c                 C   s"   g d¢}|D ]}|   |d¡} q| S )u/  
    Remove Thai Tones from IPA system

    :param str ipa: IPA phoneme
    :return: IPA phoneme with tones removed
    :rtype: str

    :Example:
    ::

        from pythainlp.util import remove_tone_ipa

        print(remove_tone_ipa("laËË¦Ë¥.saË¨Ë©.majË©Ë©Ë¦"))
        # output : laË.sa.maj

    )rE   rA   r?   rC   r=   r6   )Úreplace)rZ   Z
_list_toneZtonerH   rH   rK   Úremove_tone_ipaí   s    ro   )Ú__doc__ri   Zpythainlp.util.trier   Zpythainlp.tokenizer   Zconsonants_ipa_nectecZmonophthong_ipa_nectecZdiphthong_ipa_nectecZtones_ipa_nectecrU   ÚupdateÚstrr\   rh   rg   rf   rV   Ztrierd   rm   ro   rH   rH   rH   rK   Ú<module>   s¬   	ÿþýþÿ#½F(