a
     g                     @   s   d Z ddlZddlmZ ddlmZmZmZmZm	Z	m
Z
mZmZmZmZ ddlmZ ddlmZ ejej ej ZdZdZeed	d
dZdeeedddZefeeedddZeed	ddZeeeef dddZ ee!dddZ"dS )z
Check if it is Thai text
    N)Tuple)
thai_lead_vowelsthai_follow_vowelsthai_above_vowelsthai_below_vowelsthai_consonantsthai_vowelsthai_tonemarks
thai_signsthai_digitsthai_punctuations)pronunciate)tone_detectori   i  )chreturnc                 C   s(   t | }t|  krtkr$n ndS dS )u  Check if a character is a Thai character.

    :param ch: input character
    :type ch: str
    :return: True if ch is a Thai character, otherwise False.
    :rtype: bool

    :Example:
    ::

        from pythainlp.util import isthaichar

        isthaichar("ก")  # THAI CHARACTER KO KAI
        # output: True

        isthaichar("๕")  # THAI DIGIT FIVE
        # output: True
    TF)ord_TH_FIRST_CHAR_ASCII_TH_LAST_CHAR_ASCII)r   Zch_val r   =/usr/local/lib/python3.9/dist-packages/pythainlp/util/thai.py
isthaichar   s    r   .)textignore_charsr   c                 C   s,   |sd}| D ]}||vrt |s dS qdS )u  Check if every character in a string is a Thai character.

    :param text: input text
    :type text: str
    :param ignore_chars: characters to be ignored, defaults to "."
    :type ignore_chars: str, optional
    :return: True if every character in the input string is Thai,
             otherwise False.
    :rtype: bool

    :Example:
    ::

        from pythainlp.util import isthai

        isthai("กาลเวลา")
        # output: True

        isthai("กาลเวลา.")
        # output: True

        isthai("กาล-เวลา")
        # output: False

        isthai("กาล-เวลา +66", ignore_chars="01234567890+-.,")
        # output: True

     FT)r   )r   r   r   r   r   r   isthai7   s    r   c                 C   sr   | rt | tsdS |sd}d}d}| D ]&}||v r<|d7 }q&t|r&|d7 }q&t| | }|dkrfdS || d S )u  Find proportion of Thai characters in a given text

    :param text: input text
    :type text: str
    :param ignore_chars: characters to be ignored, defaults to whitespace,\
        digits, and punctuation marks.
    :type ignore_chars: str, optional
    :return: proportion of Thai characters in the text (percentage)
    :rtype: float

    :Example:
    ::

        from pythainlp.util import countthai

        countthai("ไทยเอ็นแอลพี 3.0")
        # output: 100.0

        countthai("PyThaiNLP 3.0")
        # output: 0.0

        countthai("ใช้งาน PyThaiNLP 3.0")
        # output: 40.0

        countthai("ใช้งาน PyThaiNLP 3.0", ignore_chars="")
        # output: 30.0
    g        r   r      d   )
isinstancestrr   len)r   r   Znum_thaiZ
num_ignorer   Z	num_countr   r   r   	countthai]   s    

r!   c                 C   s(   | t v s| tv s| dv r d|  S | S dS )u>  Prefix an underscore (_) to a high-position vowel or a tone mark,
    to ease readability.

    :param ch: input character
    :type ch: str
    :return: "_" + ch
    :rtype: str

    :Example:
    ::

        from pythainlp.util import display_thai_char

        display_thai_char("้")
        # output: "_้"
    u   ำ์ํ๎_N)r   r	   )r   r   r   r   display_thai_char   s    r#   )wordr   c                 C   s   t | d}dd |D S )uo  
    Thai tone detector for word.

    It uses pythainlp.transliterate.pronunciate for converting word to        pronunciation.

    :param str word: Thai word.
    :return: Thai pronunciation with tones in each syllable.        (l, m, h, r, f or empty if it cannot be detected)
    :rtype: Tuple[str, str]

    :Example:
    ::

        from pythainlp.util import thai_word_tone_detector

        print(thai_word_tone_detector("คนดี"))
        # output: [('คน', 'm'), ('ดี', 'm')]

        print(thai_word_tone_detector("มือถือ"))
        # output: [('มือ', 'm'), ('ถือ', 'r')]
    -c                 S   s    g | ]}|t |d dfqS )u   หฺu   ห)r   replace).0ir   r   r   
<listcomp>       z+thai_word_tone_detector.<locals>.<listcomp>)r   split)r$   Z_pronunciater   r   r   thai_word_tone_detector   s    r,   )r   r   c                 C   sB  dddddddddddd}| D ]}|t v r>|d  d7  < |tv rX|d  d7  < q |tv rr|d  d7  < q |tv r|d  d7  < q |tv r|d  d7  < q |tv r|d	  d7  < q |tv r|d
  d7  < q |tv r|d  d7  < q |tv r|d  d7  < q |t	v r,|d  d7  < q |d  d7  < q |S )u7  
    Count Thai characters by type

    This function will give you numbers of Thai characters by type        (consonants, vowels, lead_vowels, follow_vowels, above_vowels,        below_vowels, tonemarks, signs, thai_digits, punctuations, non_thai)

    :param str text: Text
    :return: Dict with numbers of Thai characters by type
    :rtype: dict

    :Example:
    ::

        from pythainlp.util import count_thai_chars

        count_thai_chars("ทดสอบภาษาไทย")
        # output: {
        # 'vowels': 3,
        # 'lead_vowels': 1,
        # 'follow_vowels': 2,
        # 'above_vowels': 0,
        # 'below_vowels': 0,
        # 'consonants': 9,
        # 'tonemarks': 0,
        # 'signs': 0,
        # 'thai_digits': 0,
        # 'punctuations': 0,
        # 'non_thai': 0
        # }
    r   )vowelslead_vowelsfollow_vowelsabove_vowelsbelow_vowels
consonants	tonemarkssignsr   punctuationsnon_thair-   r   r.   r/   r0   r1   r2   r3   r4   r   r5   r6   )
r   r   r   r   r   r   r	   r
   r   r   )r   _dictcr   r   r   count_thai_chars   sF    !


r9   )r   )#__doc__stringtypingr   Z	pythainlpr   r   r   r   r   r   r	   r
   r   r   Zpythainlp.transliterater   Zpythainlp.util.syllabler   
whitespacedigitspunctuationZ_DEFAULT_IGNORE_CHARSr   r   r   boolr   r   floatr!   r#   r,   dictr9   r   r   r   r   <module>   s   0&3