a
     †‹g²  ã                   @   sP   d Z ddlZddlmZ ddlmZmZ eedœdd„Zeee dœdd	„Z	dS )
zô
Wrapper for PyICU word segmentation. This wrapper module uses
:class:`icu.BreakIterator` with Thai as :class:`icu.Local`
to locate boundaries between words in the text.

:See Also:
    * `GitHub repository <https://github.com/ovalhub/pyicu>`_
é    N)ÚList)ÚBreakIteratorÚLocale)ÚtextÚreturnc                 c   s@   t  tdƒ¡}| | ¡ | ¡ }|D ]}| ||… V  |}q$d S )NÚth)r   ZcreateWordInstancer   ZsetTextÚfirst)r   ZbdÚpÚq© r   úB/usr/local/lib/python3.9/dist-packages/pythainlp/tokenize/pyicu.pyÚ
_gen_words   s    
r   c                 C   s,   | rt | tƒsg S t dd| ¡} tt| ƒƒS )zn
    :param str text: text to be tokenized into words
    :return: list of words, tokenized from the text
    u   ([^à¸€-à¹¿
 ]+)z \1 )Ú
isinstanceÚstrÚreÚsubÚlistr   )r   r   r   r   Úsegment   s    r   )
Ú__doc__r   Útypingr   Zicur   r   r   r   r   r   r   r   r   Ú<module>   s
   	