a
     gh                     @   sz   d Z ddlmZ ddlZddlmZ ddlmZ dd Zdd	 Z	d
Z
e Zeee
 ee eee  dddZdS )z
Clause segmenter
    )ListN)pos_tag)path_pythainlp_corpusc           	      C   s   | | d }| | d }||  | |d}|dkr| |d  d }| |d  d }||d< |  |d< | |d< ||d< nd|d	< |t| d k r| |d  d }| |d  d }||d
< |  |d< | |d< ||d< nd|d< |S )Nr      )zword.curr_wordzword.curr_isspacezword.curr_isdigitzword.curr_postagzword.prev_wordzword.prev_isspacezword.prev_isdigitzword.prev_postagTZBOSzword.next_wordzword.next_isspacezword.next_isdigitzword.next_postagZEOS)isspaceisdigitlen)	dociZ	curr_wordcurr_posfeaturesZ	prev_wordZprev_posZ	next_wordZnext_pos r   C/usr/local/lib/python3.9/dist-packages/pythainlp/tokenize/crfcls.py_doc2features   s0    

r   c                    s    fddt t D S )Nc                    s   g | ]}t  |qS r   )r   ).0r
   r	   r   r   
<listcomp>3       z%_extract_features.<locals>.<listcomp>)ranger   r   r   r   r   _extract_features2   s    r   zblackboard-cls_v1.0.crfsuite)r	   returnc                 C   s   t | dd}t|}tt| t|}g }g }t| d }t|D ]D\}}|\}	}
|
dksd||kr~||	 || g }qD||	 qD|S )NZ
blackboard)Zcorpusr   ZE_CLS)	r   r   listziptaggertagr   	enumerateappend)r	   Z	word_tagsr   Zword_markersZclausestempZlen_docr
   Zword_markerwordmarkerr   r   r   segment;   s    

r    )__doc__typingr   Z
pycrfsuiteZpythainlp.tagr   Zpythainlp.corpusr   r   r   Z_CORPUS_NAMEZTaggerr   openstrr    r   r   r   r   <module>   s   $