a
     g
                     @   s^  d Z dgZddlZddlZddlmZmZ ddlZddl	Z	ddl
T ddlT eejZeed ZeZeeed dZe	jjeZeed	eed
Ze eddde!edddgZ"e#j$eedge"d%d& j'ddZ(e()  dddddddddddddZ*d d!d"dd#Z+e,e(e-fe*dd$e+Z.e.j/f i e d(e0e1e2e3eee0 e0f d&d'dZ4dS ))z
Thai2fit: Thai Wikipeida Language Model for Text Generation

Codes are from
https://github.com/PyThaiNLP/tutorials/blob/master/source/notebooks/text_generation.ipynb
gen_sentence    N)ListUnion)*z	texts.csvZ
itos_fnamerbth)Ztok_funclangZ	pre_rulesZ
post_rulesi'  F)	tokenizer	chunksizeZmark_fieldsi`     )ZvocabZ	max_vocabZmin_freqtext)cols	processorg?@   )bsi  i        Tg      ?g?g{Gz?g333333?)Zemb_szZn_hidZn_layersZ	pad_tokenZqrnnZtie_weightsZout_biasZoutput_pZhidden_pZinput_pZembed_pZweight_pg?gQ?   )Z	drop_multZclipalphabeta)configZ
pretrainedMbP?)	start_seqNprob
output_strreturnc                 C   sB   | du rt tt} tj| |d|ddd}|r>d|S |S )u:  
    Text generator using Thai2fit

    :param str start_seq: word to begin sentence with
    :param int N: number of words
    :param bool output_str: output as string
    :param bool duplicate: allow duplicate words in sentence

    :return: list words or str words
    :rtype: List[str], str

    :Example:
    ::

      from pythainlp.generate.thai2fit import gen_sentence

      gen_sentence()
      # output: 'แคทรียา อิงลิช  (นักแสดง'

      gen_sentence("แมว")
      # output: 'แมว คุณหลวง '
    Ng?z-*-)ZtemperatureZmin_psep )randomchoicelistthwiki_itoslearnZpredictsplitjoin)r   r   r   r   Z	list_word r&   E/usr/local/lib/python3.9/dist-packages/pythainlp/generate/thai2fit.pyr   O   s    

)Nr   r   T)5__doc____all__r   pickletypingr   r   ZpandaspdZfastaiZfastai.textZpythainlp.ulmfitZ
untar_dataZURLsZIMDB_SAMPLEZimdbZread_csvZdummy_dfZTHWIKI_LSTMZthwikiloadopenr"   r   Z	transformZVocabZthwiki_vocab	TokenizerZThaiTokenizerZpre_rules_thZpost_rules_thttZTokenizeProcessorZNumericalizeProcessorr   ZTextListZfrom_dfZsplit_by_rand_pctZlabel_for_lmZ	databunchZdata_lmZsanity_checkr   Ztrn_argsZlanguage_model_learnerZAWD_LSTMr#   Zload_pretrainedstrintfloatboolr   r&   r&   r&   r'   <module>   s|   
    