a
     g                     @   s6   d Z ddlZddlmZ ddlmZ G dd dZdS )z9
Command line for PyThaiNLP's dataset/corpus management.
    N)corpus)get_pythainlp_data_pathc                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )Appc                 C   sV   t jdddt  dd}|jdtg ddd	 ||d
d }t| |j| d S )NdatazManage dataset/corpus.ae  thainlp data <subcommand>

subcommands:

catalog                show list of available datasets
info <dataset_name>    show information about the dataset
get <dataset_name>     download the dataset
rm <dataset_name>      remove the dataset
path                   show full path to data directory

Example:

thainlp data get thai2fit_wv

Current data path:

z

To change PyThaiNLP data path, set the operating system's
PYTHAINLP_DATA_DIR environment variable.

For more information about corpora that PyThaiNLP use, see:
https://github.com/PyThaiNLP/pythainlp-corpus/

--)progdescriptionusage
subcommand)cataloginfogetrmpathzaction on dataset/corpus)typechoiceshelp      )argparseArgumentParserr   add_argumentstr
parse_argsgetattrr	   selfargvparserargs r   </usr/local/lib/python3.9/dist-packages/pythainlp/cli/data.py__init__   s    
zApp.__init__c                 C   sR   t jddd}|jdtdd ||dd  }t|jrFtd ntd	 d S )
NzDownload a datasetzthainlp data get <dataset_name>r   r   dataset_namedataset/corpus's namer   r   r   zDownloaded successfully.
Not found.)	r   r   r   r   r   r   downloadr#   printr   r   r   r    r   .   s    
zApp.getc                 C   sR   t jddd}|jdtdd ||dd  }t|jrFtd ntd	 d S )
NzRemove a datasetzthainlp data rm <dataset_name>r"   r#   r$   r%   r   zRemoved successfully.r&   )	r   r   r   r   r   r   remover#   r(   r   r   r   r    r   >   s    
zApp.rmc                 C   sV   t jddd}|jdtdd ||dd  }t|j}|rJt| ntd d S )	Nz!Print information about a datasetz thainlp data info <dataset_name>r"   r#   r$   r%   r   r&   )	r   r   r   r   r   r   get_corpus_db_detailr#   r(   )r   r   r   r   r   r   r   r    r   N   s    
zApp.infoc                 C   s   t t  }| }t| }td |D ]N}td| d|| d  dd t |}|rvtd|d  d	 q.t  q.td
 dS )z,Print dataset/corpus available for download.z&Dataset/corpus available for download:z-  Zlatest_version )endz
  (Local: version)zO
Use subcommand 'get' to download a dataset.

Example: thainlp data get crfcut
N)r   Zget_corpus_dbZcorpus_db_urljsonsortedkeysr(   r*   )r   r   Z	corpus_dbZcorpus_namesnameZcorpus_infor   r   r    r
   _   s     
zApp.catalogc                 C   s   t t  dS )zPrint path of local dataset.N)r(   r   )r   r   r   r   r    r   r   s    zApp.pathN)	__name__
__module____qualname__r!   r   r   r   r
   r   r   r   r   r    r      s    r   )__doc__r   Z	pythainlpr   Zpythainlp.toolsr   r   r   r   r   r    <module>   s   