o
    TZhS1                     @   s&  d dl mZ d dlZd dlmZ d dlmZmZm	Z	 d dl
Z
ddlmZ ddlmZ dZe de ZeeZd	ed
e	eef fddZed\ZZg dZe	eee ee f ZG dd dZedG dd deZedkrd dlmZ eddZe d e! Z"ee"j#Z#e$e#Z%dS dS )    N)Path)AnyListTuple   )	resources)
deprecatedzDhttps://github.com/huggingface/datasets/tree/main/src/datasets/utils/resourcereturnc                 C   s$   t t| }t|t d|  fS )Nz/resources/)pkg_resources	read_textr   yamlZ	safe_loadBASE_REF_URL)r
   content r   L/var/www/html/lang_env/lib/python3.10/site-packages/datasets/utils/readme.pyload_yaml_resource   s   r   zreadme_structure.yaml)z[Needs More Information]z[More Information Needed]zj(https://github.com/huggingface/datasets/blob/main/CONTRIBUTING.md#how-to-contribute-to-the-dataset-cards)c                	   @   s\   e Zd Zddededee defddZddefd	d
Zdede	fddZ
defddZdS )SectionNFnamelevellinessuppress_parsing_errorsc                 C   sN   || _ || _|| _d| _d| _i | _g | _g | _| jd ur%| j|d d S d S )N Tr   )	r   r   r   textis_empty_textr   parsing_error_listparsing_warning_listparse)selfr   r   r   r   r   r   r   __init__$   s   
zSection.__init__c                 C   s  d}g }d}| j D ]l}|ddkrq	|dd d dkr"| }q	| d | jd krp|sp|dkrBt|| jd || j|< g }n|g kra|  jd| 7  _| jdkr_| jtvr_d| _	g }d| d	d  d}q	|
| q	|dkr|| jv r| j
d
| d t|| jd || j|< n|g kr|  jd| 7  _| jdkr| jtvrd| _	| jdkr|s| jg ks| jg krddd | j| j D  }}d| j d| }t|d S d S d S )Nr   F 
   z```r   # r   z)Multiple sections with the same heading `z:` have been found. Please keep only one of these sections.
c                 s   s    | ]}d | V  qdS )-	Nr   .0xr   r   r   	<genexpr>V   s    z Section.parse.<locals>.<genexpr>z=The following issues were found while parsing the README at ``:
)r   stripsplitr   r   r   r   joinFILLER_TEXTr   appendr   r   r   
ValueError)r    r   Zcurrent_sub_levelZcurrent_linesZ
code_startlineerrorserror_stringr   r   r   r   0   sH   


 zSection.parse	structurer   c              	   C   s  g }g }|d du r| j r| ji kr|d| j d |d du r/| j r/|d| j d |d d	ur| ji krYd
d |d D }|d| j dddd |D  d n_dd |d D }d}t|D ]6\}}|| jvr|d| j d| d d}qh| jdkrqh| j| |d | \}	}
}||
7 }||7 }qh|r| jD ]}||vr|d| j d| d q|ri ||fS |  ||fS )aC  Validates a Section class object recursively using the structure provided as a dictionary.

        Args:
            structute (:obj: `dict`): The dictionary representing expected structure.

        Returns:
            :obj: `ReadmeValidatorOutput`: The dictionary representation of the section, and the errors.
        Zallow_emptyFz"Expected some content in section `z` but it is empty.Zallow_empty_textzExpected some text in section `z4` but it is empty (text in subsections are ignored).subsectionsNc                 S      g | ]}|d  qS r   r   r)   Z
subsectionr   r   r   
<listcomp>x       z$Section.validate.<locals>.<listcomp>z	Section `z&` expected the following subsections: , c                 S      g | ]}d | d  qS `r   r(   r   r   r   r;   {       z. Found 'None'.c                 S   r8   r9   r   r:   r   r   r   r;      r<   z` is missing subsection: `z`.Tz###r@   z` has an extra subsection: `z[`. Skipping further validation checks for this subsection as expected structure is unknown.)	r   r   r1   r   r/   	enumerater   validateto_dict)r    r6   
error_listwarning_listvaluesZstructure_namesZhas_missing_subsectionsidxr   _Zsubsec_error_listZsubsec_warning_listr   r   r   rC   Z   sN   

"







zSection.validatec                 C   s$   | j | j| jdd | j D dS )z3Returns the dictionary representation of a section.c                 S   s   g | ]}|  qS r   )rD   )r)   valuer   r   r   r;      r<   z#Section.to_dict.<locals>.<listcomp>)r   r   r   r7   )r   r   r   r   rG   r    r   r   r   rD      s
   zSection.to_dictNFF)__name__
__module____qualname__strr   boolr!   r   dictReadmeValidatorOutputrC   rD   r   r   r   r   r   #   s
     *Gr   z*Use `huggingface_hub.DatasetCard` instead.c                
       s   e Zd Zddedee dedef fddZd	d
 Ze	dde
dedefddZe		ddedededefddZddef fddZdd Zdd Z  ZS )ReadMeNFr   r   r6   r   c                    sF   t  j|dd || _d| _d| _|| _| jd ur!| j|d d S d S )Nr   )r   r   r   r   )superr!   r6   yaml_tags_line_count	tag_countr   r   )r    r   r   r6   r   	__class__r   r   r!      s   
zReadMe.__init__c                 C   st   | j d u r| t\}}}n	| | j \}}}|g ks|g kr8ddd || D }d| j d| }t|d S )Nr&   c                 S   s   g | ]}d | qS )r'   r   r(   r   r   r   r;      r<   z#ReadMe.validate.<locals>.<listcomp>z3The following issues were found for the README at `r,   )r6   	_validatereadme_structurer/   r   r2   )r    r   rE   rF   r4   r5   r   r   r   rC      s   
zReadMe.validatepathc                 C   sD   t |dd}| }W d    n1 sw   Y  | ||||dS )Nzutf-8)encodingr   )open	readlines)clsr^   r6   r   fr   r   r   r   from_readme   s   
zReadMe.from_readmerootstring	root_namec                 C   s   | d}| ||||dS )Nr&   r   )r.   )rb   rf   r6   rg   r   r   r   r   r   from_string   s   
zReadMe.from_stringc                    s   d}| j D ]"}|  jd7  _|ddkr#|  jd7  _| jdkr# n|d7 }q| jdkr8| j |d d  | _ n	| j | jd  | _ t j|d d S )Nr   r   r"   z---   r   )r   rX   r-   rY   rW   r   )r    r   
line_countr3   rZ   r   r   r      s   



zReadMe.parsec                 C   s   t |  S )z>Returns the string of dictionary representation of the ReadMe.)rQ   rD   rK   r   r   r   __str__   s   zReadMe.__str__c           	   	   C   s  g }g }| j dkr|d n| jdkr|d n
| jdkr$|d t| j }|dkrF|dddd	 t| j D  d
 n5|dk rP|d n+t| j d }|drv| j| 	|d d \}}}||7 }||7 }n|d |ri ||fS | 
 ||fS )Nr   z-Empty YAML markers are present in the README.z*No YAML markers are present in the README.r   z2Only the start of YAML tags present in the README.z-The README has several first-level headings: r=   c                 S   r>   r?   r   r(   r   r   r   r;      rA   z$ReadMe._validate.<locals>.<listcomp>zL. Only one heading is expected. Skipping further validation for this README.zmThe README has no first-level headings. One heading is expected. Skipping further validation for this README.zDataset Card forr7   zuNo first-level heading starting with `Dataset Card for` found in README. Skipping further validation for this README.)rX   r1   rY   lenr   keysr/   list
startswithrC   rD   )	r    r]   rE   rF   Znum_first_level_keysZ	start_keyrI   Zsec_error_listZsec_warning_listr   r   r   r\      s<   



$





zReadMe._validaterL   )Nre   FrM   )rN   rO   rP   rQ   r   rS   rR   r!   rC   classmethodr   rd   rh   r   rk   r\   __classcell__r   r   rZ   r   rU      s&    $	
rU   __main__)ArgumentParserz?Validate the content (excluding YAML tags) of a README.md file.)usagereadme_filepath)&importlib.resourcesr   r   loggingpathlibr   typingr   r   r   r   r   Zdeprecation_utilsr   r   __file__Zthis_url	getLoggerrN   loggerrQ   r   r]   Zknown_readme_structure_urlr0   rS   rT   r   rU   argparsers   Zapadd_argument
parse_argsargsru   rd   Zreadmer   r   r   r   <module>   s4   
 	b


