o
    ީZh                     @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZ ddlmZ dedee fd	d
Zdee de jfddZdedeeee	f  ddfddZejdd fdee ddfddZedkroe  dS dS )    N)defaultdictdeque)chain)AnyDefaultDictDictList   )PDFp_strreturnc                 C   s8   d| v rt t| d\}}tt||d S t| gS )N-r	   )mapintsplitlistrange)r   startend r   E/var/www/html/lang_env/lib/python3.10/site-packages/pdfplumber/cli.pyparse_page_spec   s   
r   args_rawc                 C   s   t d}|jddt dtjjd | }|jdddd	 |jd
ddd	 |jdddgdd |jddd |jdddd |jdddd |jdtj	d |jdt
d |jddtd |jdt
dd || }|jd urytt|j |_|S ) NZ
pdfplumberinfile?rb)nargstypedefaultz--structurezoWrite the structure tree as JSON.  All other arguments except --pages, --laparams, and --indent will be ignored
store_true)helpactionz--structure-textzWrite the structure tree as JSON including text contents.  All other arguments except --pages, --laparams, and --indent will be ignoredz--formatcsvjson)choicesr   z--types+)r   z--include-attrsz1Include *only* these object attributes in output.)r   r    z--exclude-attrsz,Exclude these object attributes from output.z
--laparams)r   z--precisionz--pages)r   r   z--indentz&Indent level for JSON pretty-printing.)r   r    )argparseArgumentParseradd_argumentFileTypesysstdinbufferadd_mutually_exclusive_groupr#   loadsr   r   
parse_argspagesr   r   )r   parsergroupargsr   r   r   r/      sJ   


r/   pdfdatac           	         s   t dd }| jD ]!}||j  |jD ]}|d}|d u rq |  |d 7  < qq	t|}|rc| }d|v r@||d  |d}|d u rJq/||  d|v r_ fdd	|d D |d< |s1d S d S )
Nc                   S   s   t tS )N)r   strr   r   r   r   <lambda>I   s    z#add_text_to_mcids.<locals>.<lambda>mcidtextchildrenpage_numberZmcidsc                    s   g | ]} | qS r   r   ).0r8   Ztext_contentsr   r   
<listcomp>[   s    z%add_text_to_mcids.<locals>.<listcomp>)r   r0   r;   charsgetr   popleftextend)	r4   r5   Zpage_contentspagecr8   delZpagenor   r=   r   add_text_to_mcidsH   s*   




rG   c              	   C   s  t | }tj|j|j|jdm}|jrttj	|j
|jd nE|jr6|j
}t|| ttj	||jdd n6|jdkrK|jtj|j|j|j|jd n)|jtj|j|j|j|j|jd W d    d S W d    d S W d    d S W d    d S 1 sw   Y  d S )N)r0   laparams)indentF)rI   ensure_asciir"   )	precisioninclude_attrsexclude_attrs)rK   rL   rM   rI   )r/   r
   openr   r0   rH   Z	structureprintr#   dumpsZstructure_treerI   Zstructure_textrG   formatZto_csvr*   stdouttypesrK   rL   rM   to_json)r   r3   r4   treer   r   r   main^   s>   

"rV   __main__)r&   r#   r*   collectionsr   r   	itertoolsr   typingr   r   r   r   r4   r
   r6   r   r   	Namespacer/   rG   argvrV   __name__r   r   r   r   <module>   s   "4$
