o
    ۩Zh                     @   s   d dl Z d dlZd dlmZmZmZmZmZmZm	Z	 d dl
mZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ eeZedZedZ G dd dZ!dS )    N)BinaryIO	ContainerDictIteratorListOptionalTuple)Rect   )settings)PDFDocumentPDFTextExtractionNotAllowedPDFNoPageLabels)	PDFParser)PDFObjectNotFound)
dict_value)	int_value)
list_value)resolve1)LITZPagePagesc                   @   s   e Zd ZdZdedededee ddf
dd	Zdefd
dZ	h dZ
ededed  fddZe					ddedeee  dededededed  fddZdS )PDFPageak  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes:
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).
    docpageidattrslabelreturnNc                 C   s   || _ || _t|| _|| _t| jd| _t| jdt | _	t| jd | _
d| jv r8t| jd | _n| j
| _t| jddd d | _| jd| _| jd	| _d
| jv ret| jd
 }ng }t|tso|g}|| _dS )zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        ZLastModified	ResourcesMediaBoxCropBoxRotater   ih  ZAnnotsBZContentsN)r   r   r   r   r   r   getZlastmoddict	resourcesmediaboxZcropboxr   rotateZannotsZbeads
isinstancelistcontents)selfr   r   r   r   r)    r+   G/var/www/html/lang_env/lib/python3.10/site-packages/pdfminer/pdfpage.py__init__-   s*   





zPDFPage.__init__c                 C   s   d | j| jS )Nz(<PDFPage: Resources={!r}, MediaBox={!r}>)formatr$   r%   )r*   r+   r+   r,   __repr__O   s   zPDFPage.__repr__>   r   r    r   r   documentc           	      #   s"   dt dttt f dttttt tt t f f f  f fddz }W n ty6   t	d }Y nw d}dj
v rZj
d j
}|D ]\}} ||t|V  d}qI|sjD ]/}| D ](}z|}t|tr|d	tu r ||t|V  W qe ty   Y qew q_d S )
Nobjparentr   c                 3   s    t | tr| }t| }n	| j}t|  }| D ]\}}| jv r0||vr0|||< q|d}|d u rBt	j
sB|d}|tu red|v retd|d  t|d D ]
}||E d H  qXd S |tu rvtd| ||fV  d S d S )NTypetypeZKidszPages: Kids=%rzPage: %r)r'   intr   getobjcopyobjiditemsINHERITABLE_ATTRSr"   r   ZSTRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)r1   r2   r8   treekvZ	tree_typecclsr0   searchr+   r,   rE   X   s,   


z$PDFPage.create_pages.<locals>.searchFr   Tr3   )objectr   strr   r   r5   Zget_page_labelsr   	itertoolsrepeatcatalognextZxrefsZ
get_objidsr6   r'   r#   r"   r>   r   )	rD   r0   Zpage_labelsZpagesobjectsr8   r?   Zxrefr1   r+   rC   r,   create_pagesV   s@   



zPDFPage.create_pagesr    TFfppagenosmaxpagespasswordcachingcheck_extractablec                 c   s    t |}t|||d}|js"|rd| }	t|	d| }
t|
 t| |D ]\}}|r4||vr4q)|V  |rB||d krB d S q)d S )N)rR   rS   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this caser
   )r   r   Zis_extractabler   r<   warning	enumeraterM   )rD   rO   rP   rQ   rR   rS   rT   parserr   	error_msgZwarning_msgZpagenopager+   r+   r,   	get_pages   s*   
 zPDFPage.get_pages)Nr   rN   TF)__name__
__module____qualname____doc__r   rF   r   rG   r-   r/   r:   classmethodr   rM   r   r   r5   boolrZ   r+   r+   r+   r,   r      sL    
"3
r   )"rH   loggingtypingr   r   r   r   r   r   r   Zpdfminer.utilsr	   rN   r   Zpdfdocumentr   r   r   Z	pdfparserr   Zpdftypesr   r   r   r   r   Zpsparserr   	getLoggerr[   r<   r>   r;   r   r+   r+   r+   r,   <module>   s"    $
