o
     Zh                  
   @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlm Z! ddlm"Z"m#Z#m$Z$ ddlm%Z& ddlm'Z( ddlm)Z* ddlm+Z, ddlm-Z. ddl/m0Z0m1Z1 ddl2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZE ddlFmGZG ddlHmIZImJZJ ddlKmLZL deMdeNdeeNeedf f fddZOG dd de7ZPG dd  d ZQG d!d" d"eeef ZRdS )#    N)abstractmethod)datetime)AnyDict	GeneratorIterableIteratorListMappingOptionalTupleUnioncast   )
Encryption)
PageObject_VirtualList)index2label)deprecate_with_replacementlogger_warningparse_iso8824_date)CatalogAttributes)CatalogDictionary)CheckboxRadioButtonAttributesGoToActionArgumentsUserAccessPermissions)Core)DocumentInformationAttributes)FieldDictionaryAttributes)PageAttributes)PagesAttributes)PdfReadError
PyPdfError)ArrayObjectBooleanObjectByteStringObjectDestinationDictionaryObjectEncodedStreamObjectFieldFitFloatObjectIndirectObject
NameObject
NullObjectNumberObject	PdfObjectTextStringObject
TreeObjectViewerPreferencescreate_string_objectis_null_or_none)EmbeddedFile)OutlineTypePagemodeType)XmpInformationdsizereturn.c                 C   s4   |dkrt dd|  } | dd  } td| d S )N   zInvalid size in convert_to_ints           iz>qr   )r!   structunpack)r:   r;    r@   H/var/www/html/lang_env/lib/python3.10/site-packages/pypdf/_doc_common.pyconvert_to_int`   s
   rB   c                   @   s  e Zd ZdZd)ddZdedee fddZedee fd	d
Z	edee fddZ
edee fddZedee fddZedee fddZedee fddZedee fddZedee fddZedee fddZedee fddZedee fddZedee fdd Zedee fd!d"Zedee fd#d$Zedee fd%d&Zedee fd'd(ZdS )*DocumentInformationa  
    A class representing the basic document metadata provided in a PDF File.
    This class is accessible through
    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.

    All text properties of the document metadata have
    *two* properties, e.g. author and author_raw. The non-raw property will
    always return a ``TextStringObject``, making it ideal for a case where the
    metadata is being displayed. The raw property can sometimes return a
    ``ByteStringObject``, if pypdf was unable to decode the string's text
    encoding; this requires additional safety in the caller and therefore is not
    as commonly accessed.
    r<   Nc                 C   s   t |  d S N)r'   __init__selfr@   r@   rA   rE   w   s   zDocumentInformation.__init__keyc                 C   s0   |  |d }t|tr|S t|trt|S d S rD   )get
isinstancer1   r%   str)rG   rH   retvalr@   r@   rA   	_get_textz   s   

zDocumentInformation._get_textc                 C   s,   |  tjr| tjp|  tj S dS )z
        Read-only property accessing the document's title.

        Returns a ``TextStringObject`` or ``None`` if the title is not
        specified.
        N)rI   DITITLErM   
get_objectrF   r@   r@   rA   title   s
   

zDocumentInformation.titlec                 C      |  tjS )z>The "raw" version of title; can return a ``ByteStringObject``.)rI   rN   rO   rF   r@   r@   rA   	title_raw      zDocumentInformation.title_rawc                 C   rR   )z
        Read-only property accessing the document's author.

        Returns a ``TextStringObject`` or ``None`` if the author is not
        specified.
        )rM   rN   AUTHORrF   r@   r@   rA   author      zDocumentInformation.authorc                 C   rR   )z?The "raw" version of author; can return a ``ByteStringObject``.)rI   rN   rU   rF   r@   r@   rA   
author_raw   rT   zDocumentInformation.author_rawc                 C   rR   )z
        Read-only property accessing the document's subject.

        Returns a ``TextStringObject`` or ``None`` if the subject is not
        specified.
        )rM   rN   SUBJECTrF   r@   r@   rA   subject   rW   zDocumentInformation.subjectc                 C   rR   )z@The "raw" version of subject; can return a ``ByteStringObject``.)rI   rN   rY   rF   r@   r@   rA   subject_raw   rT   zDocumentInformation.subject_rawc                 C   rR   )ac  
        Read-only property accessing the document's creator.

        If the document was converted to PDF from another format, this is the
        name of the application (e.g. OpenOffice) that created the original
        document from which it was converted. Returns a ``TextStringObject`` or
        ``None`` if the creator is not specified.
        )rM   rN   CREATORrF   r@   r@   rA   creator      
zDocumentInformation.creatorc                 C   rR   )z@The "raw" version of creator; can return a ``ByteStringObject``.)rI   rN   r\   rF   r@   r@   rA   creator_raw   rT   zDocumentInformation.creator_rawc                 C   rR   )aI  
        Read-only property accessing the document's producer.

        If the document was converted to PDF from another format, this is the
        name of the application (for example, macOS Quartz) that converted it to
        PDF. Returns a ``TextStringObject`` or ``None`` if the producer is not
        specified.
        )rM   rN   PRODUCERrF   r@   r@   rA   producer   r^   zDocumentInformation.producerc                 C   rR   )zAThe "raw" version of producer; can return a ``ByteStringObject``.)rI   rN   r`   rF   r@   r@   rA   producer_raw   rT   z DocumentInformation.producer_rawc                 C      t | tjS )z:Read-only property accessing the document's creation date.)r   rM   rN   CREATION_DATErF   r@   r@   rA   creation_date   s   z!DocumentInformation.creation_datec                 C   rR   )z
        The "raw" version of creation date; can return a ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        )rI   rN   rd   rF   r@   r@   rA   creation_date_raw   rW   z%DocumentInformation.creation_date_rawc                 C   rc   )z
        Read-only property accessing the document's modification date.

        The date and time the document was most recently modified.
        )r   rM   rN   MOD_DATErF   r@   r@   rA   modification_date   s   z%DocumentInformation.modification_datec                 C   rR   )z
        The "raw" version of modification date; can return a
        ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        )rI   rN   rg   rF   r@   r@   rA   modification_date_raw   s   	z)DocumentInformation.modification_date_rawc                 C   rR   )z
        Read-only property accessing the document's keywords.

        Returns a ``TextStringObject`` or ``None`` if keywords are not
        specified.
        )rM   rN   KEYWORDSrF   r@   r@   rA   keywords   rW   zDocumentInformation.keywordsc                 C   rR   )zAThe "raw" version of keywords; can return a ``ByteStringObject``.)rI   rN   rj   rF   r@   r@   rA   keywords_raw  rT   z DocumentInformation.keywords_raw)r<   N)__name__
__module____qualname____doc__rE   rK   r   rM   propertyrQ   rS   rV   rX   rZ   r[   r]   r_   ra   rb   r   re   rf   rh   ri   rk   rl   r@   r@   r@   rA   rC   h   sH    
			
	rC   c                   @   s  e Zd ZU dZdZeed< dZee	e
  ed< dZee ed< dZeed< eedefd	d
ZeedefddZedeeef dee fddZedededefddZeedee fddZedee fddZedee fddZedee fddZ defddZ!dede
fdd Z"dede#eef fd!d"Z$ede%ee&f fd#d$Z'de(fd%d&Z)		dd'ee*df d(ee%ee&f  de%ee&f fd)d*Z+				dd'ee* d(ee%e,e,f  d+ee, d,ee	e  dee%ee,f  f
d-d.Z-d/edefd0d1Z.d2ee*ef d(e%e,e,f d+e,d3e,d,e	e ddfd4d5Z/d'ee*ef d(e,d+e,d,e	e ddf
d6d7Z0d+e,d2e,d3e,ddfd8d9Z1dd:ede%ee,f fd;d<Z2d2ee3eef de	e
 fd=d>Z4edede&e5e6f fd?d@Z7e7j8dAedee&e
f ddfdBd@Z7ede9fdCdDZ:	ddEee dFee, de9fdGdHZ;edee( fdIdJZ<ededee=ef dee fdKdLZ>dMe
dee fdNdOZ?dPe&dee fdQdRZ@dSedTee	eeAede=ef   de&fdUdVZBdEedee& fdWdXZCede	e
 fdYdZZDede	e fd[d\ZEedee fd]d^ZFedeeG fd_d`ZH				ddaedbedee
f dcee%ee,f  dee ddf
dddeZI	ddMeee
ef dfeddfdgdhZJdiedjedee fdkdlZKdmede%eef fdndoZLedeeM fdpdqZNeedefdrdsZOedee%ee,f  fdtduZPedeQee	eR f fdvdwZSedeTeUddf fdxdyZVde	e fdzd{ZWd|ede	eR fd}d~ZX	ddee de%eeeRe	eR f f fddZYe		ddedeZe f dedeZe f de%ee,f fddZ[dS )PdfDocCommonzm
    Common functions from PdfWriter and PdfReader objects.

    This root class is strongly abstracted.
    FstrictNflattened_pages_encryption	_readonlyr<   c                 C      d S rD   r@   rF   r@   r@   rA   root_object     zPdfDocCommon.root_objectc                 C   rw   rD   r@   rF   r@   r@   rA   
pdf_header  ry   zPdfDocCommon.pdf_headerindirect_referencec                 C   rw   rD   r@   rG   r{   r@   r@   rA   rP   !     zPdfDocCommon.get_objectindirectobjc                 C   rw   rD   r@   )rG   r~   r   r@   r@   rA   _replace_object'     zPdfDocCommon._replace_objectc                 C   rw   rD   r@   rF   r@   r@   rA   _info+  ry   zPdfDocCommon._infoc                 C   s$   t  }| jdu r
dS || j |S )a  
        Retrieve the PDF file's document information dictionary, if it exists.

        Note that some PDF files use metadata streams instead of document
        information dictionaries, and these metadata streams will not be
        accessed by this function.
        N)rC   r   update)rG   rL   r@   r@   rA   metadata0  s
   	
zPdfDocCommon.metadatac                 C   rw   rD   r@   rF   r@   r@   rA   xmp_metadata?  r   zPdfDocCommon.xmp_metadatac                 C   sf   | j tjd}|du rdS | }t|ts1t|}t|dr)| |j	| |S || j t
tj< |S )zCReturns the existing ViewerPreferences as an overloaded dictionary.Nr{   )rx   rI   CDZVIEWER_PREFERENCESrP   rJ   r3   hasattrr   r{   r-   )rG   or@   r@   rA   viewer_preferencesC  s   

zPdfDocCommon.viewer_preferencesc                 C   sB   | j r
| jd d S | jdu r| | j | jdusJ t| jS )z
        Calculate the number of pages in this PDF file.

        Returns:
            The number of pages of the parsed PDF file.

        Raises:
            PdfReadError: If restrictions prevent this action.

        /Pages/CountN)is_encryptedrx   rt   _flattenrv   lenrF   r@   r@   rA   get_num_pagesR  s   

zPdfDocCommon.get_num_pagespage_numberc                 C   s2   | j du r| | j | j dusJ d| j | S )a?  
        Retrieve a page by number from this PDF file.
        Most of the time ``.pages[page_number]`` is preferred.

        Args:
            page_number: The page number to retrieve
                (pages begin at zero)

        Returns:
            A :class:`PageObject<pypdf._page.PageObject>` instance.

        Nzhint for mypy)rt   r   rv   )rG   r   r@   r@   rA   get_pageg  s   

zPdfDocCommon.get_pagec                    sb   t t| jd dtdtdttt tf f fddd\}}t|ts-J d||fS )	z
        Retrieve the node and position within the /Kids containing the page.
        If page_number is greater than the number of pages, it returns the top node, -1.
        r   nodemir<   c                    s   t t| dd}| d dkr |kr| dfS d |d fS  | |kr1| kr+dfS d || fS tt t| d D ](\}}t t| }||\}}|d ur`|dk rZ| |f  S ||f  S |}q:td)	Nr   r   z/Type/Page/Kidsr   z"Unexpectedly cannot find the node.)r   intrI   	enumerater#   r'   rP   r"   )r   r   maidxkidnir   recursive_calltopr@   rA   r     s$   z6PdfDocCommon._get_page_in_node.<locals>.recursive_callr   Zmypy)r   r'   rx   r   r   r   r0   rJ   )rG   r   r   r   r@   r   rA   _get_page_in_nodey  s   zPdfDocCommon._get_page_in_nodec                 C      |   S )z8A read-only dictionary which maps names to destinations.)_get_named_destinationsrF   r@   r@   rA   named_destinations  s   zPdfDocCommon.named_destinationsc                 C   s6  t  }tj| jv rnt| jtj trntt| jtj }|j}tj|v rQt|tj trQtt|tj }|j}tj|v rEtt |tj }|S t  }||t	tj< |S t
| drlt }| |}||t	tj< ||t	tj< |S t
| drt }| |}|| jt	tj< t }| |}||t	tj< ||t	tj< |S )N_add_object)r#   CANAMESrx   rJ   r'   r   r{   DESTSr-   r   r   )rG   Z
named_destnamesZ	names_refZdestsZ	dests_refr@   r@   rA   get_named_dest_root  s<   





z PdfDocCommon.get_named_dest_roottreerL   c                 C   s  |du r1i }| j }tj|v rtt|tj }ntj|v r1tt|tj }tj|v r1tt|tj }|du r7|S tj|v rQtt	|tj D ]
}| 
| | qD|S tj|v rtt|tj }d}|t|k r||  }|d7 }t|ttfsxq`t|}z||  }	W n
 ty   Y |S w |d7 }t|	trd|	v r|	d }	nq`| ||	}
|
dur|
||< |t|k sf|S | D ]'\}}| }t|trd|v r|d  }nq| ||}
|
dur|
||< q|S )a  
        Retrieve the named destinations present in the document.

        Args:
            tree: The current tree.
            retval: The previously retrieved destinations for nested calls.

        Returns:
            A dictionary which maps names to destinations.

        Nr   r   /D)rx   r   r   r   r2   r   r'   PAKIDSr#   r   rP   r   rJ   bytesrK   
IndexError_build_destinationitems)rG   r   rL   catalogr   r   r   Zoriginal_keyrH   valuedestZk__Zv__valr@   r@   rA   r     sf   



#



z$PdfDocCommon._get_named_destinationsfileobjstackc           
         s   t  }|t  |du r(i }| j}g }tj|v r&ttt	 |tj  ndS  du r.|S |dus4J d v rStt
 d }|D ]}| }	| |	|||| qA|S t fdd|D rg|  |||| |S )a  
        Extract field data if this PDF contains interactive form fields.

        The *tree*, *retval*, *stack* parameters are for recursive use.

        Args:
            tree: Current object to parse.
            retval: In-progress list of fields.
            fileobj: A file object (usually a text file) to write
                a report to on all interactive form fields found.
            stack: List of already parsed objects.

        Returns:
            A dictionary where each key is a field name, and each
            value is a :class:`Field<pypdf.generic.Field>` object. By
            default, the mapping name is used for keys.
            ``None`` if form data could not be located.

        Nz/Fieldsc                 3   s    | ]}| v V  qd S rD   r@   .0attrr   r@   rA   	<genexpr>=      z*PdfDocCommon.get_fields.<locals>.<genexpr>)FAZattributes_dictr   r   rx   r   Z	ACRO_FORMr   r   r2   r#   rP   _build_fieldany)
rG   r   rL   r   r   field_attributesr   fieldsffieldr@   r   rA   
get_fields  s*   
zPdfDocCommon.get_fieldsparentc                 C   sZ   d|v rt t|d S d|v r$| t t|d d t t|dd S t t|ddS )N/TM/Parent./T )r   rK   _get_qualified_field_namer'   rI   )rG   r   r@   r@   rA   r   B  s   z&PdfDocCommon._get_qualified_field_namer   r   c                    s  t  fdddD rd S |  }|r | | | |d t ||< || j }|tj	ddkrC|t
tj || t
d< |tj	ddkrxd	|v rxtt|d	 d
  || t
d< d|| d vrw|| t
d t
d ns|tj	ddkr|tjdtjj@ dkrg }t||| t
d< |tji D ]&}	|	 }	t|	d	 d
  D ]}
|
|vr||
 qt||| t
d< q|tjdtjj@ dkrd|| d v r|| d || d d= |  ||| d S )Nc                 3   s    | ]}| vV  qd S rD   r@   r   r   r@   rA   r   W  r   z,PdfDocCommon._build_field.<locals>.<genexpr>)r   r   
r   /Chz	/_States_/Btnz/APz/Nz/Offr   )allr   _write_fieldwriter)   r{   rP   rI   r   FTr-   ZOptr#   listkeysappendZFfZFfBitsZRadioKidsZNoToggleToOffindex_check_kids)rG   r   rL   r   r   r   rH   r   Zstatesksr@   r   rA   r   O  s>   

,
zPdfDocCommon._build_fieldc                 C   sf   ||v rt | | dt d S || tj|v r/|tj D ]}| }| |||| q d S d S )Nz already parsed)r   r   rm   r   r   r   rP   r   )rG   r   rL   r   r   r   r@   r@   rA   r   y  s   

zPdfDocCommon._check_kidsc           	   	   C   s  t  }|t  }|D ]t}|t jt jfv rq|| }z\|t jkr>ddddd}|| |v r=|| d|||   d n8|t jkriz	|| t j }W n t	y\   || t j
 }Y nw || d| d n|| d||  d W q t	y   Y qw d S )NZButtonTextZChoice	Signature)r   /Txr   z/Sig: r   )r   
attributesr   r   ZAAr   r   ZParentZTMKeyErrorT)	rG   r   r   r   Zfield_attributes_tupler   	attr_nametypesnamer@   r@   rA   r     sB   


zPdfDocCommon._write_fieldfull_qualified_namec                 C   s   dt dtttf dt fdd}|  }|du ri S i }| D ]$\}}|ddkrC|r4|d	||< q|d	||tt |d
 |< q|S )a  
        Retrieve form fields from the document with textual data.

        Args:
            full_qualified_name: to get full name

        Returns:
            A dictionary. The key is the name of the form field,
            the value is the content of the field.

            If the document contains multiple form fields with the same name, the
            second and following will get the suffix .2, .3, ...

        r   r   r<   c                    s2    |vr S  d t t fdd|D d  S )Nr   c                 3   s"    | ]}|  d  rdV  qdS )r   r   N)
startswith)r   kkr   r@   rA   r     s     zIPdfDocCommon.get_form_text_fields.<locals>.indexed_key.<locals>.<genexpr>   )rK   sum)r   r   r@   r   rA   indexed_key  s   z6PdfDocCommon.get_form_text_fields.<locals>.indexed_keyN/FTr   z/Vr   )rK   r   r   r   r   rI   r   )rG   r   r   Z
formfieldsffr   r   r@   r@   rA   get_form_text_fields  s   
z!PdfDocCommon.get_form_text_fieldsc              
      s&  dt dtdtf fdd z
tt j W n ty( } ztd|d}~ww t dr4td	g }	d
ddkrUdv rJd  g}n@fddj
D }n5	dd}|D ], 	d
ddkrdvrdv r}|d  g7 }q]|fddj
D 7 }q]fdd|D S )a  
        Provides list of pages where the field is called.

        Args:
            field: Field Object, PdfObject or IndirectObject referencing a Field

        Returns:
            List of pages:
                - Empty list:
                    The field has no widgets attached
                    (either hidden field or ancestor field).
                - Single page list:
                    Page where the widget is present
                    (most common).
                - Multi-page list:
                    Field with multiple kids widgets
                    (example: radio buttons, field repeated on multiple pages).

        r   rH   r<   c                    s4   || v r| | S d| v r t t| d  |S d S )Nr   )r   r'   rP   )r   rH   )_get_inheritedr@   rA   r     s   z<PdfDocCommon.get_pages_showing_field.<locals>._get_inheritedzField type is invalidNr   zField is not validz/Subtyper   z/Widgetz/Pc                    "   g | ]} j |d dv r|qS z/Annotsr   r{   rI   r   pr   r@   rA   
<listcomp>  
    z8PdfDocCommon.get_pages_showing_field.<locals>.<listcomp>r   r@   r   c                    r   r   r   r   r   r@   rA   r     r   c                    s,   g | ]}t |tr|n j |j qS r@   )rJ   r   pages_get_page_number_by_indirectr{   )r   xrF   r@   rA   r     s    )r'   rK   r   r   r{   rP   	Exception
ValueErrorr5   rI   r   )rG   r   excretZkidsr@   )r   r   r   rG   rA   get_pages_showing_field  s:   	


z$PdfDocCommon.get_pages_showing_fieldc              
   C   s   d| j vrdS | j d }t|tr| }t|trt|S t|trMz|^}}}t|t|}t	d||W S  t
yL } z
t
d| d| d}~ww dS )z
        Property to access the opening destination (``/OpenAction`` entry in
        the PDF catalog). It returns ``None`` if the entry does not exist
        or is not set.

        Raises:
            Exception: If a destination is invalid.

        z/OpenActionNZ
OpenActionzInvalid Destination r   )rx   rJ   r   decoderK   r4   r#   r*   tupler&   r   )rG   Zoapagetyparrayfitr   r@   r@   rA   open_destination  s"   





zPdfDocCommon.open_destinationr   c                 C   s   t d)NzNo setter for open_destination)NotImplementedError)rG   r   r@   r@   rA   r  ;  s   c                 C   r   )z
        Read-only property for the outline present in the document
        (i.e., a collection of 'outline items' which are also known as
        'bookmarks').
        )_get_outlinerF   r@   r@   rA   outline?  s   zPdfDocCommon.outliner   r  c                 C   s   |d u r1g }| j }tj|v r,tt|tj }t|tr|S t|s,d|v r,tt|d }|  | _	|d u r7|S 	 | 
|}|rD|| d|v r\g }| tt|d | |r\|| d|vrc	 |S tt|d }q8)Nz/FirstTz/Next)rx   COZOUTLINESr   r'   rJ   r.   r5   r   _named_destinations_build_outline_itemr   r
  )rG   r   r  r   linesZoutline_objZsub_outliner@   r@   rA   r
  H  s4   





zPdfDocCommon._get_outlinec                 C   s$   | j }tj|v rtd|tj S dS )uN  
        Read-only property for the list of threads.

        See §12.4.3 from the PDF 1.7 or 2.0 specification.

        It is an array of dictionaries with "/F" (the first bead in the thread)
        and "/I" (a thread information dictionary containing information about
        the thread, such as its title, author, and creation date) properties or
        None if there are no articles.

        Since PDF 2.0 it can also contain an indirect reference to a metadata
        stream containing information about the thread, such as its title,
        author, and creation date.
        r#   N)rx   r  ZTHREADSr   )rG   r   r@   r@   rA   threadsq  s   
zPdfDocCommon.threadsc                 C   rw   rD   r@   r|   r@   r@   rA   r     r}   z)PdfDocCommon._get_page_number_by_indirectr  c                 C      |  |jS )a  
        Retrieve page number of a given PageObject.

        Args:
            page: The page to get page number. Should be
                an instance of :class:`PageObject<pypdf._page.PageObject>`

        Returns:
            The page number or None if page is not found

        )r   r{   )rG   r  r@   r@   rA   get_page_number  s   zPdfDocCommon.get_page_numberdestinationc                 C   r  )z
        Retrieve page number of a given Destination object.

        Args:
            destination: The destination to get page number.

        Returns:
            The page number or None if page is not found

        )r   r  )rG   r  r@   r@   rA   get_destination_page_number  s   z(PdfDocCommon.get_destination_page_numberrQ   r  c                 C   s   d\}}t |ttfst |trt|dks|d u r%t }t||t S |^}}}zt||t||dW S  tyd   t	d| d| t
 | jrK | jd j}|d u rXt n|}t||t  Y S w )NNNr   )Zfit_typeZfit_argszUnknown destination:  )rJ   r.   rK   r#   r   r&   r*   r  r!   r   rm   rs   r   r{   )rG   rQ   r  r  r  tmpr{   r@   r@   rA   r     s(   	
zPdfDocCommon._build_destinationc                 C   s  d\}}}z	t d|d }W n ty#   | jrtd|d}Y nw d|v rQt t|d }t t|tj }|dkrPtj|v rF|tj }n | jrPtd|nd	|v rf|d	 }t	|trfd
|v rf|d
 }t	|t
rr| ||}nEt	|trz| || j| j}W n3 ty   | |d }Y n%w |d u r| ||}n| jrtd|td|dt | |d }|rd|v rt
dd |d D |td< d|v r|d |td< d|v r|d |td< t|dddk|td< ||_z|j|_W |S  ty   Y |S w )N)NNNrK   z/Titlez(Outline Entry Missing /Title attribute: r   z/Az/GoToz%Outline Action Missing /D attribute: z/Destr   zUnexpected destination zRemoved unexpected destination z from destinationz/Cc                 s   s    | ]}t |V  qd S rD   )r+   )r   cr@   r@   rA   r     r   z3PdfDocCommon._build_outline_item.<locals>.<genexpr>z/Fr   r   z
/%is_open%)r   r   rs   r!   r'   r-   r   SDrJ   r#   r   rK   r  Z
dest_arrayr   rm   r$   rI   r   r{   AttributeError)rG   r   r   rQ   Zoutline_itemactionZaction_typer@   r@   rA   r    st   





z PdfDocCommon._build_outline_itemc                 C   s   t | j| jS )aW  
        Property that emulates a list of :class:`PageObject<pypdf._page.PageObject>`.
        This property allows to get a page or a range of pages.

        Note:
            For PdfWriter only: Provides the capability to remove a page/range of
            page from the list (using the del operator). Remember: Only the page
            entry is removed, as the objects beneath can be used elsewhere. A
            solution to completely remove them - if they are not used anywhere - is
            to write to a buffer/temporary file and then load it into a new
            PdfWriter.

        )r   r   r   rF   r@   r@   rA   r     s   zPdfDocCommon.pagesc                    s    fddt t jD S )z
        A list of labels for the pages in this document.

        This property is read-only. The labels are in the order that the pages
        appear in the document.
        c                    s   g | ]}t  |qS r@   )page_index2page_label)r   r   rF   r@   rA   r   +  s    z,PdfDocCommon.page_labels.<locals>.<listcomp>)ranger   r   rF   r@   rF   rA   page_labels#  s   zPdfDocCommon.page_labelsc                 C   s*   z
t t| jtj W S  ty   Y dS w )a  
        Get the page layout currently being used.

        .. list-table:: Valid ``layout`` values
           :widths: 50 200

           * - /NoLayout
             - Layout explicitly not specified
           * - /SinglePage
             - Show one page at a time
           * - /OneColumn
             - Show one column at a time
           * - /TwoColumnLeft
             - Show pages in two columns, odd-numbered pages on the left
           * - /TwoColumnRight
             - Show pages in two columns, odd-numbered pages on the right
           * - /TwoPageLeft
             - Show two pages at a time, odd-numbered pages on the left
           * - /TwoPageRight
             - Show two pages at a time, odd-numbered pages on the right
        N)r   r-   rx   r   ZPAGE_LAYOUTr   rF   r@   r@   rA   page_layout-  s
   zPdfDocCommon.page_layoutc                 C   s"   z| j d W S  ty   Y dS w )a2  
        Get the page mode currently being used.

        .. list-table:: Valid ``mode`` values
           :widths: 50 200

           * - /UseNone
             - Do not show outline or thumbnails panels
           * - /UseOutlines
             - Show outline (aka bookmarks) panel
           * - /UseThumbs
             - Show page thumbnails panel
           * - /FullScreen
             - Fullscreen view
           * - /UseOC
             - Show Optional Content Group (OCG) panel
           * - /UseAttachments
             - Show attachments panel
        z	/PageModeN)rx   r   rF   r@   r@   rA   	page_modeI  s
   zPdfDocCommon.page_mode	list_onlyr   inheritc              	   C   s~  t tjt tjt tjt tjf}|du ri }|du r2| j}|d }t	|t
s/tdg | _tj|v r@tt|tj }n
tj|vrHd}nd}|dkr|D ]}||v r\|| ||< qPtt|tj D ],}	i }
t	|	trr|	|
d< |	 }|rz| j|||fi |
 W qe ty   tdw qedS |dkr| D ]\}}||vr|||< qt| |}|s|| | j| dS dS )a  
        Process the document pages to ease searching.

        Attributes of a page may inherit from ancestor nodes
        in the page tree. Flattening means moving
        any inheritance data into descendant nodes,
        effectively removing the inheritance dependency.

        Note: It is distinct from another use of "flattening" applied to PDFs.
        Flattening a PDF also means combining all the contents into one single layer
        and making the file less editable.

        Args:
            list_only: Will only list the pages within _flatten_pages.
            pages:
            inherit:
            indirect_reference: Used recursively to flatten the /Pages object.

        Nr   zInvalid object in /Pagesr   r{   z7Maximum recursion depth reached during page flattening.)r-   PGZ	RESOURCESZMEDIABOXZCROPBOXZROTATErx   rI   rP   rJ   r'   r!   rt   r   ZTYPEr   rK   r   r#   r,   r   RecursionErrorr   r   r   r   )rG   r"  r   r#  r{   Zinheritable_page_attributesr   tr   r  Zaddtr   Zattr_inr   Zpage_objr@   r@   rA   r   c  s`   





zPdfDocCommon._flattencleanc                 C   s   | j du r| | j | j dusJ t|tr)| }t|ts'tdt dS |}t|t	sFz| j 
|}W n tyE   tdt Y dS w d|  krSt| j k s[n tdt dS | j| j}| j|= |rt|durv| |t  dS dS dS )a  
        Remove page from pages list.

        Args:
            page:
                * :class:`int`: Page number to be removed.
                * :class:`~pypdf._page.PageObject`: page to be removed. If the page appears many times
                  only the first one will be removed.
                * :class:`~pypdf.generic.IndirectObject`: Reference to page to be removed.

            clean: replace PageObject with NullObject to prevent annotations
                or destinations to reference a detached page.

        Nz(IndirectObject is not referencing a pagezCannot find page in pagesr   zPage number is out of range)rt   r   rv   rJ   r,   rP   r   r   rm   r   r   r   r   r   r{   r   r.   )rG   r  r'  r   indr@   r@   rA   remove_page  s0   






zPdfDocCommon.remove_pagenumgenc                 C   s   t |||  S )a0  
        Used to ease development.

        This is equivalent to generic.IndirectObject(num,gen,self).get_object()

        Args:
            num: The object number of the indirect object.
            gen: The generation number of the indirect object.

        Returns:
            A PdfObject

        )r,   rP   )rG   r*  r+  r@   r@   rA   _get_indirect_object  s   z!PdfDocCommon._get_indirect_objectpermissions_codec              	      sJ   t dddd tjtjtjtjtjtjtjtj	d} fdd|
 D S )z>Take the permissions as an integer, return the allowed access.decode_permissionsuser_access_permissionsz5.0.0)Zold_namenew_nameZ
removed_in)printmodifycopyannotationsZformsZaccessabilityassembleZprint_high_qualityc                    s   i | ]\}}| |@ d kqS )r   r@   )r   rH   flagr-  r@   rA   
<dictcomp>  s    z3PdfDocCommon.decode_permissions.<locals>.<dictcomp>)r   r   ZPRINTZMODIFYZEXTRACTZADD_OR_MODIFYZFILL_FORM_FIELDSZEXTRACT_TEXT_AND_GRAPHICSZASSEMBLE_DOCZPRINT_TO_REPRESENTATIONr   )rG   r-  Zpermissions_mappingr@   r7  rA   r.    s"   
zPdfDocCommon.decode_permissionsc                 C   s   | j du rdS t| j jS )zWGet the user access permissions for encrypted documents. Returns None if not encrypted.N)ru   r   PrF   r@   r@   rA   r/    s   
z$PdfDocCommon.user_access_permissionsc                 C      dS )z
        Read-only boolean property showing whether this PDF file is encrypted.

        Note that this property, if true, will remain true even after the
        :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.
        Nr@   rF   r@   r@   rA   r     s   	zPdfDocCommon.is_encryptedc           
      C   s   d }i }| j }d|vs|d sd S tt|d }d|v rLtt|d }t|}|D ]"}|}t|}t|trKttt	 |
 }|rKt|j}	|	||< q)|S )Nz	/AcroFormz/XFA)rx   r   r2   r#   iternextrJ   r,   r   r(   rP   zlib
decompress_data)
rG   r   rL   r   r   r   r   tagr   esr@   r@   rA   xfa  s&   
zPdfDocCommon.xfac                    s   t  fdd  D S )z1Mapping of attachment filenames to their content.c                    s   i | ]}| j |fqS r@   )_get_attachment_list)r   r   rF   r@   rA   r8  9  s    
z,PdfDocCommon.attachments.<locals>.<dictcomp>)LazyDict_list_attachmentsrF   r@   rF   rA   attachments5  s
   
zPdfDocCommon.attachmentsc                 c   s    t | jE dH  dS )zIterable of attachment objects.N)r6   _loadrx   rF   r@   r@   rA   attachment_list?  s   zPdfDocCommon.attachment_listc                 C   s>   g }| j D ]}||j |j }|jkr|r|| q|S )zw
        Retrieves the list of filenames of file attachments.

        Returns:
            list of filenames

        )rH  r   r   alternative_name)rG   r   entryr   r@   r@   rA   rE  D  s   

zPdfDocCommon._list_attachmentsr   c                 C   s"   |  || }t|tr|S |gS rD   )_get_attachmentsrJ   r   )rG   r   outr@   r@   rA   rC  S  s   
z!PdfDocCommon._get_attachment_listfilenamec                 C   s   i }| j D ]S}t }|j}|dur)||j|hv r(||jkr |jn|}|| nq|j|h}|D ]'}|du r7q0||v rRt|| tsI|| g||< || |j q0|j||< q0q|S )a  
        Retrieves all or selected file attachments of the PDF as a dictionary of file names
        and the file data as a bytestring.

        Args:
            filename: If filename is None, then a dictionary of all attachments
                will be returned, where the key is the filename and the value
                is the content. Otherwise, a dictionary with just a single key
                - the filename - and its content will be returned.

        Returns:
            dictionary of filename -> Union[bytestring or List[ByteString]]
            If the filename exists multiple times a list of the different versions will be provided.

        N)	rH  setrI  r   addrJ   r   r   content)rG   rM  rF  rJ  r   rI  r   r@   r@   rA   rK  Y  s(   

	zPdfDocCommon._get_attachmentsincludeexcludec                 C   r:  )z
        Integration into Jupyter Notebooks.

        This method returns a dictionary that maps a mime-type to its
        representation.

        .. seealso::

            https://ipython.readthedocs.io/en/stable/config/integrating.html
        Nr@   )rG   rQ  rR  r@   r@   rA   _repr_mimebundle_  s   zPdfDocCommon._repr_mimebundle_r  )NNNN)F)FNNNrD   )\rm   rn   ro   rp   rs   bool__annotations__rt   r   r	   r   ru   r   rv   rq   r   r'   rx   rK   rz   r   r   r,   r0   rP   r   r   rC   r   r9   r   r3   r   r   r   r   r   r   r&   r   r#   r   r2   r   r   r   r   r   r   r   r   r)   r  r1   r%   r  setterr7   r  r
  r  r.   r   r  r  r/   r   r  r   r  r   r8   r!  r   r)  r,  r.  r   r/  r   rB  r
   r   rF  r   r6   rH  rE  rC  rK  r   rS  r@   r@   r@   rA   rr     s  
 


%%


L

2


*

%&
F 	
)
M	
T
,

		
*
rr   c                   @   sb   e Zd ZdededdfddZdedefdd	Zdee fd
dZde	fddZ
defddZdS )rD  argskwargsr<   Nc                 O   s   t |i || _d S rD   )dict	_raw_dict)rG   rW  rX  r@   r@   rA   rE        zLazyDict.__init__rH   c                 C   s   | j |\}}||S rD   )rZ  __getitem__)rG   rH   funcargr@   r@   rA   r\    s   zLazyDict.__getitem__c                 C   
   t | jS rD   )r;  rZ  rF   r@   r@   rA   __iter__     
zLazyDict.__iter__c                 C   r_  rD   )r   rZ  rF   r@   r@   rA   __len__  ra  zLazyDict.__len__c                 C   s   dt |   dS )NzLazyDict(keys=))r   r   rF   r@   r@   rA   __str__  r[  zLazyDict.__str__)rm   rn   ro   r   rE   rK   r\  r   r`  r   rb  rd  r@   r@   r@   rA   rD    s    rD  )Sr>   r=  abcr   r   typingr   r   r   r   r   r	   r
   r   r   r   r   ru   r   Z_pager   r   Z_page_labelsr   r  Z_utilsr   r   r   	constantsr   r   r   r   r   r   r   r   r  r   rN   r   r   r   r$  r    r   errorsr!   r"   Zgenericr#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   Zgeneric._filesr6   r   r7   r8   Zxmpr9   r   r   rB   rC   rr   rD  r@   r@   r@   rA   <module>   sH   4T& !         