o
    ڪZe                     @   s   d dl Z d dlZd dlm  mZ d dlZd dlZd dlZddiZ	dd Z
dd Zdd	 Zdd
dZedkrKe
 ZeejejZejed dS dS )    Nwz<http://schemas.openxmlformats.org/wordprocessingml/2006/mainc                  C   s   t jdd} | jddd | jdddd |  }tj|js-td		|j t
d
 |jd urZtj|jsZz	t|j W |S  tyY   td	|j t
d
 Y |S w |S )NzGA pure python-based utility to extract text and images from docx files.)descriptiondocxzpath of the docx file)helpz-iz	--img_dirz#path of directory to extract imageszFile {} does not exist.   zUnable to create img_dir {})argparseArgumentParseradd_argument
parse_argsospathexistsr   printformatsysexitimg_dirmakedirsOSError)parserargs r   J/var/www/html/corbot_env/lib/python3.10/site-packages/docx2txt/docx2txt.pyprocess_args   s$   

r   c                 C   s"   |  d\}}t| }d||S )a  
    Stands for 'qualified name', a utility function to turn a namespace
    prefixed tag name into a Clark-notation qualified tag name for lxml. For
    example, ``qn('p:cSld')`` returns ``'{http://schemas.../main}cSld'``.
    Source: https://github.com/python-openxml/python-docx/
    :z{{{}}}{})splitnsmapr   )tagprefixtagrooturir   r   r   qn&   s   r!   c                 C   s   d}t | }| D ]>}|jtdkr"|j}||dur|nd7 }q|jtdkr.|d7 }q|jtdtdfv r>|d7 }q|jtd	krI|d
7 }q|S )z
    A string representing the textual content of this run, with content
    child elements like ``<w:tab/>`` translated to their Python
    equivalent.
    Adapted from: https://github.com/python-openxml/python-docx/
     zw:tNzw:tab	zw:brzw:cr
zw:pz

)ET
fromstringiterr   r!   text)xmlr(   rootchildt_textr   r   r   xml2text2   s   


r-   c              	   C   s  d}t | }| }d}|D ]}t||r |t||7 }qd}|t||7 }d}|D ]}t||rA|t||7 }q0|d ur|D ]6}tj	|\}	}
|
dv r~tj
|tj|}t|d}||| W d    n1 syw   Y  qH|  | S )Nr"   zword/header[0-9]*.xmlzword/document.xmlzword/footer[0-9]*.xml)z.jpgz.jpegz.pngz.bmpwb)zipfileZipFilenamelistrematchr-   readr   r   splitextjoinbasenameopenwriteclosestrip)r   r   r(   zipffilelistheader_xmlsfnamedoc_xmlfooter_xmls_	extension	dst_fnamedst_fr   r   r   processH   s4   
rF   __main__zutf-8)N)r   r2   xml.etree.ElementTreeetreeElementTreer%   r/   r   r   r   r   r!   r-   rF   __name__r   r   r   r(   stdoutr9   encoder   r   r   r   <module>   s    
&