B
    M]"                 @   s   d Z ddlmZmZmZmZ ddlmZ ddlZddl	Z	ddl
Z
ddlmZ ddlmZmZmZmZmZ e
jdk rdd	lmZmZ dd
lmZmZ nddlmZmZmZmZ dddZdd Zdd Ze de dddZ!dddZ"dS )z
Functions to convert unicode IRIs into ASCII byte string URIs and back. Exports
the following items:

 - iri_to_uri()
 - uri_to_iri()
    )unicode_literalsdivisionabsolute_importprint_function)idnaN   )unwrap)byte_clsstr_cls	type_namebytes_to_list	int_types)   )urlsplit
urlunsplit)quoteunquote)r   unquote_to_bytesr   r   Fc             C   s$  t | tsttdt| d}tjdk r| ds| dsd}t	d| }|rp|
d}d| t|d  } t| }|r|| dd  } t|dd	 }nt| }|dkrt|j}|j}|dk	r|d
}t|jdd}t|jdd}|j}	|	dk	r
t|	d}	d}
|dk	r:|
|7 }
|r2|
d| 7 }
|
d7 }
|dk	rL|
|7 }
|	dk	r|dkof|	dk}|dkox|	dk}|r|s|s|
d|	 7 }
t|jdd}t|jdd}t|jdd}|r|dkr|dkr|dkrd}|dkrd}t||
|||f}t |tr |d}|S )z
    Encodes a unicode IRI into an ASCII byte string URI

    :param value:
        A unicode string of an IRI

    :param normalize:
        A bool that controls URI normalization

    :return:
        A byte string of the ASCII-encoded URI
    z@
            value must be a unicode string, not %s
            N)      zhttp://zhttps://z	^[^:]*://r   r   r   z!$&'()*+,;=)safeascii       :   @s   https   80s   httpss   443z/!$&'()*+,;=@:z/?!$&'()*+,;=@:   / latin1)
isinstancer
   	TypeErrorr   r   sysversion_info
startswithrematchgrouplenr   	_urlquoteschemehostnameencodeusernamepasswordportpathqueryfragmentr   )valueZ	normalizer)   Zreal_prefixZprefix_matchparsedr*   r,   r-   r.   netlocZdefault_httpZdefault_httpsr/   r0   r1   output r6   .lib/python3.7/site-packages/asn1crypto/_iri.py
iri_to_uri%   sd    







$

r8   c             C   s(  t | tsttdt| t| }|j}|dk	r<|d}t|j	ddgd}t|j
ddgd}|j}|rt|d}|j}|rt |ts|d}d}|dk	r||7 }|r|d| 7 }|d7 }|dk	r||7 }|dk	r|dt| 7 }t|jd	gd
d}t|jddgd
d}	t|j}
t||||	|
fS )z
    Converts an ASCII URI byte string into a unicode IRI

    :param value:
        An ASCII-encoded byte string of the URI

    :return:
        A unicode string of the IRI
    z=
            value must be a byte string, not %s
            Nr   :@)remapr   r   /T)r;   preserve&=)r   r	   r    r   r   r   r)   decode_urlunquoter,   r-   r*   r.   r   r
   r/   r0   r1   r   )r2   r3   r)   r,   r-   r*   r.   r4   r/   r0   r1   r6   r6   r7   
uri_to_irix   s<    




rB   c             C   s4   t | j| j| j }dd |D }d|| jfS )a>  
    Error handler for decoding UTF-8 parts of a URI into an IRI. Leaves byte
    sequences encoded in %XX format, but as part of a unicode string.

    :param exc:
        The UnicodeDecodeError exception

    :return:
        A 2-element tuple of (replacement unicode string, integer index to
        resume at)
    c             S   s   g | ]}d | qS )z%%%02xr6   ).0Znumr6   r6   r7   
<listcomp>   s    z,_iri_utf8_errors_handler.<locals>.<listcomp>r   )r   objectstartendjoin)excZbytes_as_intsreplacementsr6   r6   r7   _iri_utf8_errors_handler   s    rK   iriutf8r   c                s   | dks| dkrdS g  t d| rXfdd}t d|| }  fdd}t d|| } t| d	d	d
}t|ts|d}t dkr fdd}t d||}|S )a  
    Quotes a unicode string for use in a URL

    :param string:
        A unicode string

    :param safe:
        A unicode string of character to not encode

    :return:
        None (if string is None) or an ASCII byte string of the quoted string
    Nr   z%[0-9a-fA-F]{2}c                sD   t | d}|dd}x$t D ]}||dt| }q$W |S )Nr   zutf-8rL   z%%%02x)r   r&   r@   listreplaceord)r%   byte_stringZunicode_stringZ	safe_char)r   r6   r7   _try_unescape   s
    z _urlquote.<locals>._try_unescapez(?:%[0-9a-fA-F]{2})+c                s     | dd dS )Nr   r    )appendr&   r+   )r%   )escapesr6   r7   _extract_escape   s    z"_urlquote.<locals>._extract_escapezutf-8)r   r   r   c                s
     dS )Nr   )pop)_)rT   r6   r7   _return_escape   s    z!_urlquote.<locals>._return_escapes   %00)r$   searchsuburlquoter+   r   r	   r'   )stringr   rQ   rU   r5   rX   r6   )rT   r   r7   r(      s    

r(   c       	      C   s   | dkr| S | dkrdS |rfdddddg}i }x6|D ].}| d	}|||< | |d
|d
} q4W t| } |rx,|D ]$}| |d
dt| d
} qxW | dd}|rx | D ]\}}|||}qW |S )a  
    Unquotes a URI portion from a byte string into unicode using UTF-8

    :param byte_string:
        A byte string of the data to unquote

    :param remap:
        A list of characters (as unicode) that should be re-mapped to a
        %XX encoding. This is used when characters are not valid in part of a
        URL.

    :param preserve:
        A bool - indicates that the chars to be remapped if they occur in
        non-hex form, should be preserved. E.g. / for URL path.

    :return:
        A unicode string
    Nr   r   r   r   z%%%02xzutf-8rL   )rV   rN   r+   r   rO   r@   items)	rP   r;   r=   rJ   Zpreserve_unmapcharZreplacementr5   Zoriginalr6   r6   r7   rA      s(    


$rA   )F)r   )NN)#__doc__Z
__future__r   r   r   r   Z	encodingsr   codecsr$   r!   Z_errorsr   Z_typesr	   r
   r   r   r   r"   Zurlparser   r   Zurllibr   r[   r   r   Zurllib.parser8   rB   rK   register_errorr(   rA   r6   r6   r6   r7   <module>	   s"   

S4
4