a
    Of,                     @  s   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlZd dlmZ d dlmZmZ d dlmZ erd dlmZ G dd deZdS )    )annotations)TYPE_CHECKINGN)using_pyarrow_string_dtype)lib)import_optional_dependency)ParserErrorParserWarning)find_stack_level)pandas_dtype)
is_integer)	DataFrame)_arrow_dtype_mappingarrow_string_types_mapper)
ParserBase)
ReadBufferc                      sp   e Zd ZdZddd fddZdddd	Zddd
dZdddddZddddZddddZ	  Z
S )ArrowParserWrapperz7
    Wrapper for the pyarrow engine for read_csv()
    zReadBuffer[bytes]None)srcreturnc                   s$   t  | || _|| _|   d S N)super__init__kwdsr   _parse_kwds)selfr   r   	__class__ _/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/io/parsers/arrow_parser_wrapper.pyr   %   s    zArrowParserWrapper.__init__r   c                 C  sN   | j d}|du rdn|| _| j d }t|tr:tdt| j d | _dS )z?
        Validates keywords before passing to pyarrow.
        encodingNzutf-8	na_valuesz?The pyarrow engine doesn't support passing a dict for na_values)r   getr    
isinstancedict
ValueErrorlistr!   )r   r    r!   r   r   r   r   ,   s    

zArrowParserWrapper._parse_kwdsc                 C  s  ddddddd}|  D ]4\}}|| jv r| j|dur| j|| j|< q| j}t|trh|g}nd}|| jd	< d
d | j  D | _| jd}|durt|r|| jd< nZ|t	j
jkrd| jd< nB|t	j
jkrdddd}|| jd< n|t	j
jkrdd | jd< dd | j  D | _d| jd v | jd< | jdu rld| jv rldd | jd D | jd< | jdu | jdur| jn| jd | jd| _dS )z:
        Rename some arguments to pass to pyarrow
        include_columnsnull_valuesescape_charignore_empty_linesdecimal_point
quote_char)usecolsr!   
escapecharZskip_blank_linesdecimal	quotecharNtimestamp_parsersc                 S  s&   i | ]\}}|d ur|dv r||qS )N)	delimiterr,   r)   r*   r   .0Zoption_nameZoption_valuer   r   r   
<dictcomp>Y   s   z;ArrowParserWrapper._get_pyarrow_options.<locals>.<dictcomp>on_bad_linesZinvalid_row_handlerstrr   c                 S  s.   t jd| j d| j d| j tt d dS )Nz	Expected z columns, but found z: )
stacklevelskip)warningswarnZexpected_columnsZactual_columnstextr   r	   )Zinvalid_rowr   r   r   handle_warningk   s    
z?ArrowParserWrapper._get_pyarrow_options.<locals>.handle_warningc                 S  s   dS )Nr9   r   )_r   r   r   <lambda>v       z9ArrowParserWrapper._get_pyarrow_options.<locals>.<lambda>c                 S  s&   i | ]\}}|d ur|dv r||qS )N)r'   r(   Ztrue_valuesZfalse_valuesr+   r1   r   r3   r   r   r   r5   x   s    Zstrings_can_be_nullc                 S  s   g | ]}d | qS )fr   )r4   nr   r   r   
<listcomp>   s   z;ArrowParserWrapper._get_pyarrow_options.<locals>.<listcomp>Zskiprows)Zautogenerate_column_namesZ	skip_rowsr    )itemsr   r"   popdate_formatr#   r7   parse_optionscallabler   ZBadLineHandleMethodERRORWARNZSKIPconvert_optionsheaderr    read_options)r   mappingZpandas_nameZpyarrow_namerG   r6   r=   r   r   r   _get_pyarrow_options:   s\    


	
z'ArrowParserWrapper._get_pyarrow_optionsr   )framer   c              
     s  t  j}d}| jdu rn| jdu r6| jdu r6t|| _t | j|krftt|t | j | j | _d}| j _|  j \} | jdur| j }t	| jD ]\}}t
|r j| ||< n| jvrtd| d| jdur| j|dur|| j|fn j| | j j| f\}}	|	dur | |	 |< | j|= q j|ddd | jdu r|sdgt  jj  j_| jdurt| jtr fdd| j D | _nt| j| _z | j W n. ty }
 zt|
W Y d}
~
n
d}
~
0 0  S )	z
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        TNFzIndex z invalid)ZdropZinplacec                   s$   i | ]\}}| j v r|t|qS r   )columnsr
   )r4   kvrQ   r   r   r5      s   
z>ArrowParserWrapper._finalize_pandas_output.<locals>.<dictcomp>)lenrR   rM   namesranger&   Z_do_date_conversionsZ	index_colcopy	enumerater   r%   Zdtyper"   ZastypeZ	set_indexindexr#   r$   rE   r
   	TypeError)r   rQ   Znum_colsZmulti_index_namedr>   Zindex_to_setiitemkeyZ	new_dtypeer   rU   r   _finalize_pandas_output   sP    










z*ArrowParserWrapper._finalize_pandas_outputc                 C  s:   t |r&tdd |D s&tdnt|r6tdd S )Nc                 s  s   | ]}t |tV  qd S r   r#   r7   r4   xr   r   r   	<genexpr>   r@   z7ArrowParserWrapper._validate_usecols.<locals>.<genexpr>zwThe pyarrow engine does not allow 'usecols' to be integer column positions. Pass a list of string column names instead.z=The pyarrow engine does not allow 'usecols' to be a callable.)r   is_list_likeallr%   rI   )r   r-   r   r   r   _validate_usecols   s    z$ArrowParserWrapper._validate_usecolsc              
   C  s  t d}t d}|   z|jf i | j}W nh ty   | jdd}|dur\| | | jdt }t	|rt
dd |D std Y n0 z4|j| j|jf i | j|jf i | j|d	}W n0 |jy } zt||W Y d}~n
d}~0 0 | jd
 }|tju rj|j}	| }
t|jjD ]0\}}|j|r.|	||	||
}	q.||	}|dkr|jtjd}nP|dkrt  }t! ||" < |j|jd}n t# r|jt$ d}n| }| %|S )z
        Reads the contents of a CSV file into a DataFrame and
        processes it according to the kwargs passed in the
        constructor.

        Returns
        -------
        DataFrame
            The DataFrame created from the CSV file.
        Zpyarrowzpyarrow.csvr'   Nr(   c                 s  s   | ]}t |tV  qd S r   rb   rc   r   r   r   re      s   z*ArrowParserWrapper.read.<locals>.<genexpr>z9The 'pyarrow' engine requires all na_values to be strings)rN   rH   rL   dtype_backend)Ztypes_mapperZnumpy_nullable)&r   rP   ZConvertOptionsrL   r\   r"   rh   setr   rf   rg   Zread_csvr   ZReadOptionsrN   ZParseOptionsrH   ZArrowInvalidr   r   Z
no_defaultZschemaZfloat64rZ   typesZis_nullfieldZ	with_typecastZ	to_pandaspdZ
ArrowDtyper   Z
Int64Dtypenullr   r   ra   )r   paZpyarrow_csvrL   includeZnullstabler`   ri   Z
new_schemanew_typer]   Z
arrow_typerQ   Zdtype_mappingr   r   r   read   s\    

 



zArrowParserWrapper.read)__name__
__module____qualname____doc__r   r   rP   ra   rh   rt   __classcell__r   r   r   r   r       s   [Ir   )
__future__r   typingr   r:   Zpandas._configr   Zpandas._libsr   Zpandas.compat._optionalr   Zpandas.errorsr   r   Zpandas.util._exceptionsr	   Zpandas.core.dtypes.commonr
   Zpandas.core.dtypes.inferencer   Zpandasrn   r   Zpandas.io._utilr   r   Zpandas.io.parsers.base_parserr   Zpandas._typingr   r   r   r   r   r   <module>   s    