o
    ŀg,                     @  s   d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlZd dlmZ d dlmZmZ d dlmZ er\d dlmZ G dd deZdS )    )annotations)TYPE_CHECKINGN)using_pyarrow_string_dtype)lib)import_optional_dependency)ParserErrorParserWarning)find_stack_level)pandas_dtype)
is_integer)	DataFrame)_arrow_dtype_mappingarrow_string_types_mapper)
ParserBase)
ReadBufferc                      sT   e Zd ZdZd fddZddd	Zdd
dZdddZdddZdddZ	  Z
S )ArrowParserWrapperz7
    Wrapper for the pyarrow engine for read_csv()
    srcReadBuffer[bytes]returnNonec                   s$   t  | || _|| _|   d S N)super__init__kwdsr   _parse_kwds)selfr   r   	__class__ Z/var/www/html/myenv/lib/python3.10/site-packages/pandas/io/parsers/arrow_parser_wrapper.pyr   %   s   zArrowParserWrapper.__init__c                 C  sN   | j d}|du rdn|| _| j d }t|trtdt| j d | _dS )z?
        Validates keywords before passing to pyarrow.
        encodingNzutf-8	na_valuesz?The pyarrow engine doesn't support passing a dict for na_values)r   getr    
isinstancedict
ValueErrorlistr!   )r   r    r!   r   r   r   r   ,   s   

zArrowParserWrapper._parse_kwdsc                 C  s  ddddddd}|  D ]\}}|| jv r'| j|dur'| j|| j|< q| j}t|tr4|g}nd}|| jd	< d
d | j  D | _| jd}|durt|rZ|| jd< n*|t	j
jkrfd| jd< n|t	j
jkrwddd}|| jd< n|t	j
jkrdd | jd< dd | j  D | _d| jd v | jd< | jdu rd| jv rdd | jd D | jd< | jdu | jdur| jn| jd | jd| _dS )z:
        Rename some arguments to pass to pyarrow
        include_columnsnull_valuesescape_charignore_empty_linesdecimal_point
quote_char)usecolsr!   
escapecharskip_blank_linesdecimal	quotecharNtimestamp_parsersc                 S  &   i | ]\}}|d ur|dv r||qS )N)	delimiterr,   r)   r*   r   .0option_nameoption_valuer   r   r   
<dictcomp>Y       z;ArrowParserWrapper._get_pyarrow_options.<locals>.<dictcomp>on_bad_linesinvalid_row_handlerr   strc                 S  s.   t jd| j d| j d| j tt d dS )Nz	Expected z columns, but found z: )
stacklevelskip)warningswarnexpected_columnsactual_columnstextr   r	   )invalid_rowr   r   r   handle_warningk   s   
z?ArrowParserWrapper._get_pyarrow_options.<locals>.handle_warningc                 S  s   dS )Nr?   r   )_r   r   r   <lambda>v   s    z9ArrowParserWrapper._get_pyarrow_options.<locals>.<lambda>c                 S  r3   )N)r'   r(   true_valuesfalse_valuesr+   r2   r   r5   r   r   r   r9   x   r:    strings_can_be_nullc                 S  s   g | ]}d | qS )fr   )r6   nr   r   r   
<listcomp>   s    
z;ArrowParserWrapper._get_pyarrow_options.<locals>.<listcomp>skiprows)autogenerate_column_names	skip_rowsr    )r   r=   )itemsr   r"   popdate_formatr#   r=   parse_optionscallabler   BadLineHandleMethodERRORWARNSKIPconvert_optionsheaderr    read_options)r   mappingpandas_namepyarrow_namerU   r;   rF   r   r   r   _get_pyarrow_options:   s^   


	
z'ArrowParserWrapper._get_pyarrow_optionsframer   c              
     s  t  j}d}| jdu r7| jdu r| jdu rt|| _t | j|kr3tt|t | j | j | _d}| j _|  j \} | jdur| j }t	| jD ]S\}}t
|r_ j| ||< n| jvrltd| d| jdur| j|dur|| j|fn j| | j j| f\}}	|	dur | |	 |< | j|= qO j|ddd | jdu r|sdgt  jj  j_| jdurt| jtrև fdd| j D | _nt| j| _z	 | j W  S  ty }
 zt|
d}
~
ww  S )	z
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        TNFzIndex z invalid)dropinplacec                   s$   i | ]\}}| j v r|t|qS r   )columnsr
   )r6   kvrc   r   r   r9      s
    
z>ArrowParserWrapper._finalize_pandas_output.<locals>.<dictcomp>)lenrf   r]   namesranger&   _do_date_conversions	index_colcopy	enumerater   r%   dtyper"   astype	set_indexindexr#   r$   rS   r
   	TypeError)r   rc   num_colsmulti_index_namedrG   index_to_setiitemkey	new_dtypeer   ri   r   _finalize_pandas_output   sX   











z*ArrowParserWrapper._finalize_pandas_outputc                 C  s8   t |rtdd |D stdt|rtdd S )Nc                 s      | ]}t |tV  qd S r   r#   r=   r6   xr   r   r   	<genexpr>   s    z7ArrowParserWrapper._validate_usecols.<locals>.<genexpr>zwThe pyarrow engine does not allow 'usecols' to be integer column positions. Pass a list of string column names instead.z=The pyarrow engine does not allow 'usecols' to be a callable.)r   is_list_likeallr%   rW   )r   r-   r   r   r   _validate_usecols   s   z$ArrowParserWrapper._validate_usecolsc              
   C  s  t d}t d}|   z|jdi | j}W n2 tyI   | jdd}|dur.| | | jdt }t	|rDt
dd |D sHtd w z|j| j|jdi | j|jdi | j|d	}W n |jyv } zt||d}~ww | jd
 }|tju r|j}	| }
t|jjD ]\}}|j|r|	||	||
}	q||	}|dkr|jtjd}n&|dkrt  }t! ||" < |j|jd}nt# r|jt$ d}n| }| %|S )z
        Reads the contents of a CSV file into a DataFrame and
        processes it according to the kwargs passed in the
        constructor.

        Returns
        -------
        DataFrame
            The DataFrame created from the CSV file.
        pyarrowzpyarrow.csvr'   Nr(   c                 s  r   r   r   r   r   r   r   r      s    

z*ArrowParserWrapper.read.<locals>.<genexpr>z9The 'pyarrow' engine requires all na_values to be strings)r^   rV   r\   dtype_backend)types_mappernumpy_nullabler   )&r   rb   ConvertOptionsr\   ru   r"   r   setr   r   r   read_csvr   ReadOptionsr^   ParseOptionsrV   ArrowInvalidr   r   
no_defaultschemafloat64rp   typesis_nullfield	with_typecast	to_pandaspd
ArrowDtyper   
Int64Dtypenullr   r   r~   )r   papyarrow_csvr\   includenullstabler}   r   
new_schemanew_typery   
arrow_typerc   dtype_mappingr   r   r   read   sd   






zArrowParserWrapper.read)r   r   r   r   )r   r   )rc   r   r   r   )r   r   )__name__
__module____qualname____doc__r   r   rb   r~   r   r   __classcell__r   r   r   r   r       s    


[
Ir   )
__future__r   typingr   r@   pandas._configr   pandas._libsr   pandas.compat._optionalr   pandas.errorsr   r   pandas.util._exceptionsr	   pandas.core.dtypes.commonr
   pandas.core.dtypes.inferencer   pandasr   r   pandas.io._utilr   r   pandas.io.parsers.base_parserr   pandas._typingr   r   r   r   r   r   <module>   s"    