o
    ŀgw7                     @  s(  d dl mZ d dlmZ d dlmZ d dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZm Z m!Z! erd dl"m#Z#m$Z$m%Z% d dl&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z- G dd deZ.dddZ/d ddZ0dS )!    )annotations)defaultdict)TYPE_CHECKINGN)libparsers)import_optional_dependency)DtypeWarning)find_stack_levelpandas_dtype)concat_compatunion_categoricals)CategoricalDtype)ensure_index_from_sequences)dedup_namesis_potential_multi_index)
ParserBaseParserErroris_index_col)HashableMappingSequence)	ArrayLikeDtypeArgDtypeObjReadCsvBuffer)Index
MultiIndexc                      sh   e Zd ZU ded< ded< d fd	d
Zd ddZd ddZ	d!d"ddZd#ddZd$d%ddZ	  Z
S )&CParserWrapperbool
low_memoryzparsers.TextReader_readersrcReadCsvBuffer[str]returnNonec                   s  t  | || _| }|dd| _| jdu|d< | j|d< | jj	|d< dD ]}||d  q(t
|dd |d< d|vsF|d tju rJd	|d< |d d
krTtd
 tj|fi || _| jj| _| jd u }| jjd u rrd | _n| | jj| j|\| _| _| _}| jd u rtt| jj| _| jd d  | _| jr| | j| j | jd usJ | jdkrt | js|   | j t!| jt! krՇ fddt"| jD | _t!| jt! k r|   | j | #| j | $  | j| _| j%s8| jj&dkrt'| jrd| _(| )| j| j\}| _| _| jd u r|| _| jjd u r8|s8| jd us/J d gt!| j | _| jj&dk| _*d S )Nr    Fallow_leading_colsusecolson_bad_lines)storage_optionsencoding
memory_mapcompressiondtypedtype_backendnumpypyarrowstringc                   $   g | ]\}}| v s| v r|qS  r3   ).0inr'   r3   V/var/www/html/myenv/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py
<listcomp>   s
    z+CParserWrapper.__init__.<locals>.<listcomp>r   T)+super__init__kwdscopypopr    	index_colr'   r(   valueensure_dtype_objsgetr   
no_defaultr   r   
TextReaderr!   unnamed_colsnamesheader_extract_multi_indexer_columnsindex_names	col_nameslistrangetable_width
orig_names_evaluate_usecolsusecols_dtypesetissubset_validate_usecols_nameslen	enumerate_validate_parse_dates_presence_set_noconvert_columns_has_complex_date_colleading_colsr   _name_processed_clean_index_names_implicit_index)selfr"   r<   keypassed_namesrI   	__class__r7   r8   r;   <   s   


	

zCParserWrapper.__init__c                 C  s&   z| j   W d S  ty   Y d S w N)r!   close
ValueError)r]   r3   r3   r8   rc      s
   zCParserWrapper.closec                   s^   | j dusJ dd t| j D   fdd| jD }| || j}|D ]}| j| q$dS )z
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions.
        Nc                 S  s   i | ]\}}||qS r3   r3   )r4   r5   xr3   r3   r8   
<dictcomp>       z9CParserWrapper._set_noconvert_columns.<locals>.<dictcomp>c                   s   g | ]} | qS r3   r3   r4   re   
names_dictr3   r8   r9          z9CParserWrapper._set_noconvert_columns.<locals>.<listcomp>)rN   rU   rF   _set_noconvert_dtype_columnsr!   set_noconvert)r]   col_indicesnoconvert_columnscolr3   ri   r8   rW      s   z%CParserWrapper._set_noconvert_columnsNnrows
int | None_tuple[Index | MultiIndex | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike]]c                   s  z| j r| j|}t|}n| j|}W nK tya   | jr\d| _t| jt	| j| j
}| j|| jd\} }|  | j | jd urJ|    fdd| D }| |f Y S |    w d| _| j}| jjr| jrstdg }| j
r| jjt| j
krtdt| j
 d| jj dt| jjD ]"}| j
d u r||}	n|| j
| }	| j|	|d	d
}	||	 qt|}| jd ur| |}t|t	|| j
}t| }
dd t||
D }|  ||\}}| || j}nTt| }
| jd usJ t!| j}t|t	|| j
}| jd ur| |}dd |
D }| jd u r*| "|| dd t||
D }|  ||\}}| #|||\}}|||fS )NFr-   c                   s   i | ]\}}| v r||qS r3   r3   )r4   kvcolumnsr3   r8   rf      s    z'CParserWrapper.read.<locals>.<dictcomp>z file structure not yet supportedz,Could not construct index. Requested to use z number of columns, but z left to parse.T)try_parse_datesc                 S     i | ]	\}\}}||qS r3   r3   r4   ru   r5   rv   r3   r3   r8   rf   .      c                 S  s   g | ]}|d  qS )   r3   rh   r3   r3   r8   r9   F  rk   z'CParserWrapper.read.<locals>.<listcomp>c                 S  rz   r3   r3   r{   r3   r3   r8   rf   J  r|   )$r    r!   read_low_memory_concatenate_chunksreadStopIteration_first_chunkr   rN   r   r?   _get_empty_metar-   _maybe_make_multi_index_columnsrJ   r'   _filter_usecolsitemsrc   rF   rY   rX   NotImplementedErrorrT   r   rL   r>   _maybe_parse_datesappendr   sortedzip_do_date_conversionsrK   _check_data_length_make_index)r]   rq   chunksdatarF   indexcol_dictarraysr5   values	data_tupscolumn_names	date_dataalldatar3   rw   r8   r      s   









zCParserWrapper.readrF   Sequence[Hashable]c                   s@   |  | j|  d urt|t kr fddt|D }|S )Nc                   r2   r3   r3   )r4   r5   namer7   r3   r8   r9   U  s    z2CParserWrapper._filter_usecols.<locals>.<listcomp>)rO   r'   rT   rU   )r]   rF   r3   r7   r8   r   Q  s   
zCParserWrapper._filter_usecolsTr   intry   c                 C  s4   |r|  |r| j|| jd ur| j| nd d}|S )N)rp   )_should_parse_dates
_date_convrI   )r]   r   r   ry   r3   r3   r8   r   Z  s   z!CParserWrapper._maybe_parse_dates)r"   r#   r$   r%   )r$   r%   rb   )rq   rr   r$   rs   )rF   r   r$   r   )T)r   r   ry   r   )__name__
__module____qualname____annotations__r;   rc   rW   r   r   r   __classcell__r3   r3   r`   r8   r   8   s   
  


s	r   r   list[dict[int, ArrayLike]]r$   dictc           
        s   t | d  }g }i }|D ]H  fdd| D }dd |D }dd |D }| }t|tr9t|dd| < qt|| < t|d	krV|  jt	t
krV|t  q|rqd
|}dd| dg}	tj|	tt d |S )z
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks
    may have different inferred categories.
    r   c                   s   g | ]}|  qS r3   )r>   )r4   chunkr   r3   r8   r9   o  rg   z'_concatenate_chunks.<locals>.<listcomp>c                 S  s   h | ]}|j qS r3   rt   )r4   ar3   r3   r8   	<setcomp>q  s    z&_concatenate_chunks.<locals>.<setcomp>c                 S  s   h | ]	}t |ts|qS r3   )
isinstancer   rh   r3   r3   r8   r   r  r|   F)sort_categoriesr}   , z	Columns (zK) have mixed types. Specify dtype option on import or set low_memory=False.)
stacklevel)rK   keysr>   r   r   r   r   rT   r-   npobjectr   strjoinwarningswarnr   r	   )
r   rF   warning_columnsresultarrsdtypesnon_cat_dtypesr-   warning_nameswarning_messager3   r   r8   r   c  s,   
 

r   r-   *DtypeArg | dict[Hashable, DtypeArg] | None*DtypeObj | dict[Hashable, DtypeObj] | Nonec                   sx   t tr$t  t fdd} D ]
}t| ||< q|S t tr2fddD S dur:tS S )zc
    Ensure we have either None, a dtype object, or a dictionary mapping to
    dtype objects.
    c                     s    S rb   r3   r3   )default_dtyper3   r8   <lambda>  s    z#ensure_dtype_objs.<locals>.<lambda>c                   s   i | ]	}|t  | qS r3   r
   )r4   ru   rt   r3   r8   rf     r|   z%ensure_dtype_objs.<locals>.<dictcomp>N)r   r   r   default_factoryr   r   )r-   dtype_convertedr^   r3   )r   r-   r8   rA     s   

rA   )r   r   r$   r   )r-   r   r$   r   )1
__future__r   collectionsr   typingr   r   r/   r   pandas._libsr   r   pandas.compat._optionalr   pandas.errorsr   pandas.util._exceptionsr	   pandas.core.dtypes.commonr   pandas.core.dtypes.concatr   r   pandas.core.dtypes.dtypesr   pandas.core.indexes.apir   pandas.io.commonr   r   pandas.io.parsers.base_parserr   r   r   collections.abcr   r   r   pandas._typingr   r   r   r   pandasr   r   r   r   rA   r3   r3   r3   r8   <module>   s0      
-%