o
    ŀgD                     @  st  d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlZd d	lmZmZ d d
lmZ d dlmZmZ d dlmZmZmZmZ d dlm Z m!Z!m"Z" erhd dlm#Z# ej$ej%ej&ej'ej(ej)ej)dZ*ej&ej+dfej)ej,e
fej$ej-dfej%ej-dfej'ej-dfej.ej,dfej(ej/d fiZ0ej-dej+dej,diZ1G dd deZ2dS )    )annotations)TYPE_CHECKINGAnyN)infer_dtype)iNaT)NoBufferPresent)cache_readonly)BaseMaskedDtype)
ArrowDtypeDatetimeTZDtype)is_string_dtype)PandasBufferPandasBufferPyarrow)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
Endiannessdtype_to_arrow_c_fmt)Buffer)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                   @  s   e Zd ZdZd1d2d	d
Zd3ddZed3ddZed4ddZ	d4ddZ
edd Zedd Zed3ddZed5ddZd3ddZd6d7d#d$Zd8d&d'Zd9d)d*Zd:d,d-Zd;d/d0Zd S )<PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    Tcolumn	pd.Series
allow_copyboolreturnNonec                 C  sN   t |tjrtd|j dt |tjstdt| d|| _|| _	dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zExpected a Series, got a DataFrame. This likely happened because you called __dataframe__ on a DataFrame which, after converting column names to string, resulted in duplicated names: zD. Please rename these columns before using the interchange protocol.zColumns of type  not handled yetN)

isinstancepd	DataFrame	TypeErrorcolumnsSeriesNotImplementedErrortype_col_allow_copy)selfr    r"    r2   R/var/www/html/myenv/lib/python3.10/site-packages/pandas/core/interchange/column.py__init__T   s   
zPandasColumn.__init__intc                 C  s   | j jS )z2
        Size of the column, in elements.
        )r/   sizer1   r2   r2   r3   r6   h   s   zPandasColumn.sizec                 C     dS )z7
        Offset of first element. Always zero.
        r   r2   r7   r2   r2   r3   offsetn   s   zPandasColumn.offsettuple[DtypeKind, int, str, str]c                 C  s~   | j j}t|tjr!| j jj}| |j\}}}}tj	||t
jfS t|r:t| j dv r6tjdt|t
jfS td| |S )N)stringempty   z.Non-string object dtypes are not supported yet)r/   dtyper'   r(   CategoricalDtypevaluescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr   r   STRINGr   r-   )r1   r>   rA   _bitwidthc_arrow_dtype_f_strr2   r2   r3   r>   v   s.   


zPandasColumn.dtypec                 C  s   t |jd}|du rtd| dt|tr|jj}nt|tr'|j	j}nt|t
r1|jj}n|j}|dkr@||jtj|fS ||jd t||fS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolzbool[pyarrow]r=   )	_NP_KINDSgetkind
ValueErrorr'   r
   numpy_dtype	byteorderr   baser	   itemsizer   BOOLr   )r1   r>   rL   rO   r2   r2   r3   rB      s"   





z$PandasColumn._dtype_from_pandasdtypec                 C  s:   | j d tjkstd| jjjdtt	| jjj
dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)
is_orderedis_dictionary
categories)r>   r   rC   r*   r/   catorderedr   r(   r,   rU   r7   r2   r2   r3   describe_categorical   s   z!PandasColumn.describe_categoricalc                 C  s   t | jjtrtj}d}||fS t | jjtr/| jjjj	d 
 d d u r*tjd fS tjdfS | jd }zt| \}}W ||fS  tyN   td| dw )N   r   rI   z not yet supported)r'   r/   r>   r	   r   USE_BYTEMASKr
   array	_pa_arraychunksbuffersNON_NULLABLEUSE_BITMASK_NULL_DESCRIPTIONKeyErrorr-   )r1   column_null_dtype
null_valuerL   nullvaluer2   r2   r3   describe_null   s   


zPandasColumn.describe_nullc                 C  s   | j    S )zB
        Number of null elements. Should always be known.
        )r/   isnasumitemr7   r2   r2   r3   
null_count   s   zPandasColumn.null_countdict[str, pd.Index]c                 C  s   d| j jiS )z8
        Store specific metadata of the column.
        zpandas.index)r/   indexr7   r2   r2   r3   metadata   s   zPandasColumn.metadatac                 C  r8   )zE
        Return the number of chunks the column consists of.
        rY   r2   r7   r2   r2   r3   
num_chunks   s   zPandasColumn.num_chunksNn_chunks
int | Nonec                 c  sv    |r6|dkr6t | j}|| }|| dkr|d7 }td|| |D ]}t| jj|||  | jV  q"dS | V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        rY   r   N)lenr/   ranger   ilocr0   )r1   rp   r6   stepstartr2   r2   r3   
get_chunks   s   

zPandasColumn.get_chunksr   c                 C  s\   |   ddd}z|  |d< W n	 ty   Y nw z	|  |d< W |S  ty-   Y |S w )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsry   rz   )_get_data_buffer_get_validity_bufferr   _get_offsets_buffer)r1   r^   r2   r2   r3   get_buffers  s    zPandasColumn.get_buffers.tuple[Buffer, tuple[DtypeKind, int, str, str]]c           	      C  s  | j d tjtjtjtjtjfv ri| j }| j d tjkr/t| j d dkr/| jj	
d }n/| jj}t| jj tr>|j}n t| jj tr[|jjd }t| d t|d}||fS |j}t|| jd}||fS | j d tjkr| jjj}t|| jd}| |j }||fS | j d tjkr| j }t }|D ]}t|tr||j dd	 qtt!j"|d
d}| j }||fS t#d| jj  d)zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r         NrY   length)r"   utf-8encodinguint8)r>   rI   r&   )$r>   r   INTUINTFLOATrR   DATETIMErr   r/   dt
tz_convertto_numpyr[   r'   r	   _datar
   r\   r]   r   r^   _ndarrayr   r0   rC   r@   _codesrB   rE   	bytearraystrextendencodenp
frombufferr-   )	r1   r>   np_arrarrbufferrA   bufr   objr2   r2   r3   r{   0  sN   	"



zPandasColumn._get_data_buffertuple[Buffer, Any] | Nonec                 C  s`  | j \}}t| jjtr7| jjjjd }tj	dt
j	tjf}| d du r'dS t| d t|d}||fS t| jjtrT| jjj}t|}tj	dt
j	tjf}||fS | jd tjkr| j }|dk}| }tjt|ftjd}t|D ]\}	}
t|
tr|n|||	< qwt|}tj	dt
j	tjf}||fS zt|  d}W t| ty   tdw )	z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   rY   Nr   r=   shaper>   z! so does not have a separate maskzSee self.describe_null)rg   r'   r/   r>   r
   r[   r\   r]   r   rR   r   r   rD   r^   r   rr   r	   _maskr   rE   r   r   zerosbool_	enumerater   _NO_VALIDITY_BUFFERrb   r-   r   )r1   re   invalidr   r>   r   maskr   validr   r   msgr2   r2   r3   r|   n  s@   



z!PandasColumn._get_validity_buffertuple[PandasBuffer, Any]c           	      C  s   | j d tjkrM| j }d}tjt|d ftjd}t	|D ]\}}t
|tr5|jdd}|t|7 }|||d < q t|}tjdtjtjf}||fS td)a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   rY   r   r   r   @   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r>   r   rE   r/   r   r   r   rr   int64r   r'   r   r   r   r   r   INT64r   rD   r   )	r1   r@   ptrrz   r   vr   r   r>   r2   r2   r3   r}     s&   

z PandasColumn._get_offsets_buffer)T)r    r!   r"   r#   r$   r%   )r$   r5   )r$   r:   )r$   rl   )N)rp   rq   )r$   r   )r$   r   )r$   r   )r$   r   )__name__
__module____qualname____doc__r4   r6   propertyr9   r   r>   rB   rX   rg   rk   rn   ro   rw   r~   r{   r|   r}   r2   r2   r2   r3   r   H   s.    

!




%
>9r   )3
__future__r   typingr   r   numpyr   pandas._libs.libr   pandas._libs.tslibsr   pandas.errorsr   pandas.util._decoratorsr   pandas.core.dtypes.dtypesr	   pandasr(   r
   r   pandas.api.typesr   pandas.core.interchange.bufferr   r   *pandas.core.interchange.dataframe_protocolr   r   r   r   pandas.core.interchange.utilsr   r   r   r   r   r   r   rR   rE   r   rJ   USE_NANUSE_SENTINELr_   rC   rZ   ra   r   r   r2   r2   r2   r3   <module>   sJ    