o
    ŀg%                     @  s  d Z ddlmZ ddlZddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZ erVdd	lmZmZmZ dd
lmZmZ ddlmZmZmZmZ dZ d-ddZ!dde dfd.d!d"Z"de fd/d%d&Z#de dfd0d(d)Z$de dfd1d+d,Z%dS )2z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKING)hash_object_array)is_list_like)CategoricalDtype)ABCDataFrameABCExtensionArrayABCIndexABCMultiIndex	ABCSeries)HashableIterableIterator)	ArrayLikenpt)	DataFrameIndex
MultiIndexSeries0123456789123456arraysIterator[np.ndarray]	num_itemsintreturnnpt.NDArray[np.uint64]c           	      C  s   zt | }W n ty   tjg tjd Y S w t|g| } td}t|td }d}t| D ]\}}|| }||N }||9 }|td| | 7 }|}q4|d |ks\J d|td7 }|S )	z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    )dtypeiCB ixV4 r   iXB    zFed in wrong num_itemsi| )	nextStopIterationnparrayuint64	itertoolschain
zeros_like	enumerate)	r   r   firstmultoutlast_iia	inverse_i r.   L/var/www/html/myenv/lib/python3.10/site-packages/pandas/core/util/hashing.pycombine_hash_arrays/   s$   
r0   Tutf8objIndex | DataFrame | Seriesindexboolencodingstrhash_key
str | None
categorizer   c                   s|  ddl m} du rtttr|tdddS ttr8tj j	ddd}||ddd}|S tt
rotj j	ddd}|rd fd	d
dD }t|g|}	t|	d}||jddd}|S ttr fdd
 D }
tj}|r fdd
dD }|d7 }t|
|}dd
 |D }
t|
|}||jddd}|S tdt )a>  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object

    Examples
    --------
    >>> pd.util.hash_pandas_object(pd.Series([1, 2, 3]))
    0    14639053686158035780
    1     3869563279212530728
    2      393322362522515241
    dtype: uint64
    r   )r   Nr"   F)r   copyr;   )r4   r   r;   c                 3  &    | ]}t jd  djV  qdS F)r4   r6   r8   r:   Nhash_pandas_objectr4   _values.0_r:   r6   r8   r2   r.   r/   	<genexpr>       
z%hash_pandas_object.<locals>.<genexpr>N   c                 3  s$    | ]\}}t |j V  qd S rH   )
hash_arrayrA   )rC   rD   series)r:   r6   r8   r.   r/   rF      s
    
c                 3  r=   r>   r?   rB   rE   r.   r/   rF      rG   r   c                 s  s    | ]}|V  qd S rH   r.   )rC   xr.   r.   r/   rF      s    zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer
   hash_tuplesr	   rJ   rA   astyper   r#   r$   r0   r4   r   itemslencolumns	TypeErrortype)r2   r4   r6   r8   r:   r   hser
index_iterr   hashesr   index_hash_generator_hashesr.   rE   r/   r@   S   sN   #

2






r@   vals+MultiIndex | Iterable[tuple[Hashable, ...]]c                   sz   t | stdddlm m} t| ts|| n|  fddtj	D }fdd|D }t
|t|}|S )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )Categoricalr   c              	     s,   g | ]}  j| tj| d dqS )F
categoriesordered)_simple_newcodesr   levels)rC   level)r_   mir.   r/   
<listcomp>   s    zhash_tuples.<locals>.<listcomp>c                 3  s     | ]}|j  d dV  qdS )Fr6   r8   r:   N)_hash_pandas_object)rC   cat)r6   r8   r.   r/   rF      s
    
zhash_tuples.<locals>.<genexpr>)r   rU   rM   r_   r   rO   r
   from_tuplesrangenlevelsr0   rS   )r]   r6   r8   r   cat_valsrZ   rW   r.   )r_   r6   r8   rg   r/   rP      s   
	rP   r   c                 C  s\   t | ds	tdt| tr| j|||dS t| tjs'tdt| j dt	| |||S )a  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.

    Examples
    --------
    >>> pd.util.hash_array(np.array([1, 2, 3]))
    array([ 6238072747940578789, 15839785061582574730,  2185194620014831856],
      dtype=uint64)
    r   zmust pass a ndarray-likeri   z6hash_array requires np.ndarray or ExtensionArray, not z!. Use hash_pandas_object instead.)
hasattrrU   rO   r   rj   r    ndarrayrV   __name___hash_ndarray)r]   r6   r8   r:   r.   r.   r/   rJ      s   

rJ   
np.ndarrayc                 C  s  | j }t|tjr t| j|||}t| j|||}|d|  S |tkr*| d} nwt	|j
tjtjfr?| djddd} nbt	|j
tjrY|jdkrY| d| j j d} nH|rdd	lm}m}m}	 |	| dd
\}
}t||dd}||
|}|j||ddS zt| ||} W n ty   t| tt||} Y nw | | d? N } | td9 } | | d? N } | td9 } | | d? N } | S )z!
    See hash_array.__doc__.
       u8i8Fr<      ur   )r_   r   	factorize)sortr`   ri      l   e9z    l   b&&&	    )r   r    
issubdtype
complex128rs   realimagr5   rQ   
issubclassrV   
datetime64timedelta64viewnumberitemsizerM   r_   r   rz   r   rc   rj   r   rU   r7   objectr"   )r]   r6   r8   r:   r   	hash_real	hash_imagr_   r   rz   rd   ra   rk   r.   r.   r/   rs     s@   	rs   )r   r   r   r   r   r   )r2   r3   r4   r5   r6   r7   r8   r9   r:   r5   r   r   )r]   r^   r6   r7   r8   r7   r   r   )
r]   r   r6   r7   r8   r7   r:   r5   r   r   )
r]   rt   r6   r7   r8   r7   r:   r5   r   r   )&__doc__
__future__r   r#   typingr   numpyr    pandas._libs.hashingr   pandas.core.dtypes.commonr   pandas.core.dtypes.dtypesr   pandas.core.dtypes.genericr   r   r	   r
   r   collections.abcr   r   r   pandas._typingr   r   rM   r   r   r   r   rN   r0   r@   rP   rJ   rs   r.   r.   r.   r/   <module>   s>    	
&f43