o
    ŀg                     @  sz  d Z ddlmZ ddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlZddlZddlmZmZmZmZ ddlmZmZmZmZmZmZ ddlmZ dd	lmZ dd
lm Z m!Z! ddl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6m7Z7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z? ddl@mAZAmBZB ddlCmDZD ddlEmFZGmHZHmIZI ddlJmKZK erddlmLZLmMZMmNZN ddlOmPZPmQZQmRZR ddlSmTZTmUZU dddZVdd d!ZWdd$d%ZXejYejZej[ej\ej]ej^ej_ej`ejaejbejcejdejeejfd&Zgdd'd(Zhdd)d*Zid+d, Zjdd.d/Zkddd2d3ZlejZmd4Zndd8d9Zo	:			dddBdCZpeedDedEedFdG	H	:	dddKdLZq	:	H	H		:dddQdRZr	:	H	H		:dddSdTZs	dddVdWZt	X	ddd[d\Zu	ddd]d^Zv		_	Y	:	HdddfdgZw		H	dddkdlZx	m	dddvdwZyh dxZzdddzd{Z{		:	H	:ddddZ|dddZ}dddZ~dddZ		:ddddZdS )zl
Generic data algorithms. This module is experimental at the moment and not
intended for public consumption
    )annotationsN)dedent)TYPE_CHECKINGLiteralcast)algos	hashtableiNaTlib)AnyArrayLike	ArrayLikeAxisIntDtypeObjTakeIndexernpt)doc)find_stack_level)'construct_1d_object_array_from_listlikenp_find_common_type)ensure_float64ensure_objectensure_platform_intis_array_likeis_bool_dtypeis_complex_dtypeis_dict_likeis_extension_array_dtypeis_float_dtype
is_integeris_integer_dtypeis_list_likeis_object_dtypeis_signed_integer_dtypeneeds_i8_conversion)concat_compat)BaseMaskedDtypeCategoricalDtypeExtensionDtypeNumpyEADtype)ABCDatetimeArrayABCExtensionArrayABCIndexABCMultiIndex	ABCSeriesABCTimedeltaArray)isnana_value_for_dtype)take_nd)arrayensure_wrapped_if_datetimelikeextract_array)validate_indices)ListLikeNumpySorterNumpyValueArrayLike)CategoricalIndexSeries)BaseMaskedArrayExtensionArrayvaluesr   return
np.ndarrayc                 C  s@  t | tst| dd} t| jrtt| S t | jtr/t	d| } | j
s*t| jS t| S t | jtr=t	d| } | jS t| jrZt | tjrPt| dS t| jdddS t| jrdt| S t| jrx| jjdv rst| S t| S t| jrt	tj| S t| jr| d	}t	tj|}|S tj| td
} t| S )a  
    routine to ensure that our data is of the correct
    input dtype for lower-level routines

    This will coerce:
    - ints -> int64
    - uint -> uint64
    - bool -> uint8
    - datetimelike -> i8
    - datetime64tz -> i8 (in local tz)
    - categorical -> codes

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    np.ndarray
    Textract_numpyr<   r9   uint8Fcopy)         i8dtype)
isinstancer,   r4   r!   rK   r   npasarrayr%   r   _hasna_ensure_data_datar&   codesr   ndarrayviewastyper   r   itemsizer   r   r#   object)r>   npvalues rY   J/var/www/html/myenv/lib/python3.10/site-packages/pandas/core/algorithms.pyrP   j   s<   













rP   rK   r   originalr   c                 C  sP   t | tr| j|kr| S t |tjs| }|j| |d} | S | j|dd} | S )z
    reverse of _ensure_data

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    original : AnyArrayLike

    Returns
    -------
    ExtensionArray or np.ndarray
    rJ   FrD   )rL   r*   rK   rM   construct_array_type_from_sequencerU   )r>   rK   r[   clsrY   rY   rZ   _reconstruct_data   s   r_   	func_namestrc                 C  sv   t | ttttjfs9|dkrtj| dtt	 d t
j| dd}|dv r4t | tr.t| } t| } | S t| } | S )z5
    ensure that we are arraylike if not already
    isin-targetsz with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.
stacklevelFskipna)mixedstringmixed-integer)rL   r+   r-   r*   rM   rS   warningswarnFutureWarningr   r
   infer_dtypetuplelistr   rN   )r>   r`   inferredrY   rY   rZ   _ensure_arraylike   s   

rq   )
complex128	complex64float64float32uint64uint32uint16rC   int64int32int16int8rh   rW   c                 C  s    t | } t| }t| }|| fS )z
    Parameters
    ----------
    values : np.ndarray

    Returns
    -------
    htable : HashTable subclass
    values : ndarray
    )rP   _check_object_for_strings_hashtables)r>   ndtyper   rY   rY   rZ   _get_hashtable_algo  s   r   c                 C  s&   | j j}|dkrtj| ddrd}|S )z
    Check if we can use string hashtable instead of object hashtable.

    Parameters
    ----------
    values : ndarray

    Returns
    -------
    str
    rW   Fre   rh   )rK   namer
   is_string_array)r>   r   rY   rY   rZ   r}     s
   r}   c                 C  s   t | S )a3
  
    Return unique values based on a hash table.

    Uniques are returned in order of appearance. This does NOT sort.

    Significantly faster than numpy.unique for long enough sequences.
    Includes NA values.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    numpy.ndarray or ExtensionArray

        The return can be:

        * Index : when the input is an Index
        * Categorical : when the input is a Categorical dtype
        * ndarray : when the input is a Series/ndarray

        Return numpy.ndarray or ExtensionArray.

    See Also
    --------
    Index.unique : Return unique values from an Index.
    Series.unique : Return unique values of Series object.

    Examples
    --------
    >>> pd.unique(pd.Series([2, 1, 3, 3]))
    array([2, 1, 3])

    >>> pd.unique(pd.Series([2] + [1] * 5))
    array([2, 1])

    >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> pd.unique(
    ...     pd.Series(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    <DatetimeArray>
    ['2016-01-01 00:00:00-05:00']
    Length: 1, dtype: datetime64[ns, US/Eastern]

    >>> pd.unique(
    ...     pd.Index(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    DatetimeIndex(['2016-01-01 00:00:00-05:00'],
            dtype='datetime64[ns, US/Eastern]',
            freq=None)

    >>> pd.unique(np.array(list("baabc"), dtype="O"))
    array(['b', 'a', 'c'], dtype=object)

    An unordered Categorical will return categories in the
    order of appearance.

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    An ordered Categorical preserves the category ordering.

    >>> pd.unique(
    ...     pd.Series(
    ...         pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
    ...     )
    ... )
    ['b', 'a', 'c']
    Categories (3, object): ['a' < 'b' < 'c']

    An array of tuples

    >>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)
    array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
    )unique_with_mask)r>   rY   rY   rZ   unique3  s   ^r   intc                 C  s8   t | dkrdS t| } t|  ddk }|S )aH  
    Return the number of unique values for integer array-likes.

    Significantly faster than pandas.unique for long enough sequences.
    No checks are done to ensure input is integral.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    int : The number of unique values in ``values``
    r   intp)lenrP   rM   bincountravelrU   sum)r>   resultrY   rY   rZ   nunique_ints  s
   r   masknpt.NDArray[np.bool_] | Nonec                 C  s   t | dd} t| jtr|  S | }t| \}} |t| }|du r0|| }t||j|}|S |j| |d\}}t||j|}|dusFJ ||dfS )z?See algorithms.unique for docs. Takes a mask for masked arrays.r   r`   Nr   bool)	rq   rL   rK   r'   r   r   r   r_   rU   )r>   r   r[   r   tableuniquesrY   rY   rZ   r     s   
r   i@B compsr6   npt.NDArray[np.bool_]c                 C  s  t | stdt| j dt |stdt|j dt|ttttj	fsGt
|}t|dd}t|dkrF|jjdv rFt| sFt|}nt|trRt|}nt|ddd}t| d	d}t|dd
}t|tj	sp||S t|jr|t||S t|jrt|jstj|jtdS t|jrt||tS t|jtrtt|t|S t|tkrt|dkr|jtkrt |! rdd }ndd }nt"|j|j}|j|dd}|j|dd}t#j$}|||S )z
    Compute the isin boolean array.

    Parameters
    ----------
    comps : list-like
    values : list-like

    Returns
    -------
    ndarray[bool]
        Same length as `comps`.
    zIonly list-like objects are allowed to be passed to isin(), you passed a ``rb   r   r   iufcbT)rB   extract_rangeisinrA   rJ      c                 S  s   t t | | t | S N)rM   
logical_orr   r   isnan)cvrY   rY   rZ   f  s   zisin.<locals>.fc                 S  s   t | | S r   )rM   r   r   )abrY   rY   rZ   <lambda>  s    zisin.<locals>.<lambda>FrD   )%r    	TypeErrortype__name__rL   r+   r-   r*   rM   rS   ro   rq   r   rK   kindr"   r   r,   r2   r4   r   r#   pd_arrayr!   zerosshaper   rU   rW   r'   rN   _MINIMUM_COMP_ARR_LENr/   anyr   htableismember)r   r>   orig_valuescomps_arrayr   commonrY   rY   rZ   r     s^   







r   Tuse_na_sentinelr   	size_hint
int | Nonena_valuerW   'tuple[npt.NDArray[np.intp], np.ndarray]c           
      C  sf   | }| j jdv r
t}t| \}} ||pt| }|j| d|||d\}}	t||j |}t|	}	|	|fS )a(  
    Factorize a numpy array to codes and uniques.

    This doesn't do any coercion of types or unboxing before factorization.

    Parameters
    ----------
    values : ndarray
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.
    size_hint : int, optional
        Passed through to the hashtable's 'get_labels' method
    na_value : object, optional
        A value in `values` to consider missing. Note: only use this
        parameter when you know that you don't have any values pandas would
        consider missing in the array (NaN for float data, iNaT for
        datetimes, etc.).
    mask : ndarray[bool], optional
        If not None, the mask is used as indicator for missing values
        (True = missing, False = valid) instead of `na_value` or
        condition "val != val".

    Returns
    -------
    codes : ndarray[np.intp]
    uniques : ndarray
    mM)na_sentinelr   r   	ignore_na)rK   r   r	   r   r   	factorizer_   r   )
r>   r   r   r   r   r[   
hash_klassr   r   rR   rY   rY   rZ   factorize_array$  s   $
	r   z    values : sequence
        A 1-D sequence. Sequences that aren't pandas objects are
        coerced to ndarrays before factorization.
    zt    sort : bool, default False
        Sort `uniques` and shuffle `codes` to maintain the
        relationship.
    zG    size_hint : int, optional
        Hint to the hashtable sizer.
    )r>   sortr   Fr   %tuple[np.ndarray, np.ndarray | Index]c           	      C  s  t | ttfr| j||dS t| dd} | }t | ttfr.| jdur.| j|d\}}||fS t | tj	s=| j|d\}}n+t
| } |s_| jtkr_t| }| r_t| jdd}t||| } t| ||d	\}}|r{t|d
kr{t|||ddd\}}t||j|}||fS )aN  
    Encode the object as an enumerated type or categorical variable.

    This method is useful for obtaining a numeric representation of an
    array when all that matters is identifying distinct values. `factorize`
    is available as both a top-level function :func:`pandas.factorize`,
    and as a method :meth:`Series.factorize` and :meth:`Index.factorize`.

    Parameters
    ----------
    {values}{sort}
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.

        .. versionadded:: 1.5.0
    {size_hint}
    Returns
    -------
    codes : ndarray
        An integer ndarray that's an indexer into `uniques`.
        ``uniques.take(codes)`` will have the same values as `values`.
    uniques : ndarray, Index, or Categorical
        The unique valid values. When `values` is Categorical, `uniques`
        is a Categorical. When `values` is some other pandas object, an
        `Index` is returned. Otherwise, a 1-D ndarray is returned.

        .. note::

           Even if there's a missing value in `values`, `uniques` will
           *not* contain an entry for it.

    See Also
    --------
    cut : Discretize continuous-valued array.
    unique : Find the unique value in an array.

    Notes
    -----
    Reference :ref:`the user guide <reshaping.factorize>` for more examples.

    Examples
    --------
    These examples all show factorize as a top-level method like
    ``pd.factorize(values)``. The results are identical for methods like
    :meth:`Series.factorize`.

    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"))
    >>> codes
    array([0, 0, 1, 2, 0])
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    With ``sort=True``, the `uniques` will be sorted, and `codes` will be
    shuffled so that the relationship is the maintained.

    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"),
    ...                               sort=True)
    >>> codes
    array([1, 1, 0, 2, 1])
    >>> uniques
    array(['a', 'b', 'c'], dtype=object)

    When ``use_na_sentinel=True`` (the default), missing values are indicated in
    the `codes` with the sentinel value ``-1`` and missing values are not
    included in `uniques`.

    >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O"))
    >>> codes
    array([ 0, -1,  1,  2,  0])
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    Thus far, we've only factorized lists (which are internally coerced to
    NumPy arrays). When factorizing pandas objects, the type of `uniques`
    will differ. For Categoricals, a `Categorical` is returned.

    >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1])
    >>> uniques
    ['a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Notice that ``'b'`` is in ``uniques.categories``, despite not being
    present in ``cat.values``.

    For all other pandas objects, an Index of the appropriate type is
    returned.

    >>> cat = pd.Series(['a', 'a', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1])
    >>> uniques
    Index(['a', 'c'], dtype='object')

    If NaN is in the values, and we want to include NaN in the uniques of the
    values, it can be achieved by setting ``use_na_sentinel=False``.

    >>> values = np.array([1, 2, 1, np.nan])
    >>> codes, uniques = pd.factorize(values)  # default: use_na_sentinel=True
    >>> codes
    array([ 0,  1,  0, -1])
    >>> uniques
    array([1., 2.])

    >>> codes, uniques = pd.factorize(values, use_na_sentinel=False)
    >>> codes
    array([0, 1, 0, 2])
    >>> uniques
    array([ 1.,  2., nan])
    )r   r   r   r   N)r   )r   F)compat)r   r   r   T)r   assume_uniqueverify)rL   r+   r-   r   rq   r)   r.   freqrM   rS   rN   rK   rW   r/   r   r0   wherer   r   	safe_sortr_   )	r>   r   r   r   r[   rR   r   	null_maskr   rY   rY   rZ   r   b  sB    



r   	ascending	normalizedropnar;   c                 C  s&   t jdtt d t| |||||dS )aK  
    Compute a histogram of the counts of non-null values.

    Parameters
    ----------
    values : ndarray (1-d)
    sort : bool, default True
        Sort by values
    ascending : bool, default False
        Sort in ascending order
    normalize: bool, default False
        If True then compute a relative histogram
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data
    dropna : bool, default True
        Don't include counts of NaN

    Returns
    -------
    Series
    zupandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.rc   )r   r   r   binsr   )rj   rk   rl   r   value_counts_internal)r>   r   r   r   r   r   rY   rY   rZ   value_counts/  s   r   c              
   C  s:  ddl m}m} t| dd }|rdnd}	|d uruddlm}
 t| |r&| j} z	|
| |dd}W n ty@ } ztd	|d }~ww |j	|d
}|	|_
||j  }|jd|_| }|rl|jdk rl|jdd }tt|g}nt| r|| ddjj	|d
}|	|_
||j_
|j}t|tjst|}not| trtt| j}|| |	dj||d }| j|j_|j}nMt| dd} t| |\}}}|j tj!kr|tj"}||}|j t#kr|j t$kr|t$}n|j |j kr|j dkrt%j&dt't( d ||_
||||	dd}|r|j)|d}|r||*  }|S )Nr   )r:   r;   r   
proportioncount)cutT)include_lowestz+bins argument only works with numeric data.r   intervalFrD   )indexr   )levelr   r   r   zstring[pyarrow_numpy]zThe behavior of value_counts with object-dtype is deprecated. In a future version, this will *not* perform dtype inference on the resulting index. To retain the old behavior, use `result.index = result.index.infer_objects()`rc   )r   r   rE   )r   )+pandasr:   r;   getattrpandas.core.reshape.tiler   rL   _valuesr   r   r   r   notnarU   
sort_indexallilocrM   r2   r   r   rS   rN   r,   ro   rangenlevelsgroupbysizenamesrq   value_counts_arraylikerK   float16ru   r   rW   rj   rk   rl   r   sort_valuesr   )r>   r   r   r   r   r   r:   r;   
index_namer   r   iierrr   countslevelskeys_idxrY   rY   rZ   r   ^  sv   






	r   ,tuple[ArrayLike, npt.NDArray[np.int64], int]c                 C  sb   | }t | } tj| ||d\}}}t|jr%|r%|tk}|| || }}t||j|}|||fS )z
    Parameters
    ----------
    values : np.ndarray
    dropna : bool
    mask : np.ndarray[bool] or None, default None

    Returns
    -------
    uniques : np.ndarray
    counts : np.ndarray[np.int64]
    r   )rP   r   value_countr#   rK   r	   r_   )r>   r   r   r[   r   r   
na_counterres_keysrY   rY   rZ   r     s   

r   firstkeepLiteral['first', 'last', False]c                 C  s   t | } tj| ||dS )ax  
    Return boolean ndarray denoting duplicate values.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array over which to check for duplicate values.
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.
    mask : ndarray[bool], optional
        array indicating which elements to exclude from checking

    Returns
    -------
    duplicated : ndarray[bool]
    )r   r   )rP   r   
duplicated)r>   r   r   rY   rY   rZ   r     s   r   c              
   C  s   t | dd} | }t| jrt| } td| } | j|dS t| } tj| ||d\}}|dur2||fS zt	
|}W n tyV } ztjd| t d W Y d}~nd}~ww t||j|}|S )	a  
    Returns the mode(s) of an array.

    Parameters
    ----------
    values : array-like
        Array over which to check for duplicate values.
    dropna : bool, default True
        Don't consider counts of NaN/NaT.

    Returns
    -------
    np.ndarray or ExtensionArray
    moder   r=   r   )r   r   NzUnable to sort modes: rc   )rq   r#   rK   r3   r   _moderP   r   r   rM   r   r   rj   rk   r   r_   )r>   r   r   r[   npresultres_maskr   r   rY   rY   rZ   r     s*   

r   averageaxisr   method	na_optionpctnpt.NDArray[np.float64]c              	   C  sd   t | j}t| } | jdkrtj| |||||d}|S | jdkr.tj| ||||||d}|S td)a  
    Rank the values along a given axis.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array whose values will be ranked. The number of dimensions in this
        array must not exceed 2.
    axis : int, default 0
        Axis over which to perform rankings.
    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        The method by which tiebreaks are broken during the ranking.
    na_option : {'keep', 'top'}, default 'keep'
        The method by which NaNs are placed in the ranking.
        - ``keep``: rank each NaN value with a NaN ranking
        - ``top``: replace each NaN with either +/- inf so that they
                   there are ranked at the top
    ascending : bool, default True
        Whether or not the elements should be ranked in ascending order.
    pct : bool, default False
        Whether or not to the display the returned rankings in integer form
        (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
       )is_datetimeliketies_methodr   r   r   rF   )r   r   r  r   r   r   z&Array with ndim > 2 are not supported.)r#   rK   rP   ndimr   rank_1drank_2dr   )r>   r   r   r   r   r   r   ranksrY   rY   rZ   rank+  s0   


r  indicesr   
allow_fillc                 C  s|   t | tjtttfstjdtt	 d t
| st| } t|}|r5t|| j|  t| ||d|d}|S | j||d}|S )ak	  
    Take elements from an array.

    Parameters
    ----------
    arr : array-like or scalar value
        Non array-likes (sequences/scalars without a dtype) are coerced
        to an ndarray.

        .. deprecated:: 2.1.0
            Passing an argument other than a numpy.ndarray, ExtensionArray,
            Index, or Series is deprecated.

    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    axis : int, default 0
        The axis over which to select values.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any other
          negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type (``self.dtype.na_value``) is used.

        For multi-dimensional `arr`, each *element* is filled with
        `fill_value`.

    Returns
    -------
    ndarray or ExtensionArray
        Same type as the input.

    Raises
    ------
    IndexError
        When `indices` is out of bounds for the array.
    ValueError
        When the indexer contains negative values other than ``-1``
        and `allow_fill` is True.

    Notes
    -----
    When `allow_fill` is False, `indices` may be whatever dimensionality
    is accepted by NumPy for `arr`.

    When `allow_fill` is True, `indices` should be 1-D.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.

    Examples
    --------
    >>> import pandas as pd

    With the default ``allow_fill=False``, negative numbers indicate
    positional indices from the right.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1])
    array([10, 10, 30])

    Setting ``allow_fill=True`` will place `fill_value` in those positions.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True)
    array([10., 10., nan])

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True,
    ...      fill_value=-10)
    array([ 10,  10, -10])
    zpd.api.extensions.take accepting non-standard inputs is deprecated and will raise in a future version. Pass either a numpy.ndarray, ExtensionArray, Index, or Series instead.rc   T)r   r  
fill_value)r   )rL   rM   rS   r*   r+   r-   rj   rk   rl   r   r   rN   r   r5   r   r1   take)arrr  r   r  r	  r   rY   rY   rZ   r
  k  s"   U

r
  leftr  value$NumpyValueArrayLike | ExtensionArraysideLiteral['left', 'right']sorterNumpySorter | Nonenpt.NDArray[np.intp] | np.intpc                 C  s   |durt |}t| tjr^| jjdv r^t|st|r^t| jj	}t|r-t
|gnt
|}||jk rD||jk rD| j}n|j}t|rTtt|	|}nttt||d}nt| } | j|||dS )a  
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `arr` (a) such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `arr` would be preserved.

    Assuming that `arr` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``arr[i-1] < value <= self[i]``
    right   ``arr[i-1] <= value < self[i]``
    ======  ================================

    Parameters
    ----------
    arr: np.ndarray, ExtensionArray, Series
        Input array. If `sorter` is None, then it must be sorted in
        ascending order, otherwise `sorter` must be an array of indices
        that sort it.
    value : array-like or scalar
        Values to insert into `arr`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array-like, optional
        Optional array of integer indices that sort array a into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints or int
        If value is array-like, array of insertion points.
        If value is scalar, a single integer.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.
    NiurJ   )r  r  )r   rL   rM   rS   rK   r   r   r   iinfor   r2   minr   maxr   r   r   r   r3   searchsorted)r  r  r  r  r  	value_arrrK   rY   rY   rZ   r    s&   0
r  >   r|   r{   rz   ry   ru   rt   nc                 C  sJ  t |}tj}| j}t|}|rtj}ntj}t|t	r#| 
 } | j}t| tjsVt| d|j drL|dkrDtdt| j d| || | |S tt| j dd}| jjdv rktj}| d} t}d	}n|rqtj}n|jd
v r| jjdv rtj}ntj}| j}|dkr| dd} t|}tj| j|d}	tdgd }
|dkrtd|nt|d|
|< ||	t|
< | jjt v rt!j"| |	|||d nCtdgd }|dkrt|dntd|||< t|}tdgd }|dkrtd| nt| d||< t|}|| | | | |	|< |r|	d}	|dkr#|	dddf }	|	S )aQ  
    difference of n between self,
    analogous to s-s.shift(n)

    Parameters
    ----------
    arr : ndarray or ExtensionArray
    n : int
        number of periods
    axis : {0, 1}
        axis to shift on
    stacklevel : int, default 3
        The stacklevel for the lost dtype warning.

    Returns
    -------
    shifted
    __r   zcannot diff z	 on axis=zK has no 'diff' method. Convert to a suitable dtype prior to calling 'diff'.Fr   rI   Tr  )r|   r{   r   r   rJ   NrF   )datetimelikeztimedelta64[ns])#r   rM   nanrK   r   operatorxorsubrL   r(   to_numpyrS   hasattrr   
ValueErrorr   shiftr   r   ry   rT   r	   object_r   ru   rt   r  reshapeemptyr   slicern   _diff_specialr   diff_2d)r  r  r   narK   is_boolopis_timedelta	orig_ndimout_arr
na_indexer_res_indexerres_indexer_lag_indexerlag_indexerrY   rY   rZ   diff;  sh   



  $

r6  Index | ArrayLikerR   npt.NDArray[np.intp] | Noner   r   .AnyArrayLike | tuple[AnyArrayLike, np.ndarray]c              	   C  s  t | tjttfstdd}t | jts#tj	| dddkr#t
| }n+z|  }| |}W n ttjfyM   | jrGt | d trGt| }nt
| }Y nw |du rT|S t|s\tdtt|}|sstt| t| ksstd|du rt| \}} |t| }||  t||}|r| }	|r|t|  k |t| kB }
d||
< nd}
t|	|d	d
}n2tjt|td}||tt| |j|dd}|r|d	k}
|r|
|t|  k B |t| kB }
|r|
durt ||
d	 |t|fS )a  
    Sort ``values`` and reorder corresponding ``codes``.

    ``values`` should be unique if ``codes`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``codes`` is not None.
    codes : np.ndarray[intp] or None, default None
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``-1``.
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``codes`` is None.
    verify : bool, default True
        Check if codes are out of bound for the values and put out of bound
        codes equal to ``-1``. If ``verify=False``, it is assumed there
        are no out of bound codes. Ignored when ``codes`` is None.

    Returns
    -------
    ordered : AnyArrayLike
        Sorted ``values``
    new_codes : ndarray
        Reordered ``codes``; returned when ``codes`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``codes`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``codes`` is not None and ``values`` contain duplicates.
    zbOnly np.ndarray, ExtensionArray, and Index objects are allowed to be passed to safe_sort as valuesNFre   ri   r   zMOnly list-like objects or None are allowed to be passed to safe_sort as codesz,values should be unique if codes is not Noner   r	  rJ   wrap)r   )!rL   rM   rS   r*   r+   r   rK   r'   r
   rm   _sort_mixedargsortr
  decimalInvalidOperationr   rn   _sort_tuplesr    r   rN   r   r   r#  r   map_locationslookupr1   r'  r   putarangeputmask)r>   rR   r   r   r   r  orderedr   torder2r   	new_codesreverse_indexerrY   rY   rZ   r     sb   0




r   c           
      C  s   t jdd | D td}t jdd | D td}| | @ }t | | }t | | }| d |}| d |}| d }t |||g}	| |	S )z3order ints before strings before nulls in 1d arraysc                 S  s   g | ]}t |tqS rY   )rL   ra   .0xrY   rY   rZ   
<listcomp>0  s    z_sort_mixed.<locals>.<listcomp>rJ   c                 S  s   g | ]}t |qS rY   )r/   rK  rY   rY   rZ   rN  1  s    r   )rM   r2   r   r=  nonzeror
  concatenate)
r>   str_posnull_posnum_posstr_argsortnum_argsortstr_locsnum_locs	null_locslocsrY   rY   rZ   r<  .  s   
r<  c                 C  s:   ddl m} ddlm} || d\}}||dd}| | S )a  
    Convert array of tuples (1d) to array of arrays (2d).
    We need to keep the columns separately as they contain different types and
    nans (can't use `np.sort` as it may fail when str and nan are mixed in a
    column as types cannot be compared).
    r   )	to_arrays)lexsort_indexerNT)orders)"pandas.core.internals.constructionrZ  pandas.core.sortingr[  )r>   rZ  r[  arraysr   indexerrY   rY   rZ   r@  =  s
   r@  lvalsArrayLike | Indexrvalsc           	      C  s  ddl m} t  tjddtd t| dd}t|dd}W d   n1 s)w   Y  |j|dd	\}}t	|j
|j
}|||jd
dd}t| trZt|trZ| | }nt| trb| j} t|trj|j}t| |g}t|}t|}||j
}t||S )a  
    Extracts the union from lvals and rvals with respect to duplicates and nans in
    both arrays.

    Parameters
    ----------
    lvals: np.ndarray or ExtensionArray
        left values which is ordered in front.
    rvals: np.ndarray or ExtensionArray
        right values ordered after lvals.

    Returns
    -------
    np.ndarray or ExtensionArray
        Containing the unsorted union of both arrays.

    Notes
    -----
    Caller is responsible for ensuring lvals.dtype == rvals.dtype.
    r   r;   ignorez<The behavior of value_counts with object-dtype is deprecated)categoryFr   Nr:  r   )r   rK   rE   )r   r;   rj   catch_warningsfilterwarningsrl   r   alignrM   maximumr>   r   rL   r,   appendr   r+   r   r$   r3   reindexrepeat)	ra  rc  r;   l_countr_countfinal_countunique_valscombinedrepeatsrY   rY   rZ   union_with_duplicatesL  s0   



rt  	na_actionLiteral['ignore'] | Noneconvert#np.ndarray | ExtensionArray | Indexc           	        s
  |dvrd| d}t |t|r=t|tr%t|dr%|  fdd}nddlm} t|dkr9||tj	d	}n||}t|t
r[|d
krM||j  }|j| }t|j|}|S t| sc|  S | jtdd}|du rvtj|||dS tj||t|tj|dS )a  
    Map values using an input mapping or function.

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}, default None
        If 'ignore', propagate NA values, without passing them to the
        mapping correspondence.
    convert : bool, default True
        Try to find better dtype for elementwise function results. If
        False, leave as dtype=object.

    Returns
    -------
    Union[ndarray, Index, ExtensionArray]
        The output of the mapping function applied to the array.
        If the function returns a tuple with more than one element
        a MultiIndex will be returned.
    )Nre  z+na_action must either be 'ignore' or None, z was passed__missing__c                   s$    t | trt| rtj S |  S r   )rL   floatrM   r   r  )rM  dict_with_defaultrY   rZ   r     s
    zmap_array.<locals>.<lambda>r   rd  rJ   re  FrD   N)rw  )r   rw  )r#  r   rL   dictr"  r   r;   r   rM   rt   r-   r   r   get_indexerr1   r   rE   rU   rW   r
   	map_infermap_infer_maskr/   rT   rC   )	r  mapperru  rw  msgr;   r`  
new_valuesr>   rY   r{  rZ   	map_array  s2   
r  )r>   r   r?   r@   )r>   r   rK   r   r[   r   r?   r   )r`   ra   r?   r   )r>   r@   )r>   r@   r?   ra   )r>   r   r?   r   r   )r   r   )r   r6   r>   r6   r?   r   )TNNN)r>   r@   r   r   r   r   r   rW   r   r   r?   r   )FTN)r   r   r   r   r   r   r?   r   )TFFNT)
r   r   r   r   r   r   r   r   r?   r;   )r>   r@   r   r   r   r   r?   r   )r   N)r>   r   r   r   r   r   r?   r   )TN)r>   r   r   r   r   r   r?   r   )r   r   r   TF)r>   r   r   r   r   ra   r   ra   r   r   r   r   r?   r   )r   FN)r  r   r   r   r  r   )r  N)
r  r   r  r  r  r  r  r  r?   r  )r   )r  r   r   r   )NTFT)r>   r7  rR   r8  r   r   r   r   r   r   r?   r9  )r?   r   )r>   r@   r?   r@   )ra  rb  rc  rb  r?   rb  )NT)r  r   ru  rv  rw  r   r?   rx  )__doc__
__future__r   r>  r  textwrapr   typingr   r   r   rj   numpyrM   pandas._libsr   r   r   r	   r
   pandas._typingr   r   r   r   r   r   pandas.util._decoratorsr   pandas.util._exceptionsr   pandas.core.dtypes.castr   r   pandas.core.dtypes.commonr   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   pandas.core.dtypes.concatr$   pandas.core.dtypes.dtypesr%   r&   r'   r(   pandas.core.dtypes.genericr)   r*   r+   r,   r-   r.   pandas.core.dtypes.missingr/   r0   pandas.core.array_algos.taker1   pandas.core.constructionr2   r   r3   r4   pandas.core.indexersr5   r6   r7   r8   r   r9   r:   r;   pandas.core.arraysr<   r=   rP   r_   rq   Complex128HashTableComplex64HashTableFloat64HashTableFloat32HashTableUInt64HashTableUInt32HashTableUInt16HashTableUInt8HashTableInt64HashTableInt32HashTableInt16HashTableInt8HashTableStringHashTablePyObjectHashTabler~   r   r}   r   r   r   unique1dr   r   r   r   r   r   r   r   r   r  r
  r  r)  r6  r   r<  r@  rt  r  rY   rY   rY   rZ   <module>   s     D 
	
N
!


a
]> :1f!.CxXs 


: