o
    ŀge                     @  s  d dl mZ d dlZd dlmZmZ d dlZd dlZd dl	m
  mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZmZmZ d d	lmZ d d
lmZ d dlm   m!Z" d dlm#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5m6Z6m7Z7m8Z8 erd dl9m:Z:m;Z;m<Z< d dl=m>Z> d dl?m@Z@ G dd dZA	dCdDddZBdCdEd d!ZC	dCdFd$d%ZDdGd(d)ZEdHdId-d.ZFdJdId/d0ZGdKd3d4ZH	dHdLd7d8ZIdMd=d>ZJdNdAdBZKdS )O    )annotationsN)TYPE_CHECKINGcast)PerformanceWarning)cache_readonly)find_stack_level)find_common_typemaybe_promote)ensure_platform_intis_1d_only_ea_dtype
is_integerneeds_i8_conversion)ExtensionDtype)notna)	factorizeunique)factorize_from_iterable)ensure_wrapped_if_datetimelike)	DataFrame)Index
MultiIndex
RangeIndex)concat)Series)compress_group_indexdecons_obs_group_idsget_compressed_idsget_group_indexget_group_index_sorter)	ArrayLikeLevelnpt)ExtensionArray)
FrozenListc                   @  s   e Zd ZdZ	d,d-ddZed.ddZed/ddZd0ddZdd Z	ed1ddZ
ed2ddZd3dd Zd4d"d#Zd5d&d'Zed6d(d)Zed7d*d+Zd!S )8
_Unstackera	  
    Helper class to unstack data / pivot with multi-level index

    Parameters
    ----------
    index : MultiIndex
    level : int or str, default last level
        Level to "unstack". Accepts a name for the level.
    fill_value : scalar, optional
        Default value to fill in missing values if subgroups do not have the
        same set of labels. By default, missing values will be replaced with
        the default fill value for that data type, NaN for float, NaT for
        datetimelike, etc. For integer types, by default data will converted to
        float and missing values will be set to NaN.
    constructor : object
        Pandas ``DataFrame`` or subclass used to create unstacked
        response.  If None, DataFrame will be used.

    Examples
    --------
    >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
    ...                                    ('two', 'a'), ('two', 'b')])
    >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index)
    >>> s
    one  a    1
         b    2
    two  a    3
         b    4
    dtype: int64

    >>> s.unstack(level=-1)
         a  b
    one  1  2
    two  3  4

    >>> s.unstack(level=0)
       one  two
    a    1    3
    b    2    4

    Returns
    -------
    unstacked : DataFrame
    Tindexr   levelr    sortboolreturnNonec           	      C  s  || _ || _| | _| j|| _d| jj| j v rdnd| _t| jj	| _
t| jj| _| j| j| _| j
| j| _|j	| j | _| js_t| jj| j }| j|| _| j|| _tdd | j
D }| jj}|| }|ttjjkrtjd| dtt d |   d S )	N   r   c                 S  s   g | ]}|j qS  )size).0index_levelr-   r-   O/var/www/html/myenv/lib/python3.10/site-packages/pandas/core/reshape/reshape.py
<listcomp>   s    z'_Unstacker.__init__.<locals>.<listcomp>z%The following operation may generate z& cells in the resulting pandas object.)
stacklevel)constructorr'   remove_unused_levelsr%   _get_level_numberr&   codesliftlistlevelsnew_index_levelsnamesnew_index_namespopremoved_nameremoved_levelremoved_level_fullr   takenpmaxr.   iinfoint32warningswarnr   r   _make_selectors)	selfr%   r&   r4   r'   unique_codesnum_rowsnum_columns	num_cellsr-   r-   r1   __init__n   s0   

z_Unstacker.__init__-tuple[npt.NDArray[np.intp], list[np.ndarray]]c           
      C  s   | j }t| jj}t| jj}|d | ||d d   || g }tdd |d | ||d d   || g D }t||\}}t|}t||}	|	|fS )Nr,   c                 s      | ]}t |V  qd S Nlenr/   xr-   r-   r1   	<genexpr>       z2_Unstacker._indexer_and_to_sort.<locals>.<genexpr>)	r&   r9   r%   r7   r:   tupler   rT   r   )
rJ   vr7   levsto_sortsizes
comp_indexobs_idsngroupsindexerr-   r-   r1   _indexer_and_to_sort   s   &4
z_Unstacker._indexer_and_to_sortlist[np.ndarray]c                   s&   | j \ }| jr fdd|D S |S )Nc                      g | ]}|  qS r-   rB   )r/   linera   r-   r1   r2          z,_Unstacker.sorted_labels.<locals>.<listcomp>)rb   r'   )rJ   r\   r-   rg   r1   sorted_labels   s   
z_Unstacker.sorted_labelsvalues
np.ndarrayc                 C  s(   | j r| j\}}tj||dd}|S |S )Nr   )axis)r'   rb   algostake_nd)rJ   rj   ra   _sorted_valuesr-   r-   r1   _make_sorted_values   s
   
z_Unstacker._make_sorted_valuesc           
      C  s   | j }| jd d }tdd |D }t||\}}t|}t|}| jj| j | j	 }||f| _
| jd ||  | j	 }tjt| j
td}	|	|d |	 t| jk r[td|| _|	| _| jro|t|| _d S ttj|ddd | _d S )	Nr+   c                 s  rQ   rR   rS   rU   r-   r-   r1   rW      rX   z-_Unstacker._make_selectors.<locals>.<genexpr>dtypeTz0Index contains duplicate entries, cannot reshape)return_indexr,   )r;   ri   rY   r   rT   r
   r%   levshaper&   r8   
full_shaperC   zerosprodr(   putsum
ValueErrorgroup_indexmaskr'   searchsortedarange
compressorr   )
rJ   
new_levelsremaining_labelslevel_sizesr^   r_   r`   strideselectorr}   r-   r-   r1   rI      s$   
z_Unstacker._make_selectorsc                 C  s   t | j S rR   )r(   r}   allrJ   r-   r-   r1   mask_all   s   z_Unstacker.mask_all2tuple[npt.NDArray[np.intp], npt.NDArray[np.bool_]]c                 C  s6   t jt| jt jd}| j|dd\}}||dfS )Nrr   r+   
fill_valuer   )rC   r   rT   r%   intpget_new_valuesany)rJ   	dummy_arr
new_valuesr}   r-   r-   r1   arange_result   s   z_Unstacker.arange_resultr   c                 C  sn   |j dkr|d d tjf }|d u r|jd dkrtd| ||\}}| |}| j}| j||||j	dS )Nr,   z-must pass column labels for multi-column data)r%   columnsrs   )
ndimrC   newaxisshaper{   r   get_new_columns	new_indexr4   rs   )rJ   rj   value_columnsr   ro   r   r%   r-   r-   r1   
get_result   s   


z_Unstacker.get_resultNc                 C  s  |j dkr|d d tjf }| |}| j\}}|jd }|| }||f}| j}	| j}
|
rJt|rJ|	|||
dd	|}tj|td}||fS |j}|
rZ|j}tj||d}n*t|trq| }|j||d}||d d < nt||\}}tj||d}|| |j}tj|td}t|jr|d}|d}n|j|dd}t||	d|||||d t|jr|d}t|}||j}||fS )	Nr,      rr   i8Fcopyu1zM8[ns])r   rC   r   rq   rv   r   r}   r   rT   reshapeswapaxesonesr(   rs   empty
isinstancer   construct_array_type_emptyr	   fillnamerw   r   viewastype
libreshapeunstackr   )rJ   rj   r   rp   lengthwidthr   result_widthresult_shaper}   r   r   new_maskrs   clsr   r-   r-   r1   r      s^   









z_Unstacker.get_new_valuesr   Index | Nonec           	        s   |d u r!| j dkr| jj| jdS | jjd| jjd}|| jS t| j| j  }t|}t	t
|| t|trT|j| jf }|j| jf } fdd|jD }n|| jg}|j| jg} g}| j}|t|| t|||ddS )Nr   r   )itemc                   rd   r-   re   r/   lab
propagatorr-   r1   r2   Z  rh   z._Unstacker.get_new_columns.<locals>.<listcomp>Fr:   r7   r<   verify_integrity)r8   r@   _renamer?   insert	_na_valuerenamerT   rC   repeatr   r   r   r:   rA   r<   r7   r   	_repeaterappendtile)	rJ   r   levr   r   r   	new_names	new_codesrepeaterr-   r   r1   r   E  s0   

z_Unstacker.get_new_columnsc                 C  s^   t | jt | jkr| j| j}| jrt|dd}|S t | j| j }t|| j }|S )Nr   r+   )rT   rA   r@   get_indexerr8   rC   r   r   )rJ   r   r   r-   r-   r1   r   k  s   z_Unstacker._repeaterc                   s    fdd j d d D }t jdkr9 jd |d }}|dk r.|t||j}|| jd S t	 j| jddS )Nc                   s   g | ]}|  jqS r-   )rB   r   r   r   r-   r1   r2   }      z(_Unstacker.new_index.<locals>.<listcomp>r+   r,   r   Fr   )
ri   rT   r;   r   r   r   rB   r   r=   r   )rJ   result_codesr&   level_codesr-   r   r1   r   z  s   z_Unstacker.new_index)T)r%   r   r&   r    r'   r(   r)   r*   )r)   rP   )r)   rc   )rj   rk   r)   rk   )r)   r(   )r)   r   )r)   r   rR   )r   r   )r)   rk   )r)   r   )__name__
__module____qualname____doc__rO   r   rb   ri   rq   rI   r   r   r   r   r   r   r   r-   r-   r-   r1   r$   @   s*    ..



O&r$   TdataSeries | DataFramer'   r(   c                   sx  t  dkr| S | jtt jv r g fdd D   fddtjD }fdd D }fdd D }fdd D }fdd|D }fd	d|D }	fd
d|D }
tdd |D }t||ddd}t	|dd\}}t
||||dd}|st|dd}nt||g |	|g |
dg dd}t| tr|  }||_|jd||d}|}|}|}njt| jtr| } r܈ d|j||d}fdd D   s|S | jdd}||_|jd||d}t|tr|jn|jttsJ jd g| }| jjg| }jd g}|fdd|D  t|||dd}t|tr7||_|S ||_|S )Nr   c                   s   g | ]}  |qS r-   )r6   r/   ir%   r-   r1   r2     rh   z%_unstack_multiple.<locals>.<listcomp>c                      g | ]}| vr|qS r-   r-   r   )clocsr-   r1   r2     r   c                      g | ]} j | qS r-   r:   r   r   r-   r1   r2     rh   c                   r   r-   r7   r   r   r-   r1   r2     rh   c                   r   r-   r<   r   r   r-   r1   r2     rh   c                   r   r-   r   r   r   r-   r1   r2     rh   c                   r   r-   r   r   r   r-   r1   r2     rh   c                   r   r-   r   r   r   r-   r1   r2     rh   c                 s  rQ   rR   rS   rU   r-   r-   r1   rW     rX   z$_unstack_multiple.<locals>.<genexpr>F)r'   xnullr'   )r   __placeholder__r   r   r   r'   c                   s    g | ]}| k r
|n|d  qS r,   r-   r/   rZ   )valr-   r1   r2          deepc                 3  s     | ]}|  jd  V  qdS )r+   N)rB   r7   )r/   rec)unstcolsr-   r1   rW     s    )rT   r%   r   r   r<   rangenlevelsrY   r   r   r   r   r   r   r   r   r   r>   r:   r   r7   extend)r   r   r   r'   rlocsclevelsccodescnamesrlevelsrcodesrnamesr   r|   comp_idsr_   recons_codesdummy_indexdummy	unstackedr   r   r   resultdummy_dfnew_columnsr-   )r   r%   r   r   r1   _unstack_multiple  s|   




r   objc                 C  s   t |ttfrt|dkrt| |||dS |d }t|s'|dks'| j| t | trAt | jt	r:t
| |||dS | jjddS t | jt	sRtdt| j dt| jr_t| |||d	S t| j|| j|d
}|j| jd |dS )Nr,   r   r   r   T)future_stackz'index must be a MultiIndex to unstack, z was passedr   r&   r4   r'   r   r   )r   rY   r9   rT   r   r   r%   r6   r   r   _unstack_frameTstackr{   typer   rs   _unstack_extension_seriesr$   _constructor_expanddimr   _values)r   r&   r   r'   	unstackerr-   r-   r1   r     s,   

r   r   r)   c                 C  s^   t | jtsJ t| j|| j|d}| js%| jj||d}| j||j	dS |j
| j| j|dS )Nr   r   )axesr   )r   r%   r   r$   _constructor_can_fast_transpose_mgrr   _constructor_from_mgrr  r   r  r   )r   r&   r   r'   r  mgrr-   r-   r1   r     s   
r   seriesr   c                 C  s,   |   }|j|||d}|jdg|_|S )an  
    Unstack an ExtensionArray-backed Series.

    The ExtensionDtype is preserved.

    Parameters
    ----------
    series : Series
        A Series with an ExtensionArray for values
    level : Any
        The level name or number.
    fill_value : Any
        The user-level (not physical storage) fill value to use for
        missing values introduced by the reshape. Passed to
        ``series.values.take``.
    sort : bool
        Whether to sort the resulting MuliIndex levels

    Returns
    -------
    DataFrame
        Each column of the DataFrame will have the same dtype as
        the input Series.
    )r&   r   r'   r   )to_framer   r   _drop_level_numbers)r  r&   r   r'   dfr   r-   r-   r1   r    s   r  r+   framedropnac                   s  dd }| j \} | j|}t| jtrt| |||dS t| jtrat| jj} fdd| jj	D }|| j\}	}
|
|	 |
t|
|  t| jj}|
| jj t|||dd}n*tt|| j| jf \}\}}
| t|
| f}t||| jj| jjgdd}| js| jrt| jj}|d }t|tr| }|d	d |  D }t|| }n| j }n| j }|rt|}|| }|| }| j||d
S )z
    Convert DataFrame to Series with multi-level Index. Columns become the
    second level of the resulting hierarchical index

    Returns
    -------
    stacked : Series or DataFrame
    c                 S  s,   | j r| tt| fS t| \}}||fS rR   )	is_uniquerC   r   rT   r   )r%   r7   
categoriesr-   r-   r1   stack_factorizeM  s   zstack.<locals>.stack_factorize)	level_numr  r'   c                   rd   r-   r   r   Kr-   r1   r2   ^  rh   zstack.<locals>.<listcomp>Fr   r   c                 S  s   g | ]\}}|j qS r-   )r  )r/   ro   colr-   r-   r1   r2   }  rh   r   )r   r   r6   r   r   _stack_multi_columnsr%   r9   r:   r7   r   rC   r   ravelr<   r   zipmapr   r   _is_homogeneous_typedtypesr  r   r   _concat_same_typeitems"_reorder_for_extension_array_stackr   _constructor_sliced)r  r&   r  r'   r  Nr  r   r   clevclabr   r   r:   ilabr7   r  rs   arrr   r}   r-   r  r1   r   C  sT   




r   c                   s   t  fdd|D r }|D ]
t|||d}q|S t dd |D rL } fdd|D }|rJ|dt|||d}fdd|D }|s2|S td	)
Nc                 3  s    | ]	}| j jv V  qd S rR   )r   r<   r/   r   r  r-   r1   rW     s    z!stack_multiple.<locals>.<genexpr>)r  r'   c                 s  s    | ]}t |tV  qd S rR   )r   intr(  r-   r-   r1   rW     s    c                   s   g | ]} j |qS r-   )r   r6   r(  r)  r-   r1   r2     r   z"stack_multiple.<locals>.<listcomp>r   c                   s    g | ]}| kr
|n|d  qS r   r-   r   r   r-   r1   r2     r   zTlevel should contain all level names or all level numbers, not a mixture of the two.)r   r   r>   r{   )r  r&   r  r'   r   r-   )r  r   r1   stack_multiple  s"   
r,  r   r   c                 C  s   t | jdkr| jd j| jd dS dd t| jdd | jdd D }t| }dd	 t|D }t| }tj	d
d t|| jD | jdd dS )zBCreates a MultiIndex from the first N-1 levels of this MultiIndex.r   r   r   c                   s"   g | ]\ } fd d|D qS )c                   s    g | ]}|d kr | ndqS )r   Nr-   )r/   cr+  r-   r1   r2     r   z8_stack_multi_column_index.<locals>.<listcomp>.<listcomp>r-   r/   r7   r-   r+  r1   r2     s    z-_stack_multi_column_index.<locals>.<listcomp>Nr+   c                 s  s    | ]\}}|V  qd S rR   r-   )r/   keyro   r-   r-   r1   rW     rX   z,_stack_multi_column_index.<locals>.<genexpr>c                 S  s*   g | ]\}}d |vrt ||jdn|qS )Nrr   )r   rs   )r/   new_levr   r-   r-   r1   r2     s    r   )
rT   r:   r   r<   r  r7   	itertoolsgroupbyr   from_arrays)r   r[   tuplesunique_tuplesnew_levsr-   r-   r1   _stack_multi_column_index  s   
r7  r  r*  c           $   	     sn  ddd}| j dd}|j}t|tsJ ||jd	 kr@|}t||jd	 D ]}|||}	||d	 |}
||	|
}q&| |_}| sU|rU|d
|}|j|d	d}|j}t	t|}t
|}i }|jd }t|jd }|rst|}|t|d }t||}t|g }|D ]}z|j|}W n ty   || Y qw t|tst|}n|j|j }|kr|jd d |j| f }||jjd |_|j|dj}nB|jd d |f }t|j  t t r ! " fdd|# D }|j$\}}t%|| &||j'( }||}n|j}|j)d	kr |( }|||< qt|d
kr1|*|}t|}t|j+trTt,|j+j}t,|j+j-}fdd|j+jD }nt.|j+\}} | g}|/g}|j+j0g}|| |t1|| || jj-|  t|||dd}!| j2||!|d}"| jjd	kr| j3|g }#|"j4|#s|"|# }"|r|"j5d
dd}"|"S )Nr  r*  r   r   c                 S  s   | |j v r
|j |  S | S )z
        Logic for converting the level number to something we can safely pass
        to swaplevel.

        If `level_num` matches a column name return the name from
        position `level_num`, otherwise return `level_num`.
        r   )r  r   r-   r-   r1   _convert_level_number  s   

z3_stack_multi_columns.<locals>._convert_level_numberFr   r,   r   )r&   rl   r+   )r   c                   s    g | ]\}}|j j d dqS )Fr   )r  r   )r/   ro   rV   rr   r-   r1   r2     r   z(_stack_multi_columns.<locals>.<listcomp>c                   rd   r-   r  r   )levsizer-   r1   r2   2  rh   r   )r%   r   r   )rl   how)r  r*  r   r   )6r   r   r   r   r   r   	swaplevel_is_lexsorted
sort_indexr   r7  r:   r   r7   rC   r'   r   rT   rB   get_locKeyErrorr   slicestopstartlocreindexrj   ilocr   r  tolistr   r   r  r   r   r   r   r   r  r   
differencer%   r9   r<   r   r   r   r   r  r  equalsr  )$r  r  r  r'   r8  thismi_colsroll_columnsr   lev1lev2level_to_sortr   new_data
level_valsr   level_vals_nanlevel_vals_used	drop_colsr/  rC  	slice_lenchunkvalue_slicesubsetr#  r  idxr   r   r   	old_codes
old_levelsr   r   desired_columnsr-   )rs   r9  r1   r    s   














r  r'  r"   n_rows	n_columnsc                 C  s&   t || ||j }| |S )a  
    Re-orders the values when stacking multiple extension-arrays.

    The indirect stacking method used for EAs requires a followup
    take to get the order correct.

    Parameters
    ----------
    arr : ExtensionArray
    n_rows, n_columns : int
        The number of rows and columns in the original DataFrame.

    Returns
    -------
    taken : ExtensionArray
        The original `arr` with elements re-ordered appropriately

    Examples
    --------
    >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
    >>> _reorder_for_extension_array_stack(arr, 2, 3)
    array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='<U1')

    >>> _reorder_for_extension_array_stack(arr, 3, 2)
    array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='<U1')
    )rC   r   r   r   r  rB   )r'  r\  r]  rX  r-   r-   r1   r!  P  s   !
r!  r&   	list[int]c                   sh   j  t j krtdtdd} j fddt j jD d d d }tdkr;t	}|
|}n|}| }| }g }|D ]_}	t j dkrW  }
n&tdkr`|	f}	t|	tfdd	t j jD } jd d |f }
t j jk r|
j ||
_ n|jdkr|
jdkrd
|
_ntt|
j |
_ ||
 qIt|d
kr jst|}t|t  }nt j jk rЈ j | }nd
g}t| jjd}d
}t j jk r j | }|j |s|| }t jtr jj}tt jj d|f}nt! jdd\}}|g}tt|d|f}t|tr4|j}|" j }n| g}t!|ddd
 g} fdd|D }t|| ||  jj#t|j# dd|_t }t|}t$|}t|| |t%t$|| }|&|}|jdkr j jtkrt|j d
krt'|jd}n	|j(d d d
f }|jdkrd |_|S )Nz8Columns with duplicate values are not supported in stackT)reversec                   r   r-   r-   r/   k)r&   r-   r1   r2   |  r   zstack_v3.<locals>.<listcomp>r+   r,   c                 3  s(    | ]}|v rt  ntd V  qd S rR   )nextr@  r`  )genr&   r-   r1   rW     s
    
zstack_v3.<locals>.<genexpr>r   )r   rs   F)use_na_sentinelc                   s   g | ]
}t |t qS r-   )rC   r   rT   r.  r)  r-   r1   r2     s    r   r   r   ))r   nuniquerT   r{   sortedr  r   r   rC   argsort_reorder_ilevelsr   r   iterrY   rC  r   r   r   r   r   r   r   r  rs   rH  r   r%   r   r:   r9   r   r7   r   drop_duplicatesr<   r   r   rB   r   rE  )r  r&   drop_levnums
stack_colssorterordered_stack_colsstack_cols_uniqueordered_stack_cols_uniquebufrX  r   column_indexerr   ratior   r[  index_levelsindex_codesr7   uniquescolumn_levelscolumn_codeslen_df	n_uniquesra   idxsr-   )r  rc  r&   r1   stack_v3u  s   "






"
r|  )NT)r   r   r'   r(   )r   r   r'   r(   )r   r   r'   r(   r)   r   )r  r   r'   r(   r)   r   )r+   TT)r  r   r  r(   r'   r(   )TT)r   r   r)   r   )
r  r   r  r*  r  r(   r'   r(   r)   r   )r'  r"   r\  r*  r]  r*  r)   r"   )r  r   r&   r^  r)   r   )L
__future__r   r1  typingr   r   rG   numpyrC   pandas._libs.reshape_libsr   r   pandas.errorsr   pandas.util._decoratorsr   pandas.util._exceptionsr   pandas.core.dtypes.castr   r	   pandas.core.dtypes.commonr
   r   r   r   pandas.core.dtypes.dtypesr   pandas.core.dtypes.missingr   pandas.core.algorithmscore
algorithmsrm   r   r   pandas.core.arrays.categoricalr   pandas.core.constructionr   pandas.core.framer   pandas.core.indexes.apir   r   r   pandas.core.reshape.concatr   pandas.core.seriesr   pandas.core.sortingr   r   r   r   r   pandas._typingr   r    r!   pandas.core.arraysr"   pandas.core.indexes.frozenr#   r$   r   r   r   r  r   r,  r7  r  r!  r|  r-   r-   r-   r1   <module>   sV      Q[%
%M
! 
%