o
    ŀg]                  	   @   s  d Z ddlZddlZddlZddlmZ ddlmZ ddl	Z
ddlmZ ddlmZmZ dd Zejdd	 Zejd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zejj d d!d"d# Z!ejj d d!d$d% Z"d&d' Z#d(d) Z$d*d+ Z%d,d- Z&d.d/ Z'ej(d0ej)e*d1e+d1de
j,gd2d3 Z-ej(d4d5d6gd7d8 Z.d9d: Z/d;d< Z0d=d> Z1ej(d?d5d6gejj d@d!dAdB Z2ej(d?d5d6gejj d@d!dCdD Z3ej(dEdFdGgej(d?d5d6gdHdI Z4ej(dEdFdGgej(dJe
j5e
j6gdKdL Z7dMdN Z8dOdP Z9ej:dQdRdS Z;ej:dQdTdU Z<dVdW Z=dXdY Z>ej(dZg d[e6g d\fg d]e6g d^fgd_d` Z?dadb Z@ej(dcejAejBej*gddde ZCdfdg ZDdhdi ZEdjdk ZFdldm ZGdndo ZHdpdq ZIdS )rz
This module tests the functionality of StringArray and ArrowStringArray.
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
    N)pa_version_under12p0)is_dtype_equal)ArrowStringArrayArrowStringArrayNumpySemanticsc                 C   s   | j dkrtjS tjS )Npyarrow_numpy)storagenpnanpdNAdtype r   [/var/www/html/myenv/lib/python3.10/site-packages/pandas/tests/arrays/string_/test_string.pyna_val   s   
r   c                 C   s   t j| dS )z=Fixture giving StringDtype from parametrized 'string_storage')r   )r
   StringDtype)string_storager   r   r   r      s   r   c                 C   s   |   S )z3Fixture giving array type from parametrized 'dtype')construct_array_typer   r   r   r   cls#   s   r   c                 C   s   t dt jdt jdg| di}| jdkrd}nd}t||ks"J | jdkr*d}nd	}t|j|ks5J | jd
krCd}d| d}n| jdkrQd}d| d}nd}d| d}t|jj|kscJ d S )NAabr   r   z     A
0    a
1  NaN
2    bz      A
0     a
1  <NA>
2     bz10      a
1    NaN
2      b
Name: A, dtype: stringz40       a
1    <NA>
2       b
Name: A, dtype: stringpyarrowr   <z+>
['a', <NA>, 'b']
Length: 3, dtype: stringr   z*>
['a', nan, 'b']
Length: 3, dtype: stringStringArray)r
   	DataFramearrayr   r   reprr   )r   dfexpectedarr_namer   r   r   	test_repr)   s$    



r!   c                 C   s<   | j g d|d}|d d usJ |d t|ju sJ d S )N)r   Nr   r      )_from_sequencer   r   )r   r   r   r   r   r   test_none_to_nanC   s   r$   c                 C   s   | j ddg|d}| tjju rd}nd}tjt|d d|d< W d    n1 s*w   Y  | tjju r8d	}nd}tjt|d td
dg|d d < W d    d S 1 sXw   Y  d S )Nr   r   r   z4Cannot set non-string value '10' into a StringArray.Scalar must be NA or strmatch
   r   zMust provide strings.r"      )	r#   r
   arraysr   pytestraises	TypeErrorr   r   )r   r   arrmsgr   r   r   test_setitem_validatesI   s   
"r0   c                 C   s<   t jddg| d}d|d< t jddg| d}t|| d S )Nr   cr   dr   r
   r   tmassert_extension_array_equal)r   r.   r   r   r   r   test_setitem_with_scalar_string[   s   r6   c                 C   sf   t jg d| d}tdd g}| }||ddg< t jdt jdg| d}t|| t|| d S )Nr   r   r1   r   r   r   r"   r1   )r
   r   r   copyr   r4   r5   assert_numpy_array_equal)r   r.   value
value_origr   r   r   r   $test_setitem_with_array_with_missingd   s   r<   c                 C   s   t t jddd}d |d< || }t|j| sJ |d}t|| ||jd  }|| }t|j| s:J ||j}t|| d S )N2000   )periodsr   zdatetime64[ns])	r
   Series
date_rangeastyper   r   r4   assert_series_equaliloc)r   sercastedresultser2casted2result2r   r   r   test_astype_roundtripq   s   


rL   c                 C   s   t jg d| d}t jg d| d}|| }t jg d| d}t|| ||}t|| ||}t jg d| d}t|| |j|dd}t jg d| d}t|| d S )	N)r   r   r1   NNr   )xyNzN)axbyNNN)xaybNNN-)
fill_value)rP   rQ   zc-z-zN)r
   rA   r4   rD   addradd)r   r   r   rH   r   r   r   r   test_add   s   

rX   c                 C   s   | j |v rd}tjjd |d}|| tjg d| d}tjg dgtd}tj	t
dd ||  W d    n1 s=w   Y  t|}tj	t
dd ||  W d    d S 1 s^w   Y  d S )Nz*Failed: DID NOT RAISE <class 'ValueError'>r,   reasonr7   r   z3 != 1r&   )r   r+   markxfailapplymarkerr
   r   r   objectr,   
ValueErrorrA   )r   requestarrow_string_storagerZ   r[   r   r   sr   r   r   test_add_2d   s   




"rc   c                 C   sj   t jg d| d}g d}|| }t jg d| d}t|| || }t jg d| d}t|| d S )N)r   r   NNr   )rM   NrN   N)rP   NNN)rR   NNNr3   )r   r   otherrH   r   r   r   r   test_add_sequence   s   re   c                 C   sP   t jg d| d}|d }t jg d| d}t|| d| }t|| d S )Nr   r   Nr   r)   )aabbNr3   )r   r   rH   r   r   r   r   test_mul   s   ri   zGH-28527rZ   c                 C   s   t jg d| d}t jg dgtd}||tu sJ || }t g dg| }t|| || }t g dg| }t|| d S )N)r   r   r1   r2   r   )trN   vw)atrQ   cvdw)tarS   vcwd)	r
   r   r   r^   __add__NotImplementedrC   r4   assert_frame_equalr   r.   r   rH   r   r   r   r   test_add_strings   s   rx   c                 C   s   t jddtjtjg| d}t dtjdtjgg}||tu s"J || }t dtjtjtjgg| }t	|| || }t dtjtjtjgg| }t	|| d S )Nr   r   r   rM   rN   rP   rR   )
r
   r   r   r	   r   rt   ru   rC   r4   rv   rw   r   r   r   test_add_frame   s     ry   c                    s   d| j  d tjg d|d}dt| }|jdkrEt fdd|D }| tjkr5d|d	< nd
|d	< t	||
tj d S |jdkrLdnd}tj fdd|D td}tj||d}t|| d S )N__r   Nr1   r   r   r   c                       g | ]	}t | qS r   getattr.0itemop_namerd   r   r   
<listcomp>       z2test_comparison_methods_scalar.<locals>.<listcomp>Tr"   Fr   boolean[pyarrow]booleanc                    r|   r   r}   r   r   r   r   r      r   )__name__r
   r   r~   r   r   operatorner4   r9   rC   bool_r^   r5   )comparison_opr   r   rH   r   expected_dtyper   r   r   test_comparison_methods_scalar   s   


r   c                 C   s   d| j  d}tjg d|d}t||tj}|jdkr9tj| kr*tg d}ntg d}t	
|| d S |jdkr@dnd	}tjg d
|d}t	|| t	|| d S )Nrz   r{   r   r   TTTFFFr   r   r   NNN)r   r
   r   r~   r   r   r   r   r   r4   r9   r5   )r   r   r   r   rH   r   r   r   r   r   $test_comparison_methods_scalar_pd_na   s   

r   c           	      C   s   d| j  d}tjg d|d}d}|dvr7tjtdd t||| W d    d S 1 s0w   Y  d S t|||}|jdkr[g d	g d
d| }t|}t	
|| d S g dg dd| }|jdkrmdnd}tj||d}t	|| d S )Nrz   r{   r   *   )__eq____ne__z(Invalid comparison|not supported betweenr&   r   r   r   )FNF)TNTr   r   r   )r   r
   r   r+   r,   r-   r~   r   r   r4   r9   r5   )	r   r   r   r   rd   rH   expected_datar   r   r   r   r   )test_comparison_methods_scalar_not_string  s2   


r   c                 C   sb  d| j  d}tjg d|d}g d}t|||}|jdkrktj| kr-tg d}ntg d}t|d ||d |d< t	|| t||tj
}tj| kr\tg d	}ntg d}t	|| d S |jd
krrdnd}tjt|d dd}t|d ||d |d< tj||d}t|| t||tj
}tjg d|d}t|| d S )Nrz   r{   r   )NNr1   r   )TTFr   r@   r   r   r   r   r^   )rU   r   r   )r   r
   r   r~   r   r   r   r   r4   r9   r   fulllenr5   )r   r   r   r   rd   rH   r   r   r   r   r   test_comparison_methods_array!  s.   


r   c                 C   sB  | t jju r	d}nd}tjt|d | tjddgdd W d    n1 s(w   Y  tjt|d | tg  W d    n1 sFw   Y  | t jju ri| tjdtjgt	d | tjdd gt	d nEtjt|d | tjdtjgt	d W d    n1 sw   Y  tjt|d | tjdd gt	d W d    n1 sw   Y  tjt|d | tjdt j
gt	d W d    n1 sw   Y  tjt|d | tjdtdd	gt	d W d    n1 sw   Y  tjt|d | tjdtdd	gt	d W d    d S 1 sw   Y  d S )
Nz7StringArray requires a sequence of strings or pandas.NAzBUnsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArrayr&   r   r   S1r   NaTns)r
   r*   r   r+   r,   r_   r   r   r	   r^   r   
datetime64timedelta64)r   r/   r   r   r   test_constructor_raisesB  s6     $r   nar	   c                 C   s>   t jtdt jg}tt jtjd| gdd| d S )Nr   r^   r   )r
   r*   r   r   r   r   r4   r5   )r   r   r   r   r   test_constructor_nan_likec  s   r   r8   TFc           	      C   s   t jdt jgtd}| }t jdtjgtd}|j||| d}|tt	fv r7dd l
}||j|| dd}n||}t|| t|| d S )Nr   r   )r   r8   r   Ttypefrom_pandas)r   r   r	   r^   r8   r
   r   r#   r   r   r   stringr4   r5   r9   )	r8   r   r   nan_arrexpected_inputna_arrrH   par   r   r   r   test_from_sequence_no_mutatek  s   r   c                 C   s   t jg d| d}|d}tjg ddd}t|| t jdt jdg| d}| jdkr2t}d}nt	}d	}t
j||d
 |d W d    d S 1 sNw   Y  d S )N)123r   int64)r"   r)      r   r   r   z#cannot convert float NaN to integerzJint\(\) argument must be a string, a bytes-like object or a( real)? numberr&   )r
   r   rC   r   r4   r9   r   r   r_   r-   r+   r,   )r   r.   rH   r   errr/   r   r   r   test_astype_int~  s   

"r   c                 C   sF   t jdt jdg| d}|d}t jdt jdgdd}t|| d S )Nr   r   r   Int64r"   r   )r
   r   r   rC   r4   r5   r   r.   rH   r   r   r   r   test_astype_nullable_int  s   
r   c                 C   sF   t jdt jdg| d}||}t jdtjdg|d}t|| d S )Nz1.1z3.3r   g?gffffff
@)r
   rA   r   rC   r   r	   r4   rD   )r   any_float_dtyperF   rH   r   r   r   r   test_astype_float  s   
r   skipnazNot implemented StringArray.sumc                 C   s.   t jg d|d}|j| d}|dksJ d S )Nr7   r   r   abc)r
   rA   sumr   r   r.   rH   r   r   r   test_reduce  s   r   c                 C   sD   t jg d|d}|j| d}| r|dksJ d S t |s J d S )N)Nr   Nr   r1   Nr   r   r   )r
   rA   r   isnar   r   r   r   test_reduce_missing  s
   r   methodminmaxc                 C   s\   t jg d|d}t|| |d}|r#| dkrdnd}||ks!J d S |t|ju s,J d S )Nr   r   r1   Nr   r   r   r   r1   )r
   rA   r~   r   r   )r   r   r   r.   rH   r   r   r   r   test_min_max  s   r   boxc           
      C   s   |j |v r!|tju r!|tju rd}nd}tjjt|d}|| |g d|d}tt	| |}| dkr6dnd}	||	ks>J d S )	Nz<'<=' not supported between instances of 'str' and 'NoneType'z0'ArrowStringArray' object has no attribute 'max'rY   r   r   r   r   r1   )
r   r
   r   r+   r[   r\   r-   r]   r~   r   )
r   r   r   r`   ra   rZ   r[   r.   rH   r   r   r   r   test_min_max_numpy  s   

r   c                 C   s   t jdt jg| d}|jdd}t jddg| d}t|| |jtdd}t jddg| d}t|| | j|v r?d}nd}t	j
t|d |jdd W d    d S 1 sZw   Y  d S )	Nr   r   r   )r:   z"Invalid value '1' for dtype stringz3Cannot set non-string value '1' into a StringArray.r&   r"   )r
   r   r   fillnar4   r5   r   str_r   r+   r,   r-   )r   ra   r.   resr   r/   r   r   r   test_fillna_args  s   
"r   c                 C   s   t d}dd lm} tjg d| d}||}|jt|| dd}| jdv r1t	r1|
|}| jdkr>||| }||sEJ d S )	Nr   r   r7   r   Tr   )r   r   python)r+   importorskippyarrow.computecomputer
   r   listlarge_stringr   r   chunked_arraycastr   equals)r   r   pcdatar.   r   r   r   r   test_arrow_array  s   



r   z0ignore:Passing a BlockManager:DeprecationWarningc           
      C   s  t d}|r|dkr|t jjdd tjg d| d}td|i}||}| j	dkr:|
djd	ks9J n
|
djd
ksDJ td| | }W d    n1 sYw   Y  t|d jtjsiJ |d| d}	t||	 |jd t|d ju sJ d S )Nr   r   1infer_string takes precedence over string storagerj   rf   r   r   r   r   r   r   string[])r)   r   )r+   r   r]   r[   r\   r
   r   r   tabler   fieldr   option_context	to_pandas
isinstancer   r   rC   r4   rv   locr   
r   string_storage2r`   using_infer_stringr   r   r   r   rH   r   r   r   r   test_arrow_roundtrip  s(   



 r   c           
      C   s  t d}|r|dkr|t jjdd tjg | d}td|i}||}| j	dkr8|
djdks7J n
|
djd	ksBJ |j|jg | d
g|jd}td| | }W d    n1 sgw   Y  t|d jtjswJ |d| d}	t||	 d S )Nr   r   r   rj   r   r   r   r   r   )r   )schemar   r   r   )r+   r   r]   r[   r\   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rC   r4   rv   r   r   r   r    test_arrow_load_from_zero_chunks  s(   


 
r   c                 C   s   t | dddkrd}nt | dddkrd}nd}tjdd	dtjg| d
}|jdd}tjg d|g d |dd}t|| |jdd}tjddg|d d |dd}t|| d S )Nr    r   zint64[pyarrow]r   r   r   r   r   r   F)dropna)r)   r"   r"   )r   r"   r   countindexr   nameTr)   r"   )r~   r
   r   r   value_countsrA   r4   rD   )r   	exp_dtyper.   rH   r   r   r   r   test_value_counts_na0  s   r   c                 C   s   t | dddkrd}nt | dddkrtj}nd}tjdddtjg| d	}|jd
d}tjddg|d d |ddd }t|| d S )Nr   r   r   zdouble[pyarrow]r   Float64r   r   r   T)	normalizer)   r"   
proportionr   r   )	r~   r   float64r
   rA   r   r   r4   rD   )r   r   rF   rH   r   r   r   r    test_value_counts_with_normalizeA  s   "r   zvalues, expectedr7   r   rf   )FFTc              	   C   s   t j| |d} d}tjt|dU t dd6 |  }t|| t |  }t |}t	|| t 
|  }t 
|}t|| W d    n1 sPw   Y  W d    d S W d    d S 1 shw   Y  d S )Nr   z"use_inf_as_na option is deprecatedr&   zmode.use_inf_as_naT)r
   r   r4   assert_produces_warningFutureWarningr   r   r9   rA   rD   r   rv   )valuesr   r   r/   rH   r   r   r   test_use_inf_as_naN  s    	

"r   c                 C   sf   | j |v rtd| j   tjg d| d}d|j  k r.|   kr.|jddk s1J  J d S )Nznot applicable for r7   r   r   T)deep)r   r+   skipr
   rA   nbytesmemory_usage)r   ra   seriesr   r   r   test_memory_usageg  s   
8r   float_dtypec                 C   s:   t jdg| d}||}t jdg|d}t|| d S )Ng?r   z0.1)r
   rA   rC   r4   rD   )r   r   rF   rH   r   r   r   r   test_astype_from_float_dtyper  s   
r   c                 C   sH   t jdt jdg| d}t|}tjdt| dgtd}t|| d S )Nr   r   r   )r
   r   r   r   r   r^   r4   r9   r   r   r   r   "test_to_numpy_returns_pdna_default{  s   
r   c                 C   sJ   |}t jdt jdg| d}|j|d}tjd|dgtd}t|| d S )Nr   r   r   )na_value)r
   r   r   to_numpyr   r^   r4   r9   )r   nulls_fixturer  r.   rH   r   r   r   r   test_to_numpy_na_value  s
   r  c                 C   s   t jg d| d}|ddg}t g d}t|| |dt jg}t g d}t|| |g }t g d}t|| |d|g}t g d}t|| d S )Nrf   r   r   r1   )TFF)TFTr   )r
   rA   isinr4   rD   r   )r   fixed_now_tsrb   rH   r   r   r   r   	test_isin  s   
r  c                 C   s   t jg d| d}tg d}d ||< |jd t|ju s J t jg d| d}t|jt jju r5d}nd}t	j
t|d d||< W d    d S 1 sNw   Y  d S )Nr7   r   )FTFr"   zCannot set non-string valuer%   r&   )r
   rA   r   r   r   r   r   r*   r   r+   r,   r-   )r   rF   maskr/   r   r   r   (test_setitem_scalar_with_mask_validation  s   
"r	  c                 C   sD   g d}t j|t jd}tj|| d}tj|| d}t|| d S Nr7   r   )r   r   r   r
   r4   r5   r   valsr.   rH   r   r   r   r   test_from_numpy_str  s
   r  c                 C   s2   g d}t j|| d}| }|}t|| d S r
  )r
   r   tolistr4   assert_equalr  r   r   r   test_tolist  s
   r  )J__doc__r   numpyr   r+   pandas.compat.pyarrowr   pandas.core.dtypes.commonr   pandasr
   pandas._testing_testingr4   pandas.core.arrays.string_arrowr   r   r   fixturer   r   r!   r$   r0   r6   r<   rL   rX   rc   re   ri   r[   r\   rx   ry   r   r   r   r   r   parametrizer	   r   floatr   r   r   r   r   r   r   r   r   rA   r   r   r   r   filterwarningsr   r   r   r   r   r   float16float32r   r   r  r  r	  r  r  r   r   r   r   <module>   s    

	


!$!

	






