a
    Of                     @  s  d Z ddlmZ ddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlZddlZddlmZmZmZmZ ddlmZmZmZmZmZmZ ddlmZ dd	lmZ dd
lm Z m!Z! ddl"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6m7Z7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z? ddl@mAZAmBZB ddlCmDZD ddlEmFZGmHZHmIZI ddlJmKZK erddlmLZLmMZMmNZN ddlOmPZPmQZQmRZR ddlSmTZTmUZU dddddZVdddddddZWd dd!d"d#ZXejYejZej[ej\ej]ej^ej_ej`ejaejbejcejdejeejfd$Zgdd%d&d'Zhdd dd(d)Zid*d+ Zjdd,dd-d.Zkdd/d0d1d2ZlejZmd3Znd4d4d5d6d7d8Zoddd:d;d<d/d=d>d?d@ZpeedAedBedCdDdd:d:d;dFdGdHdIZqdd:d:d:d:dJdKdLdMZrdd:d:d:d:dJdKdNdOZsddd:d/dPdQdRdSZtdddUd/d5dVdWdXZuddd:d/ddQdYdZZvddd]d d d:d:d^d_d`daZwddbd]d:dcdddeZxdddgdhdidjdkdldmZyh dnZzdd,d]dodpdqZ{ddrdsd:d:d:dtdudvdwZ|ddxdydzZ}dddd{d|Z~d}d}d}d~ddZdddd:ddddZdS )zl
Generic data algorithms. This module is experimental at the moment and not
intended for public consumption
    )annotationsN)dedent)TYPE_CHECKINGLiteralcast)algos	hashtableiNaTlib)AnyArrayLike	ArrayLikeAxisIntDtypeObjTakeIndexernpt)doc)find_stack_level)'construct_1d_object_array_from_listlikenp_find_common_type)ensure_float64ensure_objectensure_platform_intis_array_likeis_bool_dtypeis_complex_dtypeis_dict_likeis_extension_array_dtypeis_float_dtype
is_integeris_integer_dtypeis_list_likeis_object_dtypeis_signed_integer_dtypeneeds_i8_conversion)concat_compat)BaseMaskedDtypeCategoricalDtypeExtensionDtypeNumpyEADtype)ABCDatetimeArrayABCExtensionArrayABCIndexABCMultiIndex	ABCSeriesABCTimedeltaArray)isnana_value_for_dtype)take_nd)arrayensure_wrapped_if_datetimelikeextract_array)validate_indices)ListLikeNumpySorterNumpyValueArrayLike)CategoricalIndexSeries)BaseMaskedArrayExtensionArrayr   z
np.ndarray)valuesreturnc                 C  sH  t | tst| dd} t| jr.tt| S t | jtr^t	d| } | j
sTt| jS t| S t | jtrzt	d| } | jS t| jrt | tjrt| dS t| jdddS n|t| jrt| S t| j r| jjdv rt| S t| S t| jrt	tj| S t| jr2| d	}t	tj|}|S tj| td
} t| S )a  
    routine to ensure that our data is of the correct
    input dtype for lower-level routines

    This will coerce:
    - ints -> int64
    - uint -> uint64
    - bool -> uint8
    - datetimelike -> i8
    - datetime64tz -> i8 (in local tz)
    - categorical -> codes

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    np.ndarray
    Textract_numpyr<   r9   uint8Fcopy)         i8dtype)
isinstancer,   r4   r!   rJ   r   npasarrayr%   r   Z_hasna_ensure_data_datar&   codesr   ndarrayviewastyper   r   itemsizer   r   r#   object)r>   Znpvalues rV   O/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/core/algorithms.pyrN   j   s<    










rN   r   r   )r>   rJ   originalr?   c                 C  sN   t | tr| j|kr| S t |tjs<| }|j| |d} n| j|dd} | S )z
    reverse of _ensure_data

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    original : AnyArrayLike

    Returns
    -------
    ExtensionArray or np.ndarray
    rI   FrC   )rK   r*   rJ   rL   Zconstruct_array_typeZ_from_sequencerS   )r>   rJ   rX   clsrV   rV   rW   _reconstruct_data   s    rZ   str)	func_namer?   c                 C  st   t | ttttjfsp|dkr4tj| dtt	 d t
j| dd}|dv rft | tr\t| } t| } n
t| } | S )z5
    ensure that we are arraylike if not already
    isin-targetsz with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.
stacklevelFZskipna)mixedstringmixed-integer)rK   r+   r-   r*   rL   rQ   warningswarnFutureWarningr   r
   infer_dtypetuplelistr   rM   )r>   r\   inferredrV   rV   rW   _ensure_arraylike   s    


rk   )Z
complex128Z	complex64float64float32Zuint64Zuint32Zuint16rB   int64int32int16int8rb   rU   r>   c                 C  s    t | } t| }t| }|| fS )z
    Parameters
    ----------
    values : np.ndarray

    Returns
    -------
    htable : HashTable subclass
    values : ndarray
    )rN   _check_object_for_strings_hashtables)r>   ndtyper   rV   rV   rW   _get_hashtable_algo  s    rv   c                 C  s&   | j j}|dkr"tj| ddr"d}|S )z
    Check if we can use string hashtable instead of object hashtable.

    Parameters
    ----------
    values : ndarray

    Returns
    -------
    str
    rU   Fr`   rb   )rJ   namer
   Zis_string_array)r>   ru   rV   rV   rW   rs     s
    rs   c                 C  s   t | S )a3
  
    Return unique values based on a hash table.

    Uniques are returned in order of appearance. This does NOT sort.

    Significantly faster than numpy.unique for long enough sequences.
    Includes NA values.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    numpy.ndarray or ExtensionArray

        The return can be:

        * Index : when the input is an Index
        * Categorical : when the input is a Categorical dtype
        * ndarray : when the input is a Series/ndarray

        Return numpy.ndarray or ExtensionArray.

    See Also
    --------
    Index.unique : Return unique values from an Index.
    Series.unique : Return unique values of Series object.

    Examples
    --------
    >>> pd.unique(pd.Series([2, 1, 3, 3]))
    array([2, 1, 3])

    >>> pd.unique(pd.Series([2] + [1] * 5))
    array([2, 1])

    >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")]))
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> pd.unique(
    ...     pd.Series(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    <DatetimeArray>
    ['2016-01-01 00:00:00-05:00']
    Length: 1, dtype: datetime64[ns, US/Eastern]

    >>> pd.unique(
    ...     pd.Index(
    ...         [
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...             pd.Timestamp("20160101", tz="US/Eastern"),
    ...         ]
    ...     )
    ... )
    DatetimeIndex(['2016-01-01 00:00:00-05:00'],
            dtype='datetime64[ns, US/Eastern]',
            freq=None)

    >>> pd.unique(np.array(list("baabc"), dtype="O"))
    array(['b', 'a', 'c'], dtype=object)

    An unordered Categorical will return categories in the
    order of appearance.

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    >>> pd.unique(pd.Series(pd.Categorical(list("baabc"), categories=list("abc"))))
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    An ordered Categorical preserves the category ordering.

    >>> pd.unique(
    ...     pd.Series(
    ...         pd.Categorical(list("baabc"), categories=list("abc"), ordered=True)
    ...     )
    ... )
    ['b', 'a', 'c']
    Categories (3, object): ['a' < 'b' < 'c']

    An array of tuples

    >>> pd.unique(pd.Series([("a", "b"), ("b", "a"), ("a", "c"), ("b", "a")]).values)
    array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object)
    )unique_with_maskrr   rV   rV   rW   unique3  s    ^ry   intc                 C  s8   t | dkrdS t| } t|  ddk }|S )aH  
    Return the number of unique values for integer array-likes.

    Significantly faster than pandas.unique for long enough sequences.
    No checks are done to ensure input is integral.

    Parameters
    ----------
    values : 1d array-like

    Returns
    -------
    int : The number of unique values in ``values``
    r   Zintp)lenrN   rL   ZbincountravelrS   sum)r>   resultrV   rV   rW   nunique_ints  s
    r   znpt.NDArray[np.bool_] | Nonemaskc                 C  s   t | dd} t| jtr |  S | }t| \}} |t| }|du r`|| }t||j|}|S |j| |d\}}t||j|}|dusJ ||dfS dS )z?See algorithms.unique for docs. Takes a mask for masked arrays.ry   r\   Nr   bool)	rk   rK   rJ   r'   ry   rv   r{   rZ   rS   )r>   r   rX   r   tableuniquesrV   rV   rW   rx     s    
rx   i@B r6   znpt.NDArray[np.bool_])compsr>   r?   c                 C  s  t | stdt| j dt |s<tdt|j dt|ttttj	fst
|}t|dd}t|dkr|jjdv rt| st|}n$t|trt|}nt|ddd}t| d	d}t|dd
}t|tj	s||S t|jrt||S t|jr t|js tj|jtdS t|jr<t||tS t|jtr`tt|t|S t|tkrt|dkr|jtkrt |! rdd }ndd }n0t"|j|j}|j|dd}|j|dd}t#j$}|||S )z
    Compute the isin boolean array.

    Parameters
    ----------
    comps : list-like
    values : list-like

    Returns
    -------
    ndarray[bool]
        Same length as `comps`.
    zIonly list-like objects are allowed to be passed to isin(), you passed a ``r]   r   r   ZiufcbT)rA   Zextract_rangeisinr@   rI      c                 S  s   t t | | t | S N)rL   
logical_orr   r|   isnan)cvrV   rV   rW   f  s    zisin.<locals>.fc                 S  s   t | | S r   )rL   r   r|   )abrV   rV   rW   <lambda>      zisin.<locals>.<lambda>FrC   )%r    	TypeErrortype__name__rK   r+   r-   r*   rL   rQ   ri   rk   r{   rJ   kindr"   r   r,   r2   r4   r   r#   pd_arrayr!   Zzerosshaper   rS   rU   r'   rM   _MINIMUM_COMP_ARR_LENr/   anyr   htableZismember)r   r>   Zorig_valuesZcomps_arrayr   commonrV   rV   rW   r     sf    









r   Tr   z
int | NonerU   z'tuple[npt.NDArray[np.intp], np.ndarray])r>   use_na_sentinel	size_hintna_valuer   r?   c           
      C  sf   | }| j jdv rt}t| \}} ||p,t| }|j| d|||d\}}	t||j |}t|	}	|	|fS )a(  
    Factorize a numpy array to codes and uniques.

    This doesn't do any coercion of types or unboxing before factorization.

    Parameters
    ----------
    values : ndarray
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.
    size_hint : int, optional
        Passed through to the hashtable's 'get_labels' method
    na_value : object, optional
        A value in `values` to consider missing. Note: only use this
        parameter when you know that you don't have any values pandas would
        consider missing in the array (NaN for float data, iNaT for
        datetimes, etc.).
    mask : ndarray[bool], optional
        If not None, the mask is used as indicator for missing values
        (True = missing, False = valid) instead of `na_value` or
        condition "val != val".

    Returns
    -------
    codes : ndarray[np.intp]
    uniques : ndarray
    mM)Zna_sentinelr   r   Z	ignore_na)rJ   r   r	   rv   r{   	factorizerZ   r   )
r>   r   r   r   r   rX   
hash_klassr   r   rP   rV   rV   rW   factorize_array$  s    $
	r   z    values : sequence
        A 1-D sequence. Sequences that aren't pandas objects are
        coerced to ndarrays before factorization.
    zt    sort : bool, default False
        Sort `uniques` and shuffle `codes` to maintain the
        relationship.
    zG    size_hint : int, optional
        Hint to the hashtable sizer.
    )r>   sortr   Fz%tuple[np.ndarray, np.ndarray | Index])r   r   r   r?   c           	      C  s  t | ttfr| j||dS t| dd} | }t | ttfr\| jdur\| j|d\}}||fS t | tj	sz| j|d\}}nVt
| } |s| jtkrt| }| rt| jdd}t||| } t| ||d	\}}|rt|d
krt|||ddd\}}t||j|}||fS )aN  
    Encode the object as an enumerated type or categorical variable.

    This method is useful for obtaining a numeric representation of an
    array when all that matters is identifying distinct values. `factorize`
    is available as both a top-level function :func:`pandas.factorize`,
    and as a method :meth:`Series.factorize` and :meth:`Index.factorize`.

    Parameters
    ----------
    {values}{sort}
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.

        .. versionadded:: 1.5.0
    {size_hint}
    Returns
    -------
    codes : ndarray
        An integer ndarray that's an indexer into `uniques`.
        ``uniques.take(codes)`` will have the same values as `values`.
    uniques : ndarray, Index, or Categorical
        The unique valid values. When `values` is Categorical, `uniques`
        is a Categorical. When `values` is some other pandas object, an
        `Index` is returned. Otherwise, a 1-D ndarray is returned.

        .. note::

           Even if there's a missing value in `values`, `uniques` will
           *not* contain an entry for it.

    See Also
    --------
    cut : Discretize continuous-valued array.
    unique : Find the unique value in an array.

    Notes
    -----
    Reference :ref:`the user guide <reshaping.factorize>` for more examples.

    Examples
    --------
    These examples all show factorize as a top-level method like
    ``pd.factorize(values)``. The results are identical for methods like
    :meth:`Series.factorize`.

    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"))
    >>> codes
    array([0, 0, 1, 2, 0])
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    With ``sort=True``, the `uniques` will be sorted, and `codes` will be
    shuffled so that the relationship is the maintained.

    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"),
    ...                               sort=True)
    >>> codes
    array([1, 1, 0, 2, 1])
    >>> uniques
    array(['a', 'b', 'c'], dtype=object)

    When ``use_na_sentinel=True`` (the default), missing values are indicated in
    the `codes` with the sentinel value ``-1`` and missing values are not
    included in `uniques`.

    >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O"))
    >>> codes
    array([ 0, -1,  1,  2,  0])
    >>> uniques
    array(['b', 'a', 'c'], dtype=object)

    Thus far, we've only factorized lists (which are internally coerced to
    NumPy arrays). When factorizing pandas objects, the type of `uniques`
    will differ. For Categoricals, a `Categorical` is returned.

    >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1])
    >>> uniques
    ['a', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Notice that ``'b'`` is in ``uniques.categories``, despite not being
    present in ``cat.values``.

    For all other pandas objects, an Index of the appropriate type is
    returned.

    >>> cat = pd.Series(['a', 'a', 'c'])
    >>> codes, uniques = pd.factorize(cat)
    >>> codes
    array([0, 0, 1])
    >>> uniques
    Index(['a', 'c'], dtype='object')

    If NaN is in the values, and we want to include NaN in the uniques of the
    values, it can be achieved by setting ``use_na_sentinel=False``.

    >>> values = np.array([1, 2, 1, np.nan])
    >>> codes, uniques = pd.factorize(values)  # default: use_na_sentinel=True
    >>> codes
    array([ 0,  1,  0, -1])
    >>> uniques
    array([1., 2.])

    >>> codes, uniques = pd.factorize(values, use_na_sentinel=False)
    >>> codes
    array([0, 1, 0, 2])
    >>> uniques
    array([ 1.,  2., nan])
    )r   r   r   r   N)r   )r   F)compat)r   r   r   T)r   assume_uniqueverify)rK   r+   r-   r   rk   r)   r.   freqrL   rQ   rM   rJ   rU   r/   r   r0   wherer   r{   	safe_sortrZ   )	r>   r   r   r   rX   rP   r   Z	null_maskr   rV   rV   rW   r   b  sD     


r   r;   )r   	ascending	normalizedropnar?   c                 C  s&   t jdtt d t| |||||dS )aK  
    Compute a histogram of the counts of non-null values.

    Parameters
    ----------
    values : ndarray (1-d)
    sort : bool, default True
        Sort by values
    ascending : bool, default False
        Sort in ascending order
    normalize: bool, default False
        If True then compute a relative histogram
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        convenience for pd.cut, only works with numeric data
    dropna : bool, default True
        Don't include counts of NaN

    Returns
    -------
    Series
    zupandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.r^   )r   r   r   binsr   )rd   re   rf   r   value_counts_internal)r>   r   r   r   r   r   rV   rV   rW   value_counts/  s    r   c              
   C  sX  ddl m}m} t| dd }|r$dnd}	|d urddlm}
 t| |rL| j} z|
| |dd}W n. ty } ztd	|W Y d }~n
d }~0 0 |j	|d
}|	|_
||j  }|jd|_| }|r|jdk r|jdd }tt|g}n8t| rF|| ddjj	|d
}|	|_
||j_
|j}t|tjs0t|}nt| trtt| j}|| |	dj||d }| j|j_|j}nt| dd} t| |\}}}|j tj!kr|tj"}||}|j t#kr|j t$kr|t$}n,|j |j kr|j dkrt%j&dt't( d ||_
||||	dd}|rB|j)|d}|rT||*  }|S )Nr   )r:   r;   rw   Z
proportioncount)cutT)Zinclude_lowestz+bins argument only works with numeric data.r   intervalFrC   )indexrw   )levelr   r   r   zstring[pyarrow_numpy]zThe behavior of value_counts with object-dtype is deprecated. In a future version, this will *not* perform dtype inference on the resulting index. To retain the old behavior, use `result.index = result.index.infer_objects()`r^   )r   rw   rD   )r   )+pandasr:   r;   getattrZpandas.core.reshape.tiler   rK   _valuesr   r   rw   r   notnarS   Z
sort_indexallZilocrL   r2   r{   r   rQ   rM   r,   ri   rangeZnlevelsgroupbysizenamesrk   value_counts_arraylikerJ   Zfloat16rm   r   rU   rd   re   rf   r   Zsort_valuesr}   )r>   r   r   r   r   r   r:   r;   Z
index_namerw   r   iierrr~   countsZlevelskeys_idxrV   rV   rW   r   ^  sr    
 


	r   z,tuple[ArrayLike, npt.NDArray[np.int64], int])r>   r   r   r?   c                 C  sb   | }t | } tj| ||d\}}}t|jrJ|rJ|tk}|| ||  }}t||j|}|||fS )z
    Parameters
    ----------
    values : np.ndarray
    dropna : bool
    mask : np.ndarray[bool] or None, default None

    Returns
    -------
    uniques : np.ndarray
    counts : np.ndarray[np.int64]
    r   )rN   r   Zvalue_countr#   rJ   r	   rZ   )r>   r   r   rX   r   r   Z
na_counterZres_keysrV   rV   rW   r     s    
r   firstzLiteral['first', 'last', False])r>   keepr   r?   c                 C  s   t | } tj| ||dS )ax  
    Return boolean ndarray denoting duplicate values.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array over which to check for duplicate values.
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first
          occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last
          occurrence.
        - False : Mark all duplicates as ``True``.
    mask : ndarray[bool], optional
        array indicating which elements to exclude from checking

    Returns
    -------
    duplicated : ndarray[bool]
    )r   r   )rN   r   
duplicated)r>   r   r   rV   rV   rW   r     s    r   c              
   C  s   t | dd} | }t| jr8t| } td| } | j|dS t| } tj| ||d\}}|durd||fS zt	
|}W n: ty } z"tjd| t d W Y d}~n
d}~0 0 t||j|}|S )	a  
    Returns the mode(s) of an array.

    Parameters
    ----------
    values : array-like
        Array over which to check for duplicate values.
    dropna : bool, default True
        Don't consider counts of NaN/NaT.

    Returns
    -------
    np.ndarray or ExtensionArray
    moder   r=   r   )r   r   NzUnable to sort modes: r^   )rk   r#   rJ   r3   r   _moderN   r   r   rL   r   r   rd   re   r   rZ   )r>   r   r   rX   ZnpresultZres_maskr   r~   rV   rV   rW   r     s&    

r   averager   r   znpt.NDArray[np.float64])r>   axismethod	na_optionr   pctr?   c              	   C  sd   t | j}t| } | jdkr4tj| |||||d}n,| jdkrXtj| ||||||d}ntd|S )a  
    Rank the values along a given axis.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Array whose values will be ranked. The number of dimensions in this
        array must not exceed 2.
    axis : int, default 0
        Axis over which to perform rankings.
    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        The method by which tiebreaks are broken during the ranking.
    na_option : {'keep', 'top'}, default 'keep'
        The method by which NaNs are placed in the ranking.
        - ``keep``: rank each NaN value with a NaN ranking
        - ``top``: replace each NaN with either +/- inf so that they
                   there are ranked at the top
    ascending : bool, default True
        Whether or not the elements should be ranked in ascending order.
    pct : bool, default False
        Whether or not to the display the returned rankings in integer form
        (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
       )is_datetimeliketies_methodr   r   r   rE   )r   r   r   r   r   r   z&Array with ndim > 2 are not supported.)r#   rJ   rN   ndimr   Zrank_1dZrank_2dr   )r>   r   r   r   r   r   r   ZranksrV   rV   rW   rank+  s.    



r   r   )indicesr   
allow_fillc                 C  sz   t | tjtttfs&tjdtt	 d t
| s8t| } t|}|rht|| j|  t| ||d|d}n| j||d}|S )ak	  
    Take elements from an array.

    Parameters
    ----------
    arr : array-like or scalar value
        Non array-likes (sequences/scalars without a dtype) are coerced
        to an ndarray.

        .. deprecated:: 2.1.0
            Passing an argument other than a numpy.ndarray, ExtensionArray,
            Index, or Series is deprecated.

    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    axis : int, default 0
        The axis over which to select values.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any other
          negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type (``self.dtype.na_value``) is used.

        For multi-dimensional `arr`, each *element* is filled with
        `fill_value`.

    Returns
    -------
    ndarray or ExtensionArray
        Same type as the input.

    Raises
    ------
    IndexError
        When `indices` is out of bounds for the array.
    ValueError
        When the indexer contains negative values other than ``-1``
        and `allow_fill` is True.

    Notes
    -----
    When `allow_fill` is False, `indices` may be whatever dimensionality
    is accepted by NumPy for `arr`.

    When `allow_fill` is True, `indices` should be 1-D.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.

    Examples
    --------
    >>> import pandas as pd

    With the default ``allow_fill=False``, negative numbers indicate
    positional indices from the right.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1])
    array([10, 10, 30])

    Setting ``allow_fill=True`` will place `fill_value` in those positions.

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True)
    array([10., 10., nan])

    >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True,
    ...      fill_value=-10)
    array([ 10,  10, -10])
    zpd.api.extensions.take accepting non-standard inputs is deprecated and will raise in a future version. Pass either a numpy.ndarray, ExtensionArray, Index, or Series instead.r^   T)r   r   
fill_value)r   )rK   rL   rQ   r*   r+   r-   rd   re   rf   r   r   rM   r   r5   r   r1   take)arrr   r   r   r   r~   rV   rV   rW   r   k  s     U

r   leftz$NumpyValueArrayLike | ExtensionArrayzLiteral['left', 'right']zNumpySorter | Noneznpt.NDArray[np.intp] | np.intp)r   valuesidesorterr?   c                 C  s   |durt |}t| tjr| jjdv rt|s8t|rt| jj	}t|rZt
|gnt
|}||jk r||jk r| j}n|j}t|rtt|	|}qttt||d}nt| } | j|||dS )a  
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `arr` (a) such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `arr` would be preserved.

    Assuming that `arr` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``arr[i-1] < value <= self[i]``
    right   ``arr[i-1] <= value < self[i]``
    ======  ================================

    Parameters
    ----------
    arr: np.ndarray, ExtensionArray, Series
        Input array. If `sorter` is None, then it must be sorted in
        ascending order, otherwise `sorter` must be an array of indices
        that sort it.
    value : array-like or scalar
        Values to insert into `arr`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array-like, optional
        Optional array of integer indices that sort array a into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints or int
        If value is array-like, array of insertion points.
        If value is scalar, a single integer.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.
    NiurI   )r   r   )r   rK   rL   rQ   rJ   r   r   r   iinfor   r2   minr   maxr   rz   r   r   r3   searchsorted)r   r   r   r   r   Z	value_arrrJ   rV   rV   rW   r     s(    0

	r   >   rn   rm   ro   rq   rp   rl   )nr   c                 C  sX  t |}tj}| j}t|}|r(tj}ntj}t|t	rF| 
 } | j}t| tjst| d|j dr|dkrtdt| j d| || | |S tt| j dd}| jjdv rtj}| d} t}d	}n4|rtj}n(|jd
v r
| jjdv rtj}ntj}| j}|dkr&| dd} t|}tj| j|d}	tdgd }
|dkrbtd|nt|d|
|< ||	t|
< | jjt v rt!j"| |	|||d ntdgd }|dkrt|dntd|||< t|}tdgd }|dkrtd| n
t| d||< t|}|| | | | |	|< |r:|	d}	|dkrT|	dddf }	|	S )aQ  
    difference of n between self,
    analogous to s-s.shift(n)

    Parameters
    ----------
    arr : ndarray or ExtensionArray
    n : int
        number of periods
    axis : {0, 1}
        axis to shift on
    stacklevel : int, default 3
        The stacklevel for the lost dtype warning.

    Returns
    -------
    shifted
    __r   zcannot diff z	 on axis=zK has no 'diff' method. Convert to a suitable dtype prior to calling 'diff'.Fr   rH   Tr   )rq   rp   r   r   rI   NrE   )Zdatetimelikeztimedelta64[ns])#rz   rL   nanrJ   r   operatorxorsubrK   r(   Zto_numpyrQ   hasattrr   
ValueErrorr   shiftr   r   rn   rR   r	   Zobject_rw   rm   rl   r   Zreshapeemptyr   slicerh   _diff_specialr   Zdiff_2d)r   r   r   narJ   Zis_boolopZis_timedeltaZ	orig_ndimZout_arrZ
na_indexerZ_res_indexerZres_indexerZ_lag_indexerZlag_indexerrV   rV   rW   diff;  sh    



""&

r   zIndex | ArrayLikeznpt.NDArray[np.intp] | Nonez.AnyArrayLike | tuple[AnyArrayLike, np.ndarray])r>   rP   r   r   r   r?   c              	   C  s  t | tjttfstdd}t | jtsFtj	| dddkrFt
| }nVz|  }| |}W n> ttjfy   | jrt | d trt| }nt
| }Y n0 |du r|S t|stdtt|}|stt| t| kstd|du r t| \}} |t| }||  t||}|rl| }	|rX|t|  k |t| kB }
d||
< nd}
t|	|d	d
}nhtjt|td}||tt| |j|dd}|r|d	k}
|r|
|t|  k B |t| kB }
|r|
durt ||
d	 |t|fS )a  
    Sort ``values`` and reorder corresponding ``codes``.

    ``values`` should be unique if ``codes`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``codes`` is not None.
    codes : np.ndarray[intp] or None, default None
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``-1``.
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``codes`` is None.
    verify : bool, default True
        Check if codes are out of bound for the values and put out of bound
        codes equal to ``-1``. If ``verify=False``, it is assumed there
        are no out of bound codes. Ignored when ``codes`` is None.

    Returns
    -------
    ordered : AnyArrayLike
        Sorted ``values``
    new_codes : ndarray
        Reordered ``codes``; returned when ``codes`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``codes`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``codes`` is not None and ``values`` contain duplicates.
    zbOnly np.ndarray, ExtensionArray, and Index objects are allowed to be passed to safe_sort as valuesNFr`   rc   r   zMOnly list-like objects or None are allowed to be passed to safe_sort as codesz,values should be unique if codes is not Noner   r   rI   wrap)r   )!rK   rL   rQ   r*   r+   r   rJ   r'   r
   rg   _sort_mixedargsortr   decimalInvalidOperationr   rh   _sort_tuplesr    r   rM   r{   ry   r   rv   Zmap_locationslookupr1   r   rz   putZarangeZputmask)r>   rP   r   r   r   r   orderedr   tZorder2r   Z	new_codesZreverse_indexerrV   rV   rW   r     s`    0





r   )r?   c           
      C  s   t jdd | D td}t jdd | D td}| | @ }t | | }t | | }| d |}| d |}| d }t |||g}	| |	S )z3order ints before strings before nulls in 1d arraysc                 S  s   g | ]}t |tqS rV   )rK   r[   .0xrV   rV   rW   
<listcomp>0  r   z_sort_mixed.<locals>.<listcomp>rI   c                 S  s   g | ]}t |qS rV   )r/   r   rV   rV   rW   r  1  r   r   )rL   r2   r   r   Znonzeror   Zconcatenate)
r>   Zstr_posZnull_posnum_posZstr_argsortZnum_argsortZstr_locsZnum_locsZ	null_locsZlocsrV   rV   rW   r   .  s    r   c                 C  s:   ddl m} ddlm} || d\}}||dd}| | S )a  
    Convert array of tuples (1d) to array of arrays (2d).
    We need to keep the columns separately as they contain different types and
    nans (can't use `np.sort` as it may fail when str and nan are mixed in a
    column as types cannot be compared).
    r   )	to_arrays)lexsort_indexerNT)Zorders)Z"pandas.core.internals.constructionr  Zpandas.core.sortingr  )r>   r  r  Zarraysr   indexerrV   rV   rW   r   =  s
    r   zArrayLike | Index)lvalsrvalsr?   c           	      C  s  ddl m} t 8 tjddtd t| dd}t|dd}W d   n1 sR0    Y  |j|dd	\}}t	|j
|j
}|||jd
dd}t| trt|tr| | }n<t| tr| j} t|tr|j}t| |g}t|}t|}||j
}t||S )a  
    Extracts the union from lvals and rvals with respect to duplicates and nans in
    both arrays.

    Parameters
    ----------
    lvals: np.ndarray or ExtensionArray
        left values which is ordered in front.
    rvals: np.ndarray or ExtensionArray
        right values ordered after lvals.

    Returns
    -------
    np.ndarray or ExtensionArray
        Containing the unsorted union of both arrays.

    Notes
    -----
    Caller is responsible for ensuring lvals.dtype == rvals.dtype.
    r   r;   ignorez<The behavior of value_counts with object-dtype is deprecated)categoryFr   Nr   rz   )r   rJ   rD   )r   r;   rd   catch_warningsfilterwarningsrf   r   alignrL   maximumr>   r   rK   r,   appendry   r+   r   r$   r3   Zreindexrepeat)	r  r	  r;   Zl_countZr_countZfinal_countZunique_valscombinedZrepeatsrV   rV   rW   union_with_duplicatesL  s.    
*

r  zLiteral['ignore'] | Nonez#np.ndarray | ExtensionArray | Index)r   	na_actionconvertr?   c           	        s  |dvrd| d}t |t|rzt|trJt|drJ|  fdd}n0ddlm} t|dkrr||tj	d	}n||}t|t
r|d
kr||j  }|j| }t|j|}|S t| s|  S | jtdd}|du rtj|||dS tj||t|tj|dS dS )a  
    Map values using an input mapping or function.

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}, default None
        If 'ignore', propagate NA values, without passing them to the
        mapping correspondence.
    convert : bool, default True
        Try to find better dtype for elementwise function results. If
        False, leave as dtype=object.

    Returns
    -------
    Union[ndarray, Index, ExtensionArray]
        The output of the mapping function applied to the array.
        If the function returns a tuple with more than one element
        a MultiIndex will be returned.
    )Nr  z+na_action must either be 'ignore' or None, z was passed__missing__c                   s"    t | trt| rtjn|  S r   )rK   floatrL   r   r   )r  Zdict_with_defaultrV   rW   r     s   zmap_array.<locals>.<lambda>r   r
  rI   r  FrC   N)r  )r   r  )r   r   rK   dictr   r   r;   r{   rL   rl   r-   r   r   Zget_indexerr1   r   rD   rS   rU   r
   Z	map_inferZmap_infer_maskr/   rR   rB   )	r   Zmapperr  r  msgr;   r  
new_valuesr>   rV   r  rW   	map_array  s2    
r  )N)TNNN)FTN)TFFNT)TFFNT)N)r   N)TN)r   r   r   TF)r   FN)r   N)r   )NTFT)NT)__doc__
__future__r   r   r   textwrapr   typingr   r   r   rd   numpyrL   Zpandas._libsr   r   r   r	   r
   Zpandas._typingr   r   r   r   r   r   Zpandas.util._decoratorsr   Zpandas.util._exceptionsr   Zpandas.core.dtypes.castr   r   Zpandas.core.dtypes.commonr   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   Zpandas.core.dtypes.concatr$   Zpandas.core.dtypes.dtypesr%   r&   r'   r(   Zpandas.core.dtypes.genericr)   r*   r+   r,   r-   r.   Zpandas.core.dtypes.missingr/   r0   Zpandas.core.array_algos.taker1   Zpandas.core.constructionr2   r   r3   r4   Zpandas.core.indexersr5   r6   r7   r8   r   r9   r:   r;   Zpandas.core.arraysr<   r=   rN   rZ   rk   ZComplex128HashTableZComplex64HashTableZFloat64HashTableZFloat32HashTableZUInt64HashTableZUInt32HashTableZUInt16HashTableZUInt8HashTableZInt64HashTableZInt32HashTableZInt16HashTableZInt8HashTableZStringHashTableZPyObjectHashTablert   rv   rs   ry   r   rx   Zunique1dr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  rV   rV   rV   rW   <module>   s    D 	N!a]    >    :     1     f !   .     C   x  Xs     :  