a
    OfD                     @  st  d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlZd d	lmZmZ d d
lmZ d dlmZmZ d dlmZmZmZmZ d dlm Z m!Z!m"Z" erd dlm#Z# ej$ej%ej&ej'ej(ej)ej)dZ*ej&ej+dfej)ej,e
fej$ej-dfej%ej-dfej'ej-dfej.ej,dfej(ej/d fiZ0ej-dej+dej,diZ1G dd deZ2dS )    )annotations)TYPE_CHECKINGAnyN)infer_dtype)iNaT)NoBufferPresent)cache_readonly)BaseMaskedDtype)
ArrowDtypeDatetimeTZDtype)is_string_dtype)PandasBufferPandasBufferPyarrow)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
Endiannessdtype_to_arrow_c_fmt)Buffer)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                   @  s   e Zd ZdZd0ddddddZd	d
ddZed	d
ddZedd
ddZ	dd
ddZ
edd Zedd Zed	d
ddZedd
ddZd	d
ddZd1d d!d"d#Zd$d
d%d&Zd'd
d(d)Zd*d
d+d,Zd-d
d.d/ZdS )2PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    Tz	pd.SeriesboolNone)column
allow_copyreturnc                 C  sN   t |tjrtd|j dt |tjs>tdt| d|| _|| _	dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zExpected a Series, got a DataFrame. This likely happened because you called __dataframe__ on a DataFrame which, after converting column names to string, resulted in duplicated names: zD. Please rename these columns before using the interchange protocol.zColumns of type  not handled yetN)

isinstancepdZ	DataFrame	TypeErrorcolumnsSeriesNotImplementedErrortype_col_allow_copy)selfr"   r#    r0   W/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/core/interchange/column.py__init__T   s    zPandasColumn.__init__int)r$   c                 C  s   | j jS )z2
        Size of the column, in elements.
        )r-   sizer/   r0   r0   r1   r4   h   s    zPandasColumn.sizec                 C  s   dS )z7
        Offset of first element. Always zero.
        r   r0   r5   r0   r0   r1   offsetn   s    zPandasColumn.offsetztuple[DtypeKind, int, str, str]c                 C  s   | j j}t|tjrB| j jj}| |j\}}}}tj	||t
jfS t|rvt| j dv rltjdt|t
jfS tdn
| |S d S )N)stringempty   z.Non-string object dtypes are not supported yet)r-   dtyper&   r'   ZCategoricalDtypevaluescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr   r   STRINGr   r+   )r/   r:   r<   _ZbitwidthZc_arrow_dtype_f_strr0   r0   r1   r:   v   s.    


zPandasColumn.dtypec                 C  s   t |jd}|du r&td| dt|tr:|jj}n.t|trN|j	j}nt|t
rb|jj}n|j}|dkr||jtj|fS ||jd t||fS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolzbool[pyarrow]r9   )	_NP_KINDSgetkind
ValueErrorr&   r
   Znumpy_dtype	byteorderr   baser	   itemsizer   BOOLr   )r/   r:   rE   rG   r0   r0   r1   r=      s"    





z$PandasColumn._dtype_from_pandasdtypec                 C  s:   | j d tjkstd| jjjdtt	| jjj
dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)Z
is_orderedZis_dictionary
categories)r:   r   r>   r(   r-   catorderedr   r'   r*   rK   r5   r0   r0   r1   describe_categorical   s    z!PandasColumn.describe_categoricalc                 C  s   t | jjtr tj}d}||fS t | jjtr^| jjjj	d 
 d d u rTtjd fS tjdfS | jd }zt| \}}W n" ty   td| dY n0 ||fS )N   r   rB   z not yet supported)r&   r-   r:   r	   r   USE_BYTEMASKr
   array	_pa_arraychunksbuffersNON_NULLABLEZUSE_BITMASK_NULL_DESCRIPTIONKeyErrorr+   )r/   Zcolumn_null_dtypeZ
null_valuerE   nullvaluer0   r0   r1   describe_null   s    


zPandasColumn.describe_nullc                 C  s   | j    S )zB
        Number of null elements. Should always be known.
        )r-   Zisnasumitemr5   r0   r0   r1   
null_count   s    zPandasColumn.null_countzdict[str, pd.Index]c                 C  s   d| j jiS )z8
        Store specific metadata of the column.
        zpandas.index)r-   indexr5   r0   r0   r1   metadata   s    zPandasColumn.metadatac                 C  s   dS )zE
        Return the number of chunks the column consists of.
        rO   r0   r5   r0   r0   r1   
num_chunks   s    zPandasColumn.num_chunksNz
int | None)n_chunksc                 c  sr   |rh|dkrht | j}|| }|| dkr2|d7 }td|| |D ]"}t| jj|||  | jV  qBn| V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        rO   r   N)lenr-   ranger   Zilocr.   )r/   ra   r4   stepstartr0   r0   r1   
get_chunks   s    

zPandasColumn.get_chunksr   c                 C  s\   |   ddd}z|  |d< W n ty2   Y n0 z|  |d< W n tyV   Y n0 |S )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsrh   ri   )_get_data_buffer_get_validity_bufferr   _get_offsets_buffer)r/   rT   r0   r0   r1   get_buffers  s    zPandasColumn.get_buffersz.tuple[Buffer, tuple[DtypeKind, int, str, str]]c           	      C  s  | j d tjtjtjtjtjfv r| j }| j d tjkr^t| j d dkr^| jj	
d }n^| jj}t| jj tr||j}n@t| jj tr|jjd }t| d t|d}||fS |j}t|| jd}n| j d tjkr| jjj}t|| jd}| |j }nz| j d tjkrj| j }t }|D ]$}t|tr*||j dd	 q*tt!j"|d
d}| j }nt#d| jj  d||fS )zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r         NrO   length)r#   utf-8encodingZuint8)r:   rB   r%   )$r:   r   INTUINTFLOATrJ   DATETIMErb   r-   dtZ
tz_convertto_numpyrQ   r&   r	   _datar
   rR   rS   r   rT   Z_ndarrayr   r.   r>   r;   _codesr=   r@   	bytearraystrextendencodenpZ
frombufferr+   )	r/   r:   Znp_arrarrbufferr<   bufr   objr0   r0   r1   rj   0  sH    	"


zPandasColumn._get_data_bufferztuple[Buffer, Any] | Nonec                 C  sp  | j \}}t| jjtrn| jjjjd }tj	dt
j	tjf}| d du rNdS t| d t|d}||fS t| jjtr| jjj}t|}tj	dt
j	tjf}||fS | jd tjkr4| j }|dk}| }tjt|ftjd}t|D ] \}	}
t|
tr|n|||	< qt|}tj	dt
j	tjf}||fS zt|  d}W n tyb   tdY n0 t|dS )	z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   rO   Nrp   r9   shaper:   z! so does not have a separate maskzSee self.describe_null)rZ   r&   r-   r:   r
   rQ   rR   rS   r   rJ   r   r   r?   rT   r   rb   r	   Z_maskr   r@   rz   r   zerosZbool_	enumerater~   _NO_VALIDITY_BUFFERrW   r+   r   )r/   rX   invalidr   r:   r   maskr   Zvalidr   r   msgr0   r0   r1   rk   n  s>    



z!PandasColumn._get_validity_bufferztuple[PandasBuffer, Any]c           	      C  s   | j d tjkr| j }d}tjt|d ftjd}t	|D ]6\}}t
|trj|jdd}|t|7 }|||d < q@t|}tjdtjtjf}ntd||fS )a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   rO   r   rr   rs   @   zJThis column has a fixed-length dtype so it does not have an offsets buffer)r:   r   r@   r-   rz   r   r   rb   Zint64r   r&   r~   r   r   ru   r   ZINT64r   r?   r   )	r/   r;   Zptrri   r   vr   r   r:   r0   r0   r1   rl     s&    

z PandasColumn._get_offsets_buffer)T)N)__name__
__module____qualname____doc__r2   r4   propertyr6   r   r:   r=   rN   rZ   r]   r_   r`   rf   rm   rj   rk   rl   r0   r0   r0   r1   r   H   s,   !

%>9r   )3
__future__r   typingr   r   numpyr   Zpandas._libs.libr   Zpandas._libs.tslibsr   Zpandas.errorsr   Zpandas.util._decoratorsr   Zpandas.core.dtypes.dtypesr	   Zpandasr'   r
   r   Zpandas.api.typesr   Zpandas.core.interchange.bufferr   r   Z*pandas.core.interchange.dataframe_protocolr   r   r   r   Zpandas.core.interchange.utilsr   r   r   r   ru   rv   rw   rJ   r@   rx   rC   ZUSE_NANZUSE_SENTINELrU   r>   rP   rV   r   r   r0   r0   r0   r1   <module>   sH   