o
    +if                     @   s   d dl Zdd ZdddZdS )    Nc                 C   s   ||  |    }t|}t||   }|dkrX|| }tt|ddd }|D ]+}t||k\}	tt|	|}
|j	|	|
dd}	||	  d7  < ||
8 }|dkrW nq,|
tjS )a  Computes approximate mode of multivariate hypergeometric.
    This is an approximation to the mode of the multivariate
    hypergeometric given by class_counts and n_draws.
    It shouldn't be off by more than one.
    It is the mostly likely outcome of drawing n_draws many
    samples from the population given by class_counts.
    Args
    ----------
    class_counts : ndarray of int
        Population per class.
    n_draws : int
        Number of draws (samples to draw) from the overall population.
    rng : random state
        Used to break ties.
    Returns
    -------
    sampled_classes : ndarray of int
        Number of samples drawn from each class.
        np.sum(sampled_classes) == n_draws

    r   NF)sizereplace   )sumnpfloorintsortuniquewhereminlenchoiceastypeint64)class_countsn_drawsrng
continuousflooredneed_to_add	remaindervaluesvalueindsadd_now r   P/var/www/html/corbot_env/lib/python3.10/site-packages/datasets/utils/stratify.pyapproximate_mode   s    
r   
   c              	   c   sP   t j| dd\}}|jd }t |}t |dk rtd||k r+td||f ||k r7td||f t t j|dd	t |d
d }	t	|D ]X}
t
|||}|| }t
|||}g }g }t	|D ].}||| }|	| j|dd}||d
||   |||| || ||    qg||}||}||fV  qMd
S )a  

    Provides train/test indices to split data in train/test sets.
    It's reference is taken from StratifiedShuffleSplit implementation
    of scikit-learn library.

    Args
    ----------

    n_train : int,
        represents the absolute number of train samples.

    n_test : int,
        represents the absolute number of test samples.

    random_state : int or RandomState instance, default=None
        Controls the randomness of the training and testing indices produced.
        Pass an int for reproducible output across multiple function calls.

    n_splits : int, default=10
        Number of re-shuffling & splitting iterations.
    T)return_inverser      zMinimum class count errorzLThe train_size = %d should be greater or equal to the number of classes = %dzKThe test_size = %d should be greater or equal to the number of classes = %d	mergesort)kindNr   clip)mode)r   r   shapebincountr   
ValueErrorsplitargsortcumsumranger   permutationtakeextend)yn_trainn_testr   n_splitsclasses	y_indices	n_classesr   class_indices_n_iclass_counts_remainingt_itraintestir.   perm_indices_class_ir   r   r   )stratified_shuffle_split_generate_indices6   s<   



$$

rA   )r    )numpyr   r   rA   r   r   r   r   <module>   s    2