Skip to content

dummy_datasets

generate_corner_dataset(num_samples, corner_position='top_right', random_seed=None)

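Generate a toy 2-D binary classification dataset: points are drawn uniformly from the unit square, and a point is labelled 1 when it falls in the chosen corner quadrant and 0 otherwise.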

Parameters

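num_samples : int Number of samples to generate. corner_position : str, optional Position of the corner in the feature space, by default "top_right". random_seed : int or None, optional Seed for the random number generator, by default None (non-reproducible draw).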

Returns

(np.ndarray, np.ndarray) Tuple of feature matrix and target vector.

Raises

ValueError If corner_position is not one of "top_left", "top_right", "bottom_left", "bottom_right".

Source code in src/antakia/utils/dummy_datasets.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def generate_corner_dataset(
        num_samples: int, corner_position: str = "top_right", random_seed: int | None = None
) -> (np.ndarray, np.ndarray):
    """Generate a toy dataset with a corner of the feature space.

    Parameters
    ----------
    num_samples : int
        Number of samples to generate.
    corner_position : str, optional
        Position of the corner in the feature space, by default "top_right"

    Returns
    -------
    (np.ndarray, np.ndarray)
        Tuple of feature matrix and target vector.

    Raises
    ------
    ValueError
        If corner_position is not one of "top_left", "top_right", "bottom_left", "bottom_right".
    """
    np.random.seed(random_seed)
    X = np.random.uniform(0, 1, (num_samples, 2))

    if corner_position == "top_right":
        mask = (X[:, 0] > 0.5) & (X[:, 1] > 0.5)
    elif corner_position == "top_left":
        mask = (X[:, 0] < 0.5) & (X[:, 1] > 0.5)
    elif corner_position == "bottom_right":
        mask = (X[:, 0] > 0.5) & (X[:, 1] < 0.5)
    elif corner_position == "bottom_left":
        mask = (X[:, 0] < 0.5) & (X[:, 1] < 0.5)

    else:
        raise ValueError(
            "Invalid corner position must be one of: top_right, top_left, bottom_right, bottom_left."
        )

    y = mask.astype(int)
    return X, y