@File : _knn.py @Time : 2025/03/28 11:43:45 @Author : Alejandro Marrero @Version : 1.0 @Contact : amarrerd@ull.edu.es @License : (C)Copyright 2025, Alejandro Marrero @Desc : None

sparseness(instances, archive, k=15)

Computes the sparseness of the instances in the population.

Parameters:
  • instances (Sequence[Instance]) –

    Sequence of instances to compute the sparseness.

  • archive (Sequence[Instance]) –

    Archive of instances to compute the sparseness.

  • k (int, default: 15 ) –

    Number of neighbours to use in KNN. Defaults to 15.

Returns:
  • ndarray

    np.ndarray: Numpy array with the sparseness value of each instance. The same value is also assigned to the `s` attribute of the corresponding instance.

Source code in digneapy/_core/_knn.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def sparseness(
    instances: Sequence[Instance], archive: Sequence[Instance], k: int = 15
) -> np.ndarray:
    """Computes the sparseness of the instances in the population.

    The sparseness of an instance is the mean Euclidean distance between its
    descriptor and the descriptors of its ``k`` nearest neighbours. Neighbours
    are searched among every other instance and every member of the archive;
    the instance itself is never counted as its own neighbour. The computed
    value is also stored in the ``s`` attribute of each instance.

    Args:
        instances (Sequence[Instance]): Sequence of instances to compute the sparseness.
        archive (Sequence[Instance]): Archive of instances to compute the sparseness.
        k (int, optional): Number of neighbours to use in KNN. Defaults to 15.

    Raises:
        ValueError: If ``k`` is lower than one or if there are fewer than ``k``
            candidate neighbours (other instances plus archive members).

    Returns:
        np.ndarray: Numpy array with the sparseness value assigned to each instance.
    """
    num_instances = len(instances)
    if num_instances == 0:
        # Nothing to evaluate; avoids stacking an empty descriptor array.
        return np.zeros(0)
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    _instance_desc = np.array([instance.descriptor for instance in instances])
    combined = (
        _instance_desc
        if len(archive) == 0
        else np.vstack(
            [
                _instance_desc,
                np.array([instance.descriptor for instance in archive]),
            ]
        )
    )
    num_candidates = len(combined) - 1
    if k > num_candidates:
        raise ValueError(
            f"k={k} neighbours requested but only {num_candidates} are available"
        )

    result = np.zeros(num_instances)
    # The mask spans instances AND archive so archive members count as neighbours.
    mask = np.ones(len(combined), dtype=bool)
    for i, instance in enumerate(instances):
        mask[i] = False  # leave the instance itself out
        differences = combined[i] - combined[mask]
        mask[i] = True  # restore the mask for the next iteration
        distances = np.linalg.norm(differences, axis=1)
        # partition(k - 1) places the k smallest distances in the first k slots.
        _neighbors = np.partition(distances, k - 1)[:k]
        result[i] = np.sum(_neighbors) / k
        instance.s = result[i]
    return result

sparseness_only_values(instances, archive, k=15)

Computes the sparseness of the instances in the population.

Parameters:
  • instances (Sequence[Instance]) –

    Sequence of instances to compute the sparseness.

  • archive (Sequence[Instance]) –

    Archive of instances to compute the sparseness.

  • k (int, default: 15 ) –

    Number of neighbours to use in KNN. Defaults to 15.

Returns:
  • ndarray

    np.ndarray: Numpy array with the sparseness value assigned to each instance.

Source code in digneapy/_core/_knn.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def sparseness_only_values(
    instances: Sequence[Instance], archive: Sequence[Instance], k: int = 15
) -> np.ndarray:
    """Computes the sparseness of the instances in the population.

    The sparseness of an instance is the mean Euclidean distance between its
    descriptor and the descriptors of its ``k`` nearest neighbours. Neighbours
    are searched among every other instance and every member of the archive;
    the instance itself is never counted as its own neighbour. The instances
    are not modified, only the values are returned.

    Args:
        instances (Sequence[Instance]): Sequence of instances to compute the sparseness.
        archive (Sequence[Instance]): Archive of instances to compute the sparseness.
        k (int, optional): Number of neighbours to use in KNN. Defaults to 15.

    Raises:
        ValueError: If ``k`` is lower than one or if there are fewer than ``k``
            candidate neighbours (other instances plus archive members).

    Returns:
        np.ndarray: Numpy array with the sparseness value assigned to each instance.
    """
    num_instances = len(instances)
    if num_instances == 0:
        # Nothing to evaluate; avoids indexing an empty descriptor array.
        return np.zeros(0)
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    _instance_desc = np.array([instance.descriptor for instance in instances])
    combined = (
        _instance_desc
        if len(archive) == 0
        else np.vstack(
            [
                _instance_desc,
                np.array([instance.descriptor for instance in archive]),
            ]
        )
    )
    num_candidates = len(combined) - 1
    if k > num_candidates:
        raise ValueError(
            f"k={k} neighbours requested but only {num_candidates} are available"
        )

    # Squared distances through ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b
    dist = (
        (_instance_desc**2).sum(-1)[:, None]
        + (combined**2).sum(-1)[None, :]
        - 2 * _instance_desc @ combined.T
    )
    dist = np.nan_to_num(dist, nan=np.inf)
    # Clamping is necessary: rounding errors in the expansion above can make
    # some squared distances slightly negative. np.maximum is used instead of
    # np.clip(..., min=...) because that keyword requires NumPy >= 2.1.
    dist = np.sqrt(np.maximum(dist, 0.0))
    # Instances are the first rows of `combined`, so entry (i, i) is the
    # distance of instance i to itself. Mark it as infinitely far so it can
    # never be selected, instead of assuming it lands first after partitioning.
    own = np.arange(num_instances)
    dist[own, own] = np.inf
    # partition(k - 1) places the k smallest distances of each row in the first k columns.
    _neighbors = np.partition(dist, k - 1, axis=1)[:, :k]
    s_ = np.sum(_neighbors, axis=1) / k
    return s_