@File    : _cvt_archive.py
@Time    : 2024/09/18 14:44:44
@Author  : Alejandro Marrero
@Version : 1.0
@Contact : amarrerd@ull.edu.es
@License : (C)Copyright 2024, Alejandro Marrero
@Desc    : None

CVTArchive

Bases: Archive, RNG

An Archive that divides a high-dimensional measure space into k homogeneous geometric regions. Based on the paper by Vassiliades et al. (2018): https://ieeexplore.ieee.org/document/8000667

The computational complexity of the method we provide for constructing the CVT (in Algorithm 1) is O(ndki), where n is the number of d-dimensional samples to be clustered, k is the number of clusters, and i is the number of iterations needed until convergence.
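
A minimal usage sketch (illustrative values; it assumes CVTArchive is re-exported from digneapy.archives, as the module path suggests):

import numpy as np
from digneapy.archives import CVTArchive

# Partition a 2-D measure space [0, 1] x [0, 1] into 50 regions,
# estimating the centroids via k-means over 10,000 uniform samples.
archive = CVTArchive(k=50, ranges=[(0.0, 1.0), (0.0, 1.0)], n_samples=10_000, seed=42)

print(archive.regions)          # 50
print(archive.dimensions)       # 2
print(archive.centroids.shape)  # (50, 2)
lower, upper = archive.bounds   # per-dimension lower/upper bound arrays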

Source code in digneapy/archives/_cvt_archive.py
class CVTArchive(Archive, RNG):
    """An Archive that divides a high-dimensional measure space into k homogeneous geometric regions.
    Based on the paper by Vassiliades et al. (2018) <https://ieeexplore.ieee.org/document/8000667>
    > The computational complexity of the method we provide for constructing the CVT (in Algorithm 1) is O(ndki),
    > where n is the number of d-dimensional samples to be clustered, k is the number of clusters,
    > and i is the number of iterations needed until convergence
    """

    def __init__(
        self,
        k: int,
        ranges: Sequence[Tuple[float, float]],
        n_samples: int,
        centroids: Optional[npt.NDArray | str] = None,
        samples: Optional[npt.NDArray | str] = None,
        dtype=np.float64,
        seed: int = 42,
    ):
        """Creates a CVTArchive object

        Args:
            k (int): Number of centroids (regions) to create
            ranges (Sequence[Tuple[float, float]]): Ranges of the measure space. Upper and lower bound of each
            dimension of the measure space, e.g. ``[(-1, 1), (-2, 2)]``
            indicates the first dimension should have bounds :math:`[-1,1]`
            (inclusive), and the second dimension should have bounds
            :math:`[-2,2]` (inclusive). The legnth of ``ranges`` indicates the number of dimensions of the measure space.
            n_samples (int): Number of samples to generate before calculating the centroids.
            centroids (Optional[npt.NDArray  |  str], optional): Precalculated centroids for the archive.
            The options are a np.ndarray with the values of ``k`` centroids or a .txt with the centroids to be loaded by Numpy. Defaults to None.
            samples (Optional[npt.NDArray  |  str], optional): Precalculated samples for the archive.
            The options are a np.ndarray with the values of ``n_samples`` samples or a .txt with the samples to be loaded by Numpy. Defaults to None.

        Raises:
            ValueError: If len(ranges) <= 0.
            ValueError: If the number of samples is less than zero or less than the number of regions (k).
            ValueError: If the number of regions is less than zero.
            ValueError: If the samples file cannot be loaded.
            ValueError: If given a samples np.ndarray the number of samples in the file is different from the number of expected samples (n_samples).
            ValueError: If the centroids file cannot be loaded.
            ValueError: If given a centroids np.ndarray the number of centroids in the file is different from the number of regions (k).
        """
        Archive.__init__(self, threshold=np.finfo(np.float32).max, dtype=dtype)
        if k <= 0:
            raise ValueError(f"The number of regions (k = {k}) must be >= 1")

        if len(ranges) <= 0:
            raise ValueError(
                f"ranges must have length >= 1 and it has length {len(ranges)}"
            )

        if n_samples <= 0 or n_samples < k:
            raise ValueError(
                f"The number of samples (n_samples = {n_samples}) must be >= 1 and >= regions (k = {k})"
            )

        self._dimensions = len(ranges)
        ranges = list(zip(*ranges))
        self._lower_bounds = np.array(ranges[0], dtype=self._dtype)
        self._upper_bounds = np.array(ranges[1], dtype=self._dtype)
        self._interval = self._upper_bounds - self._lower_bounds
        self._k = k
        self._n_samples = n_samples
        self._samples = None
        self._centroids = None
        self.initialize_rng(seed=seed)
        self._kmeans = KMeans(n_clusters=self._k, n_init=1, random_state=self._seed)

        # Data Structure to store the instances in the CVT
        self._grid: Dict[int, Instance] = {}
        # Loading samples if given
        if samples is not None:
            if isinstance(samples, str):
                try:
                    self._samples = np.load(samples)
                    self._n_samples = len(self._samples)
                except Exception as _:
                    raise ValueError(
                        f"Error in CVTArchive.__init__() loading the samples file {samples}."
                    )
            elif isinstance(samples, np.ndarray) and len(samples) != n_samples:
                raise ValueError(
                    f"The number of samples {len(samples)} must be equal to the number of expected samples (n_samples = {n_samples})"
                )
            else:
                self._samples = np.asarray(samples)

        if centroids is not None:
            if isinstance(centroids, str):
                try:
                    self._centroids = np.load(centroids)
                    self._k = len(self._centroids)
                except Exception as _:
                    raise ValueError(
                        f"Error in CVTArchive.__init__() loading the centroids file {centroids}."
                    )
            elif isinstance(centroids, np.ndarray) and len(centroids) != k:
                raise ValueError(
                    f"The number of centroids {len(centroids)} must be equal to the number of regions (k = {self._k})"
                )
            else:
                self._centroids = np.asarray(centroids)
        else:
            # Generate centroids
            if self._samples is None:
                # Generate uniform samples if not given
                rng = np.random.default_rng(seed=self._seed)
                self._samples = rng.uniform(
                    low=self._lower_bounds,
                    high=self._upper_bounds,
                    size=(self._n_samples, self._dimensions),
                )
            self._kmeans.fit(self._samples)
            self._centroids = self._kmeans.cluster_centers_

        self._kdtree = KDTree(self._centroids, metric="euclidean")

    @property
    def dimensions(self) -> int:
        """Dimensions of the measure space used

        Returns:
            int: Dimensions of the measure space used
        """
        return self._dimensions

    @property
    def samples(self) -> np.ndarray:
        """Returns the samples used to generate the centroids

        Returns:
            np.ndarray: Samples
        """
        return self._samples

    @property
    def centroids(self) -> np.ndarray:
        """Returns k centroids calculated from the samples

        Returns:
            np.ndarray: K d-dimensional centroids
        """
        return self._centroids

    @property
    def regions(self) -> int:
        """Number of regions (k) of centroids in the CVTArchive

        Returns:
            int: k
        """
        return self._k

    @property
    def bounds(self) -> Tuple[np.ndarray, np.ndarray]:
        """Tuple with the lower and upper bounds of the measure space
        The first value is the lower bounds and the second value is the upper bounds.
        Each value is a list with the corresponding lower/upper bound of the ith dimension
        in the measure space
        """
        return (self._lower_bounds, self._upper_bounds)

    @property
    def instances(self) -> list[Instance]:
        return list(self._grid.values())

    def __str__(self):
        return f"CVTArchive(dim={self._dimensions},regions={self._k},centroids={self._centroids})"

    def __repr__(self):
        return f"CVTArchive(dim={self._dimensions},regions={self._k},centroids={self._centroids})"

    def __iter__(self):
        """Iterates over the dictionary of instances

        Returns:
            Iterator: Yields position in the hypercube and instance located in such position
        """
        return iter(self._grid.values())

    def lower_i(self, i) -> np.float64:
        if i < 0 or i >= len(self._lower_bounds):
            msg = f"index {i} is out of bounds. Valid values are [0-{len(self._lower_bounds) - 1}]"
            raise ValueError(msg)
        return self._lower_bounds[i]

    def upper_i(self, i) -> np.float64:
        if i < 0 or i >= len(self._upper_bounds):
            msg = f"index {i} is out of bounds. Valid values are [0-{len(self._upper_bounds) - 1}]"
            raise ValueError(msg)
        return self._upper_bounds[i]

    def append(self, instance: Instance):
        """Inserts an Instance into the Grid

        Args:
            instance (Instance): Instance to be inserted

        Raises:
            TypeError: ``instance`` is not an instance of the class Instance.
        """
        if isinstance(instance, Instance):
            index = self.index_of(np.asarray(instance.descriptor).reshape(1, -1))[0]
            if index not in self._grid or instance > self._grid[index]:
                self._grid[index] = instance.clone()

        else:
            msg = "Only objects of type Instance can be inserted into a CVTArchive"
            raise TypeError(msg)

    def extend(self, iterable: Iterable[Instance]):
        """Includes all the instances in iterable into the Grid

        Args:
            iterable (Iterable[Instance]): Iterable of instances
        """
        if not all(isinstance(i, Instance) for i in iterable):
            msg = "Only objects of type Instance can be inserted into a CVTArchive"
            raise TypeError(msg)

        indices = self.index_of([i.descriptor for i in iterable])
        for idx, instance in zip(indices, iterable, strict=True):
            if idx not in self._grid or instance.fitness > self._grid[idx].fitness:
                self._grid[idx] = instance.clone()

    def remove(self, iterable: Iterable[Instance]):
        """Removes all the instances in iterable from the grid"""
        if not all(isinstance(i, Instance) for i in iterable):
            msg = "Only objects of type Instance can be removed from a CVTArchive"
            raise TypeError(msg)

        indices_to_remove = self.index_of([i.descriptor for i in iterable])
        for index in indices_to_remove:
            if index in self._grid:
                del self._grid[index]

    def remove_unfeasible(self, attr: str = "p"):
        """Removes all the unfeasible instances from the grid"""
        keys_to_remove = [
            i for i in self._grid.keys() if getattr(self._grid[i], attr) < 0
        ]
        for i in keys_to_remove:
            del self._grid[i]

    def index_of(self, descriptors) -> np.ndarray:
        """Computes the indeces of a batch of descriptors.

        Args:
            descriptors (array-like): (batch_size, dimensions) array of descriptors for each instance

        Raises:
            ValueError: ``descriptors`` is not shape (batch_size, dimensions)

        Returns:
            np.ndarray: (batch_size, ) array with the integer index of the closest centroid (region) for each descriptor.
        """
        descriptors = np.array(descriptors)

        if len(descriptors) == 0:
            return np.empty(0, dtype=np.int32)
        if (
            descriptors.ndim == 1
            and descriptors.shape[0] != self._dimensions
            or descriptors.ndim == 2
            and descriptors.shape[1] != self._dimensions
        ):
            raise ValueError(
                f"Expected descriptors to be an array with shape "
                f"(batch_size, dimensions) (i.e. shape "
                f"(batch_size, {self._dimensions})) but it had shape "
                f"{descriptors.shape}"
            )

        indices = self._kdtree.query(descriptors, return_distance=False)
        indices = indices[:, 0]
        return indices.astype(np.int32)

    def to_file(self, file_pattern: str = "CVTArchive"):
        """Saves the centroids and the samples of the CVTArchive to .npy files
            Each attribute is saved in its own filename.
            Therefore, file_pattern is expected not to contain any extension

        Args:
            file_pattern (str, optional): Pattern of the expected filenames. Defaults to "CVTArchive".
        """
        np.save(f"{file_pattern}_centroids.npy", self._centroids)
        np.save(f"{file_pattern}_samples.npy", self._samples)

    @classmethod
    def load_from_json(cls, filename: str):
        """Creates a CVTArchive object from the content of a previously created JSON file

        Args:
            filename (str): Filename of the JSON file with the CVTArchive information

        Raises:
            ValueError: If there's any error while loading the file. (IOError)
            ValueError: If the JSON file does not contain all the expected keys

        Returns:
            Self: Returns a CVTArchive object
        """
        expected_keys = {
            "dimensions",
            "n_samples",
            "regions",
            "lbs",
            "ubs",
            "centroids",
            "samples",
        }
        try:
            with open(filename, "r") as file:
                json_data = json.load(file)
                if not expected_keys <= json_data.keys():
                    raise ValueError(
                        f"The JSON file does not contain all the minimum expected keys. Expected keys are {expected_keys} and got {json_data.keys()}"
                    )
                _ranges = [
                    (l_i, u_i) for l_i, u_i in zip(json_data["lbs"], json_data["ubs"])
                ]
                new_archive = cls(
                    k=json_data["regions"],
                    ranges=_ranges,
                    n_samples=json_data["n_samples"],
                    centroids=json_data["centroids"],
                    samples=json_data["samples"],
                )
                return new_archive

        except IOError as io:
            raise ValueError(f"Error opening file {filename}. Reason -> {io.strerror}")

    def asdict(self) -> dict:
        return {
            "dimensions": self._dimensions,
            "n_samples": self._n_samples,
            "regions": self._k,
            "lbs": self._lower_bounds.tolist(),
            "ubs": self._upper_bounds.tolist(),
            "centroids": self._centroids.tolist(),
            "samples": self._samples.tolist(),
            "instances": {
                i: instance.asdict() for i, instance in enumerate(self._grid.values())
            },
        }

    def to_json(self, filename: Optional[str] = None) -> str:
        """Returns the content of the CVTArchive in JSON format.

        Returns:
            str: String in JSON format with the content of the CVTArchive
        """
        json_data = json.dumps(self.asdict(), indent=4)
        if filename is not None:
            filename = (
                f"{filename}.json" if not filename.endswith(".json") else filename
            )
            with open(filename, "w") as f:
                f.write(json_data)

        return json_data

bounds property

Tuple with the lower and upper bounds of the measure space. The first value is the lower bounds and the second value is the upper bounds. Each value is a list with the corresponding lower/upper bound of the ith dimension in the measure space.

centroids property

Returns k centroids calculated from the samples

Returns:
  • np.ndarray –

    K d-dimensional centroids

dimensions property

Dimensions of the measure space used

Returns:
  • int –

    Dimensions of the measure space used

regions property

Number of regions (k) of centroids in the CVTArchive

Returns:
  • int –

    k

samples property

Returns the samples used to generate the centroids

Returns:
  • np.ndarray –

    Samples used to generate the centroids

__init__(k, ranges, n_samples, centroids=None, samples=None, dtype=np.float64, seed=42)

Creates a CVTArchive object

Parameters:
  • k (int) –

    Number of centroids (regions) to create

  • ranges (Sequence[Tuple[float, float]]) –

    Ranges of the measure space: lower and upper bound of each dimension, e.g. [(-1, 1), (-2, 2)] indicates the first dimension has bounds [-1, 1] (inclusive) and the second dimension has bounds [-2, 2] (inclusive). The length of ranges gives the number of dimensions of the measure space.

  • n_samples (int) –

    Number of samples to generate before calculating the centroids.

  • centroids (Optional[NDArray | str], default: None ) –

    Precalculated centroids for the archive: either a np.ndarray with k centroids or the path to a .npy file loaded with np.load.

  • samples (Optional[NDArray | str], default: None ) –

    Precalculated samples for the archive: either a np.ndarray with n_samples samples or the path to a .npy file loaded with np.load.

Raises:
  • ValueError

    If len(ranges) <= 0.

  • ValueError

    If the number of samples is less than one or less than the number of regions (k).

  • ValueError

    If the number of regions is less than one.

  • ValueError

    If the samples file cannot be loaded.

  • ValueError

    If given a samples np.ndarray, its length differs from the expected number of samples (n_samples).

  • ValueError

    If the centroids file cannot be loaded.

  • ValueError

    If given a centroids np.ndarray, its length differs from the number of regions (k).
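
When precomputed data is available, both samples and centroids can be passed directly, which skips sampling and the k-means step. A hedged sketch (array shapes follow the checks in __init__; string arguments are treated as .npy paths and loaded with np.load):

import numpy as np
from digneapy.archives import CVTArchive

k, n_samples, dims = 20, 1_000, 2
rng = np.random.default_rng(0)

# Sizes must match the declared values: len(samples) == n_samples and len(centroids) == k.
my_samples = rng.uniform(0.0, 1.0, size=(n_samples, dims))
my_centroids = rng.uniform(0.0, 1.0, size=(k, dims))

archive = CVTArchive(
    k=k,
    ranges=[(0.0, 1.0)] * dims,
    n_samples=n_samples,
    samples=my_samples,
    centroids=my_centroids,
)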

__iter__()

Iterates over the instances stored in the archive

Returns:
  • Iterator –

    Yields the instances stored in the occupied regions of the archive


append(instance)

Inserts an Instance into the Grid

Parameters:
  • instance (Instance) –

    Instance to be inserted

Raises:
  • TypeError

    instance is not an instance of the class Instance.
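
A short sketch of inserting a single instance, reusing the archive built in the first example; inst is assumed to be an existing digneapy Instance whose descriptor has the same dimensionality as the archive:

# `inst` is assumed to be a digneapy Instance with a matching descriptor;
# how instances are built depends on the rest of the library.
archive.append(inst)

# The instance is kept only if its region is empty or it compares greater
# than the current occupant, so the archive holds at most k entries.
print(len(archive.instances))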


extend(iterable)

Includes all the instances in iterable into the Grid

Parameters:
  • iterable (Iterable[Instance]) –

    Iterable of instances
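
A sketch of batch insertion, assuming new_instances is a list of Instance objects:

# Each instance is routed to its nearest centroid and kept only if it
# improves on the fitness already stored in that region.
archive.extend(new_instances)

# Iterating over the archive yields the surviving instances (one per occupied region).
for inst in archive:
    print(inst.fitness)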


index_of(descriptors)

Computes the indices of a batch of descriptors.

Parameters:
  • descriptors (array-like) –

    (batch_size, dimensions) array of descriptors for each instance

Raises:
  • ValueError

    descriptors is not shape (batch_size, dimensions)

Returns:
  • np.ndarray –

    (batch_size, ) array with the integer index of the closest centroid (region) for each descriptor.
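
index_of can also be called directly to see which region a batch of descriptors falls into; a sketch with made-up descriptors, continuing the 2-D archive from the first example:

import numpy as np

descriptors = np.array([
    [0.1, 0.9],
    [0.5, 0.5],
    [0.8, 0.2],
])

# One nearest-centroid (region) index per row, returned as int32.
region_ids = archive.index_of(descriptors)
print(region_ids.shape)  # (3,)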


load_from_json(filename) classmethod

Creates a CVTArchive object from the content of a previously created JSON file

Parameters:
  • filename (str) –

    Filename of the JSON file with the CVTArchive information

Raises:
  • ValueError

    If there's any error while loading the file. (IOError)

  • ValueError

    If the JSON file does not contain all the expected keys

Returns:
  • Self

    Returns a CVTArchive object


remove(iterable)

Removes all the instances in iterable from the grid


remove_unfeasible(attr='p')

Removes all the unfeasible instances (those whose attribute attr is negative) from the grid


to_file(file_pattern='CVTArchive')

Saves the centroids and the samples of the CVTArchive to .npy files. Each attribute is saved to its own file, so file_pattern is expected not to contain any extension.

Parameters:
  • file_pattern (str, default: 'CVTArchive' ) –

    Pattern of the expected filenames. Defaults to "CVTArchive".
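
A sketch of persisting the tessellation and rebuilding an equivalent archive from the saved files (the filenames follow the pattern written by to_file; string arguments to the constructor are loaded with np.load):

archive.to_file("my_cvt")  # writes my_cvt_centroids.npy and my_cvt_samples.npy

restored = CVTArchive(
    k=archive.regions,
    ranges=list(zip(*archive.bounds)),  # pair the lower/upper bound arrays back into (low, high) tuples
    n_samples=len(archive.samples),
    centroids="my_cvt_centroids.npy",
    samples="my_cvt_samples.npy",
)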


to_json(filename=None)

Returns the content of the CVTArchive in JSON format. If filename is given, the JSON string is also written to that file (a .json suffix is appended if missing).

Returns:
  • str –

    String in JSON format with the content of the CVTArchive
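
A JSON round-trip sketch. Note that asdict() also stores an instances key, so this relies on the subset-style key check in load_from_json; only the tessellation (bounds, samples, centroids) is restored, not the stored instances:

archive.to_json("my_cvt")  # writes my_cvt.json and returns the JSON string

restored = CVTArchive.load_from_json("my_cvt.json")
print(restored.regions == archive.regions)  # True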
