@File : init.py @Time : 2024/06/07 12:16:04 @Author : Alejandro Marrero @Version : 1.0 @Contact : amarrerd@ull.edu.es @License : (C)Copyright 2024, Alejandro Marrero @Desc : None

Archive

Class Archive Stores a collection of diverse Instances

Source code in digneapy/archives/_base_archive.py
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
class Archive:
    """Class Archive
    Stores a collection of diverse Instances
    """

    def __init__(
        self,
        threshold: float,
        instances: Optional[Iterable[Instance]] = None,
        dtype=np.float64,
    ):
        """Creates an instance of an Archive (unstructured) for QD algorithms

        Args:
            threshold (float): Minimum value of sparseness to include an Instance into the archive.
            instances (Iterable[Instance], optional): Instances to initialise the archive. Defaults to None.
            dtype: Numeric dtype kept for use by subclasses. Defaults to np.float64.
        """
        # list() materialises any iterable (including generators) into an owned list.
        self._instances = list(instances) if instances else []
        self._threshold = threshold
        self._dtype = dtype

    @property
    def instances(self):
        """List of the instances currently stored in the archive."""
        return self._instances

    @property
    def threshold(self):
        """Minimum sparseness required for an Instance to be appended."""
        return self._threshold

    @threshold.setter
    def threshold(self, t: float):
        """Sets the sparseness threshold.

        Raises:
            TypeError: If ``t`` cannot be converted to float.
        """
        try:
            t_f = float(t)
        except (TypeError, ValueError) as e:
            # float() can only raise TypeError/ValueError; chain the cause.
            msg = f"The threshold value {t} is not a float in 'threshold' setter of class {self.__class__.__name__}"
            raise TypeError(msg) from e
        self._threshold = t_f

    def __iter__(self):
        return iter(self._instances)

    def __str__(self):
        return f"Archive(threshold={self._threshold},data=(|{len(self)}|))"

    def __repr__(self):
        return f"Archive(threshold={self._threshold},data=(|{len(self)}|))"

    def __array__(self, dtype=Instance, copy=True) -> np.ndarray:
        """Creates a ndarray with the stored instances

        >>> import numpy as np
        >>> descriptors = [list(range(d, d + 5)) for d in range(10)]
        >>> archive = Archive(threshold=0.0, instances=descriptors)
        >>> np_archive = np.array(archive)
        >>> assert len(np_archive) == len(archive)
        >>> assert type(np_archive) == type(np.zeros(1))
        """
        # NOTE(review): the ``dtype`` parameter is ignored and np.array is always
        # called with dtype=Instance — confirm whether dtype should be forwarded.
        return np.array(self._instances, dtype=Instance, copy=copy)

    def __eq__(self, other):
        """Compares whether two Archives are equal

        >>> import copy
        >>> variables = [list(range(d, d + 5)) for d in range(10)]
        >>> instances = [Instance(variables=v, s=1.0) for v in variables]
        >>> archive = Archive(threshold=0.0, instances=instances)
        >>> empty_archive = Archive(threshold=0.0)

        >>> a1 = copy.copy(archive)
        >>> assert a1 == archive
        >>> assert empty_archive != archive
        """
        # Equal iff same length and pairwise-equal instances (order matters).
        return len(self) == len(other) and all(a == b for a, b in zip(self, other))

    def __hash__(self):
        from functools import reduce

        # XOR-fold the per-instance hashes; 0 for an empty archive.
        return reduce(operator.xor, (hash(i) for i in self.instances), 0)

    def __bool__(self):
        """Returns True if the archive is not empty (len(self) != 0)

        >>> descriptors = [list(range(d, d + 5)) for d in range(10)]
        >>> archive = Archive(threshold=0.0, instances=descriptors)
        >>> empty_archive = Archive(threshold=0.0)

        >>> assert archive
        >>> assert not empty_archive
        """
        return len(self) != 0

    def __len__(self):
        return len(self.instances)

    def __getitem__(self, key):
        # Slices return a new archive of the same (sub)class; ints return one instance.
        if isinstance(key, slice):
            cls = type(self)  # To facilitate subclassing
            return cls(self._threshold, self.instances[key])
        index = operator.index(key)
        return self.instances[index]

    def append(self, i: Instance):
        """Appends ``i`` when its sparseness is strictly greater than the threshold.

        NOTE(review): ``append`` uses a strict ``>`` while ``extend`` uses ``>=`` —
        confirm whether instances exactly at the threshold should be accepted.
        """
        if i.s > self.threshold:
            self.instances.append(i)

    def extend(self, iterable: Iterable[Instance]):
        """Extends the current archive with all the individuals inside iterable that have
        a sparseness value greater than or equal to the archive threshold.

        Args:
            iterable (Iterable[Instance]): Iterable of instances to be included in the archive.
        """
        self.instances.extend(i for i in iterable if i.s >= self._threshold)

    def __format__(self, fmt_spec=""):
        # Renders as "(i0, i1, ...)", applying fmt_spec to every instance.
        components = (format(c, fmt_spec) for c in self)
        return "({})".format(", ".join(components))

    def asdict(self) -> dict:
        """Returns a dict representation: the threshold plus an index -> instance-dict map."""
        return {
            "threshold": self._threshold,
            "instances": {
                i: instance.asdict() for i, instance in enumerate(self.instances)
            },
        }

    def to_json(self) -> str:
        """Converts the archive into a JSON object

        Returns:
            str: JSON str of the archive content
        """
        return json.dumps(self.asdict(), indent=4)

__array__(dtype=Instance, copy=True)

Creates a ndarray with the descriptors

import numpy as np descriptors = [list(range(d, d + 5)) for d in range(10)] archive = Archive(descriptors) np_archive = np.array(archive) assert len(np_archive) == len(archive) assert type(np_archive) == type(np.zeros(1))

Source code in digneapy/archives/_base_archive.py
74
75
76
77
78
79
80
81
82
83
84
def __array__(self, dtype=Instance, copy=True) -> np.ndarray:
    """Creates a ndarray with the stored instances

    >>> import numpy as np
    >>> descriptors = [list(range(d, d + 5)) for d in range(10)]
    >>> archive = Archive(descriptors)
    >>> np_archive = np.array(archive)
    >>> assert len(np_archive) == len(archive)
    >>> assert type(np_archive) == type(np.zeros(1))
    """
    # NOTE(review): the ``dtype`` parameter is ignored and np.array is always
    # called with dtype=Instance — confirm whether dtype should be forwarded.
    return np.array(self._instances, dtype=Instance, copy=copy)

__bool__()

Returns True if len(self) > 0

descriptors = [list(range(d, d + 5)) for d in range(10)] archive = Archive(threshold=0.0, instances=descriptors) empty_archive = Archive(threshold=0.0)

assert archive assert not empty_archive

Source code in digneapy/archives/_base_archive.py
107
108
109
110
111
112
113
114
115
116
117
def __bool__(self):
    """Returns True if the archive is not empty (len(self) != 0)

    >>> descriptors = [list(range(d, d + 5)) for d in range(10)]
    >>> archive = Archive(threshold=0.0, instances=descriptors)
    >>> empty_archive = Archive(threshold=0.0)

    >>> assert archive
    >>> assert not empty_archive
    """
    return len(self) != 0

__eq__(other)

Compares whether two Archives are equal

import copy variables = [list(range(d, d + 5)) for d in range(10)] instances = [Instance(variables=v, s=1.0) for v in variables] archive = Archive(threshold=0.0, instances=instances) empty_archive = Archive(threshold=0.0)

a1 = copy.copy(archive) assert a1 == archive assert empty_archive != archive

Source code in digneapy/archives/_base_archive.py
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def __eq__(self, other):
    """Compares whether two Archives are equal

    >>> import copy
    >>> variables = [list(range(d, d + 5)) for d in range(10)]
    >>> instances = [Instance(variables=v, s=1.0) for v in variables]
    >>> archive = Archive(threshold=0.0, instances=instances)
    >>> empty_archive = Archive(threshold=0.0)

    >>> a1 = copy.copy(archive)
    >>> assert a1 == archive
    >>> assert empty_archive != archive
    """
    # Equal iff same length and pairwise-equal instances (order matters).
    return len(self) == len(other) and all(a == b for a, b in zip(self, other))

__init__(threshold, instances=None, dtype=np.float64)

Creates an instance of an Archive (unstructured) for QD algorithms

Parameters:
  • threshold (float) –

    Minimum value of sparseness to include an Instance into the archive.

  • instances (Iterable[Instance], default: None ) –

    Instances to initialise the archive. Defaults to None.

Source code in digneapy/archives/_base_archive.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    threshold: float,
    instances: Optional[Iterable[Instance]] = None,
    dtype=np.float64,
):
    """Creates an instance of a Archive (unstructured) for QD algorithms

    Args:
        threshold (float): Minimum value of sparseness to include an Instance into the archive.
        instances (Iterable[Instance], optional): Instances to initialise the archive. Defaults to None.
        dtype: Numeric dtype kept for use by subclasses. Defaults to np.float64.
    """
    # Materialise the iterable; a falsy value (None or empty) yields an empty list.
    if instances:
        self._instances = list(i for i in instances)
    else:
        self._instances = []

    self._threshold = threshold
    self._dtype = dtype
extend(iterable)

Extends the current archive with all the individuals inside iterable that have a sparseness value greater than or equal to the archive threshold.

Parameters:
  • iterable (Iterable[Instance]) –

    Iterable of instances to be include in the archive.

Source code in digneapy/archives/_base_archive.py
133
134
135
136
137
138
139
140
def extend(self, iterable: Iterable[Instance]):
    """Extends the current archive with all the individuals inside iterable that have
    a sparseness value greater than or equal to the archive threshold.

    Args:
        iterable (Iterable[Instance]): Iterable of instances to be include in the archive.
    """
    # Note: uses an inclusive >= comparison, unlike ``append`` which uses a strict >.
    self.instances.extend(i for i in iterable if i.s >= self._threshold)

to_json()

Converts the archive into a JSON object

Returns:
  • str( str ) –

    JSON str of the archive content

Source code in digneapy/archives/_base_archive.py
156
157
158
159
160
161
162
163
def to_json(self) -> str:
    """Converts the archive into a JSON object

    Returns:
        str: JSON str of the archive content
    """

    # Serialises the dict produced by ``asdict`` with 4-space indentation.
    return json.dumps(self.asdict(), indent=4)

CVTArchive

Bases: Archive, RNG

An Archive that divides a high-dimensional measure space into k homogeneous geometric regions. Based on the paper from Vassiliades et al (2018) https://ieeexplore.ieee.org/document/8000667

The computational complexity of the method we provide for constructing the CVT (in Algorithm 1) is O(ndki), where n is the number of d-dimensional samples to be clustered, k is the number of clusters, and i is the number of iterations needed until convergence

Source code in digneapy/archives/_cvt_archive.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
class CVTArchive(Archive, RNG):
    """An Archive that divides a high-dimensional measure space into k homogeneous geometric regions.
    Based on the paper from Vassiliades et al (2018) <https://ieeexplore.ieee.org/document/8000667>
    > The computational complexity of the method we provide for constructing the CVT (in Algorithm 1) is O(ndki),
    > where n is the number of d-dimensional samples to be clustered, k is the number of clusters,
    > and i is the number of iterations needed until convergence
    """

    def __init__(
        self,
        k: int,
        ranges: Sequence[Tuple[float, float]],
        n_samples: int,
        centroids: Optional[npt.NDArray | str] = None,
        samples: Optional[npt.NDArray | str] = None,
        dtype=np.float64,
        seed: int = 42,
    ):
        """Creates a CVTArchive object

        Args:
            k (int): Number of centroids (regions) to create
            ranges (Sequence[Tuple[float, float]]): Ranges of the measure space. Upper and lower bound of each
            dimension of the measure space, e.g. ``[(-1, 1), (-2, 2)]``
            indicates the first dimension should have bounds :math:`[-1,1]`
            (inclusive), and the second dimension should have bounds
            :math:`[-2,2]` (inclusive). The length of ``ranges`` indicates the number of dimensions of the measure space.
            n_samples (int): Number of samples to generate before calculating the centroids.
            centroids (Optional[npt.NDArray  |  str], optional): Precalculated centroids for the archive.
            The options are a np.ndarray with the values of ``k`` centroids or a file with the centroids to be loaded by Numpy. Defaults to None.
            samples (Optional[npt.NDArray  |  str], optional): Precalculated samples for the archive.
            The options are a np.ndarray with the values of ``n_samples`` samples or a file with the samples to be loaded by Numpy. Defaults to None.

        Raises:
            ValueError: If len(ranges) <= 0.
            ValueError: If the number of samples is less than zero or less than the number of regions (k).
            ValueError: If the number of regions is less than zero.
            ValueError: If the samples file cannot be loaded.
            ValueError: If given a samples np.ndarray the number of samples in the file is different from the number of expected samples (n_samples).
            ValueError: If the centroids file cannot be loaded.
            ValueError: If given a centroids np.ndarray the number of centroids in the file is different from the number of regions (k).
        """
        # The threshold is irrelevant for a CVT archive; use a huge sentinel.
        Archive.__init__(self, threshold=np.finfo(np.float32).max, dtype=dtype)
        if k <= 0:
            raise ValueError(f"The number of regions (k = {k}) must be >= 1")

        if len(ranges) <= 0:
            raise ValueError(
                f"ranges must have length >= 1 and it has length {len(ranges)}"
            )

        if n_samples <= 0 or n_samples < k:
            raise ValueError(
                f"The number of samples (n_samples = {n_samples}) must be >= 1 and >= regions (k = {k})"
            )

        self._dimensions = len(ranges)
        # Transpose [(lb, ub), ...] into ([lb...], [ub...]).
        bounds = list(zip(*ranges))
        self._lower_bounds = np.array(bounds[0], dtype=self._dtype)
        self._upper_bounds = np.array(bounds[1], dtype=self._dtype)
        self._interval = self._upper_bounds - self._lower_bounds
        self._k = k
        self._n_samples = n_samples
        self._samples = None
        self._centroids = None
        self.initialize_rng(seed=seed)
        self._kmeans = KMeans(n_clusters=self._k, n_init=1, random_state=self._seed)

        # Data Structure to store the instances in the CVT
        self._grid: Dict[int, Instance] = {}
        # Loading samples if given
        if samples is not None:
            if isinstance(samples, str):
                try:
                    # NOTE(review): np.load expects .npy/.npz files although the
                    # docstring mentions .txt — confirm the expected file format.
                    self._samples = np.load(samples)
                    self._n_samples = len(self._samples)
                except Exception as e:
                    raise ValueError(
                        f"Error in CVTArchive.__init__() loading the samples file {samples}."
                    ) from e
            else:
                # Validate length for any array-like (previously only np.ndarray
                # inputs were checked; lists bypassed the validation).
                samples = np.asarray(samples)
                if len(samples) != n_samples:
                    raise ValueError(
                        f"The number of samples {len(samples)} must be equal to the number of expected samples (n_samples = {n_samples})"
                    )
                self._samples = samples

        if centroids is not None:
            if isinstance(centroids, str):
                try:
                    self._centroids = np.load(centroids)
                    self._k = len(self._centroids)
                except Exception as e:
                    raise ValueError(
                        f"Error in CVTArchive.__init__() loading the centroids file {centroids}."
                    ) from e
            else:
                centroids = np.asarray(centroids)
                if len(centroids) != k:
                    raise ValueError(
                        f"The number of centroids {len(centroids)} must be equal to the number of regions (k = {self._k})"
                    )
                self._centroids = centroids
        else:
            # Generate centroids
            if self._samples is None:
                # Generate uniform samples if not given
                rng = np.random.default_rng(seed=self._seed)
                self._samples = rng.uniform(
                    low=self._lower_bounds,
                    high=self._upper_bounds,
                    size=(self._n_samples, self._dimensions),
                )
            self._kmeans.fit(self._samples)
            self._centroids = self._kmeans.cluster_centers_

        # KD-tree over the centroids for nearest-region queries in index_of().
        self._kdtree = KDTree(self._centroids, metric="euclidean")

    @property
    def dimensions(self) -> int:
        """Dimensions of the measure space used

        Returns:
            int: Dimensions of the measure space used
        """
        return self._dimensions

    @property
    def samples(self) -> np.ndarray:
        """Returns the samples used to generate the centroids

        Returns:
            np.ndarray: Samples
        """
        return self._samples

    @property
    def centroids(self) -> np.ndarray:
        """Returns k centroids calculated from the samples

        Returns:
            np.ndarray: K d-dimensional centroids
        """
        return self._centroids

    @property
    def regions(self) -> int:
        """Number of regions (k) of centroids in the CVTArchive

        Returns:
            int: k
        """
        return self._k

    @property
    def bounds(self) -> Tuple[np.ndarray, np.ndarray]:
        """Tuple with the lower and upper bounds of the measure space
        The first value is the lower bounds and the second value is the upper bounds.
        Each value is a list with the corresponding lower/upper bound of the ith dimension
        in the measure space
        """
        return (self._lower_bounds, self._upper_bounds)

    @property
    def instances(self) -> list[Instance]:
        """Instances currently stored in the grid (one per occupied region)."""
        return list(self._grid.values())

    def __str__(self):
        return f"CVArchive(dim={self._dimensions},regions={self._k},centroids={self._centroids})"

    def __repr__(self):
        return f"CVArchive(dim={self._dimensions},regions={self._k},centroids={self._centroids})"

    def __iter__(self):
        """Iterates over the dictionary of instances

        Returns:
            Iterator: Yields position in the hypercube and instance located in such position
        """
        return iter(self._grid.values())

    def lower_i(self, i) -> np.float64:
        """Returns the lower bound of the i-th dimension.

        Raises:
            ValueError: If ``i`` is out of the range [0, dimensions - 1].
        """
        # Fixed off-by-one: i == len(bounds) was previously accepted and then
        # raised an IndexError on access.
        if i < 0 or i >= len(self._lower_bounds):
            msg = f"index {i} is out of bounds. Valid values are [0-{len(self._lower_bounds) - 1}]"
            raise ValueError(msg)
        return self._lower_bounds[i]

    def upper_i(self, i) -> np.float64:
        """Returns the upper bound of the i-th dimension.

        Raises:
            ValueError: If ``i`` is out of the range [0, dimensions - 1].
        """
        if i < 0 or i >= len(self._upper_bounds):
            msg = f"index {i} is out of bounds. Valid values are [0-{len(self._upper_bounds) - 1}]"
            raise ValueError(msg)
        return self._upper_bounds[i]

    def append(self, instance: Instance):
        """Inserts an Instance into the Grid

        Args:
            instance (Instance): Instace to be inserted

        Raises:
            TypeError: ``instance`` is not a instance of the class Instance.
        """
        if isinstance(instance, Instance):
            index = self.index_of(np.asarray(instance.descriptor).reshape(1, -1))[0]
            # NOTE(review): append compares whole instances (``>``) while extend
            # compares ``.fitness`` — confirm both orderings are equivalent.
            if index not in self._grid or instance > self._grid[index]:
                self._grid[index] = instance.clone()

        else:
            msg = "Only objects of type Instance can be inserted into a CVTArchive"
            raise TypeError(msg)

    def extend(self, iterable: Iterable[Instance]):
        """Includes all the instances in iterable into the Grid

        Args:
            iterable (Iterable[Instance]): Iterable of instances

        Raises:
            TypeError: If any element is not an Instance.
        """
        # Materialise first: a generator argument would be exhausted by the
        # isinstance() pass below and the insert pass would silently do nothing.
        instances = list(iterable)
        if not all(isinstance(i, Instance) for i in instances):
            msg = "Only objects of type Instance can be inserted into a CVTArchive"
            raise TypeError(msg)

        indeces = self.index_of([i.descriptor for i in instances])
        for idx, instance in zip(indeces, instances, strict=True):
            if idx not in self._grid or instance.fitness > self._grid[idx].fitness:
                self._grid[idx] = instance.clone()

    def remove(self, iterable: Iterable[Instance]):
        """Removes all the instances in iterable from the grid"""
        # Materialised for the same generator-exhaustion reason as extend().
        instances = list(iterable)
        if not all(isinstance(i, Instance) for i in instances):
            msg = "Only objects of type Instance can be removed from a CVTArchive"
            raise TypeError(msg)

        indeces_to_remove = self.index_of([i.descriptor for i in instances])
        for index in indeces_to_remove:
            if index in self._grid:
                del self._grid[index]

    def remove_unfeasible(self, attr: str = "p"):
        """Removes all the unfeasible instances from the grid"""
        # An instance is considered unfeasible when the given attribute is negative.
        keys_to_remove = [
            i for i in self._grid.keys() if getattr(self._grid[i], attr) < 0
        ]
        for i in keys_to_remove:
            del self._grid[i]

    def index_of(self, descriptors) -> np.ndarray:
        """Computes the indeces of a batch of descriptors.

        Args:
            descriptors (array-like): (batch_size, dimensions) array of descriptors for each instance

        Raises:
            ValueError: ``descriptors`` is not shape (batch_size, dimensions)

        Returns:
            np.ndarray:  (batch_size, ) array of integer indices representing the flattened grid coordinates.
        """
        descriptors = np.array(descriptors)

        if len(descriptors) == 0:
            # Empty int array for consistency with the non-empty return dtype.
            return np.empty(0, dtype=np.int32)
        if (
            descriptors.ndim == 1
            and descriptors.shape[0] != self._dimensions
            or descriptors.ndim == 2
            and descriptors.shape[1] != self._dimensions
        ):
            raise ValueError(
                f"Expected descriptors to be an array with shape "
                f"(batch_size, dimensions) (i.e. shape "
                f"(batch_size, {self._dimensions})) but it had shape "
                f"{descriptors.shape}"
            )

        # Nearest centroid per descriptor; keep only the closest neighbour.
        indices = self._kdtree.query(descriptors, return_distance=False)
        indices = indices[:, 0]
        return indices.astype(np.int32)

    def to_file(self, file_pattern: str = "CVTArchive"):
        """Saves the centroids and the samples of the CVTArchive to .npy files
            Each attribute is saved in its own filename.
            Therefore, file_pattern is expected not to contain any extension

        Args:
            file_pattern (str, optional): Pattern of the expected filenames. Defaults to "CVTArchive".
        """
        np.save(f"{file_pattern}_centroids.npy", self._centroids)
        np.save(f"{file_pattern}_samples.npy", self._samples)

    @classmethod
    def load_from_json(cls, filename: str):
        """Creates a CVTArchive object from the content of a previously created JSON file

        Args:
            filename (str): Filename of the JSON file with the CVTArchive information

        Raises:
            ValueError: If there's any error while loading the file. (IOError)
            ValueError: If the JSON file does not contain all the expected keys

        Returns:
            Self: Returns a CVTArchive object
        """
        expected_keys = {
            "dimensions",
            "n_samples",
            "regions",
            "lbs",
            "ubs",
            "centroids",
            "samples",
        }
        try:
            with open(filename, "r") as file:
                json_data = json.load(file)
                if expected_keys != json_data.keys():
                    raise ValueError(
                        f"The JSON file does not contain all the minimum expected keys. Expected keys are {expected_keys} and got {json_data.keys()}"
                    )
                _ranges = [
                    (l_i, u_i) for l_i, u_i in zip(json_data["lbs"], json_data["ubs"])
                ]
                new_archive = cls(
                    k=json_data["regions"],
                    ranges=_ranges,
                    n_samples=json_data["n_samples"],
                    centroids=json_data["centroids"],
                    samples=json_data["samples"],
                )
                return new_archive

        except IOError as io:
            raise ValueError(
                f"Error opening file (unknown). Reason -> {io.strerror}"
            ) from io

    def asdict(self) -> dict:
        """Returns a dict representation of the archive (geometry plus indexed instances)."""
        return {
            "dimensions": self._dimensions,
            "n_samples": self._n_samples,
            "regions": self._k,
            "lbs": self._lower_bounds.tolist(),
            "ubs": self._upper_bounds.tolist(),
            "centroids": self._centroids.tolist(),
            "samples": self._samples.tolist(),
            "instances": {
                i: instance.asdict() for i, instance in enumerate(self._grid.values())
            },
        }

    def to_json(self, filename: Optional[str] = None) -> str:
        """Returns the content of the CVTArchive in JSON format.

        Args:
            filename (Optional[str], optional): When given, the JSON content is also
            written to this file (a ``.json`` suffix is appended if missing).

        Returns:
            str: String in JSON format with the content of the CVTArchive
        """
        json_data = json.dumps(self.asdict(), indent=4)
        if filename is not None:
            # Fixed: previously a hard-coded base name was used instead of the
            # caller-supplied filename when appending the .json extension.
            filename = (
                f"{filename}.json" if not filename.endswith(".json") else filename
            )
            with open(filename, "w") as f:
                f.write(json_data)

        return json_data

bounds property

Tuple with the lower and upper bounds of the measure space The first value is the lower bounds and the second value is the upper bounds. Each value is a list with the corresponding lower/upper bound of the ith dimension in the measure space

centroids property

Returns k centroids calculated from the samples

Returns:
  • ndarray

    np.ndarray: K d-dimensional centroids

dimensions property

Dimensions of the measure space used

Returns:
  • int( int ) –

    Dimensions of the measure space used

regions property

Number of regions (k) of centroids in the CVTArchive

Returns:
  • int( int ) –

    k

samples property

Returns the samples used to generate the centroids

Returns:
  • ndarray

    np.ndarray: Samples

__init__(k, ranges, n_samples, centroids=None, samples=None, dtype=np.float64, seed=42)

Creates a CVTArchive object

Parameters:
  • k (int) –

    Number of centroids (regions) to create

  • ranges (Sequence[Tuple[float, float]]) –

    Ranges of the measure space. Upper and lower bound of each

  • indicates (the first dimension should have bounds ) –

    math:[-1,1]

  • math:[-2,2] (inclusive). The length of ranges indicates the number of dimensions of the measure space.

  • n_samples (int) –

    Number of samples to generate before calculating the centroids.

  • centroids (Optional[NDArray | str], default: None ) –

    Precalculated centroids for the archive.

  • samples (Optional[NDArray | str], default: None ) –

    Precalculated samples for the archive.

Raises:
  • ValueError

    If len(ranges) <= 0.

  • ValueError

    If the number of samples is less than zero or less than the number of regions (k).

  • ValueError

    If the number of regions is less than zero.

  • ValueError

    If the samples file cannot be loaded.

  • ValueError

    If given a samples np.ndarray the number of samples in the file is different from the number of expected samples (n_samples).

  • ValueError

    If the centroids file cannot be loaded.

  • ValueError

    If given a centroids np.ndarray the number of centroids in the file is different from the number of regions (k).

Source code in digneapy/archives/_cvt_archive.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def __init__(
    self,
    k: int,
    ranges: Sequence[Tuple[float, float]],
    n_samples: int,
    centroids: Optional[npt.NDArray | str] = None,
    samples: Optional[npt.NDArray | str] = None,
    dtype=np.float64,
    seed: int = 42,
):
    """Creates a CVTArchive object

    Args:
        k (int): Number of centroids (regions) to create.
        ranges (Sequence[Tuple[float, float]]): Ranges of the measure space. Upper and lower bound of each
            dimension of the measure space, e.g. ``[(-1, 1), (-2, 2)]``
            indicates the first dimension should have bounds :math:`[-1,1]`
            (inclusive), and the second dimension should have bounds
            :math:`[-2,2]` (inclusive). The length of ``ranges`` indicates the number of dimensions of the measure space.
        n_samples (int): Number of samples to generate before calculating the centroids.
        centroids (Optional[npt.NDArray | str], optional): Precalculated centroids for the archive.
            Either a np.ndarray with the values of ``k`` centroids or the path of a file readable by ``np.load``. Defaults to None.
        samples (Optional[npt.NDArray | str], optional): Precalculated samples for the archive.
            Either a np.ndarray with the values of ``n_samples`` samples or the path of a file readable by ``np.load``. Defaults to None.
        dtype (data-type, optional): Data type of the bound arrays. Defaults to np.float64.
        seed (int, optional): Seed for the RNG and the k-means clustering. Defaults to 42.

    Raises:
        ValueError: If len(ranges) <= 0.
        ValueError: If the number of samples is less than zero or less than the number of regions (k).
        ValueError: If the number of regions is less than zero.
        ValueError: If the samples file cannot be loaded.
        ValueError: If given a samples np.ndarray the number of samples in the file is different from the number of expected samples (n_samples).
        ValueError: If the centroids file cannot be loaded.
        ValueError: If given a centroids np.ndarray the number of centroids in the file is different from the number of regions (k).
    """
    # The sparseness threshold of the base Archive is effectively disabled
    # (set to float32 max) because the CVT structure governs insertion.
    Archive.__init__(self, threshold=np.finfo(np.float32).max, dtype=dtype)
    if k <= 0:
        raise ValueError(f"The number of regions (k = {k}) must be >= 1")

    if len(ranges) <= 0:
        raise ValueError(
            f"ranges must have length >= 1 and it has length {len(ranges)}"
        )

    if n_samples <= 0 or n_samples < k:
        raise ValueError(
            f"The number of samples (n_samples = {n_samples}) must be >= 1 and >= regions (k = {k})"
        )

    self._dimensions = len(ranges)
    # Transpose [(lb, ub), ...] into ([lb, ...], [ub, ...])
    ranges = list(zip(*ranges))
    self._lower_bounds = np.array(ranges[0], dtype=self._dtype)
    self._upper_bounds = np.array(ranges[1], dtype=self._dtype)
    self._interval = self._upper_bounds - self._lower_bounds
    self._k = k
    self._n_samples = n_samples
    self._samples = None
    self._centroids = None
    # NOTE(review): initialize_rng presumably sets self._seed (used below) --
    # confirm in the base class.
    self.initialize_rng(seed=seed)
    self._kmeans = KMeans(n_clusters=self._k, n_init=1, random_state=self._seed)

    # Data Structure to store the instances in the CVT
    self._grid: Dict[int, Instance] = {}
    # Loading samples if given
    if samples is not None:
        if isinstance(samples, str):
            try:
                # A file path: load the samples and let the file decide
                # the effective number of samples.
                self._samples = np.load(samples)
                self._n_samples = len(self._samples)
            except Exception as _:
                raise ValueError(
                    f"Error in CVTArchive.__init__() loading the samples file {samples}."
                )
        elif isinstance(samples, np.ndarray) and len(samples) != n_samples:
            raise ValueError(
                f"The number of samples {len(samples)} must be equal to the number of expected samples (n_samples = {n_samples})"
            )
        else:
            # NOTE(review): non-ndarray sequences (e.g. lists) reach this
            # branch without any length check -- confirm this is intended.
            self._samples = np.asarray(samples)

    if centroids is not None:
        if isinstance(centroids, str):
            try:
                # A file path: load the centroids and let the file decide
                # the effective number of regions.
                self._centroids = np.load(centroids)
                self._k = len(self._centroids)
            except Exception as _:
                raise ValueError(
                    f"Error in CVTArchive.__init__() loading the centroids file {centroids}."
                )
        elif isinstance(centroids, np.ndarray) and len(centroids) != k:
            raise ValueError(
                f"The number of centroids {len(centroids)} must be equal to the number of regions (k = {self._k})"
            )
        else:
            self._centroids = np.asarray(centroids)
    else:
        # Generate centroids
        if self._samples is None:
            # Generate uniform samples if not given
            rng = np.random.default_rng(seed=self._seed)
            self._samples = rng.uniform(
                low=self._lower_bounds,
                high=self._upper_bounds,
                size=(self._n_samples, self._dimensions),
            )
        # k-means cluster centres become the CVT region centroids
        self._kmeans.fit(self._samples)
        self._centroids = self._kmeans.cluster_centers_

    # KD-tree over the centroids gives fast nearest-region lookups
    self._kdtree = KDTree(self._centroids, metric="euclidean")

__iter__()

Iterates over the dictionary of instances

Returns:
  • Iterator

    Yields position in the hypercube and instance located in such position

Source code in digneapy/archives/_cvt_archive.py
200
201
202
203
204
205
206
def __iter__(self):
    """Iterates over the instances stored in the CVT grid.

    Returns:
        Iterator: Yields each instance currently stored in the hypercube
    """
    yield from self._grid.values()

append(instance)

Inserts an Instance into the Grid

Parameters:
  • instance (Instance) –

    Instance to be inserted

Raises:
  • TypeError

    instance is not a instance of the class Instance.

Source code in digneapy/archives/_cvt_archive.py
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def append(self, instance: Instance):
    """Inserts an Instance into the Grid

    Args:
        instance (Instance): Instance to be inserted

    Raises:
        TypeError: ``instance`` is not an instance of the class Instance.
    """
    if not isinstance(instance, Instance):
        raise TypeError(
            "Only objects of type Instance can be inserted into a CVTArchive"
        )
    # Locate the CVT region for this descriptor (single-row batch)
    cell = self.index_of(np.asarray(instance.descriptor).reshape(1, -1))[0]
    # Keep the incumbent unless the cell is empty or the newcomer is better
    if cell not in self._grid or instance > self._grid[cell]:
        self._grid[cell] = instance.clone()

extend(iterable)

Includes all the instances in iterable into the Grid

Parameters:
  • iterable (Iterable[Instance]) –

    Iterable of instances

Source code in digneapy/archives/_cvt_archive.py
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def extend(self, iterable: Iterable[Instance]):
    """Includes all the instances in iterable into the Grid

    Args:
        iterable (Iterable[Instance]): Iterable of instances

    Raises:
        TypeError: any element of ``iterable`` is not an Instance.
    """
    if any(not isinstance(item, Instance) for item in iterable):
        raise TypeError(
            "Only objects of type Instance can be inserted into a CVTArchive"
        )

    cells = self.index_of([item.descriptor for item in iterable])
    for cell, item in zip(cells, iterable, strict=True):
        # Replace only when the cell is empty or the newcomer is fitter
        if cell not in self._grid or item.fitness > self._grid[cell].fitness:
            self._grid[cell] = item.clone()

index_of(descriptors)

Computes the indeces of a batch of descriptors.

Parameters:
  • descriptors (array - like) –

    (batch_size, dimensions) array of descriptors for each instance

Raises:
  • ValueError

    descriptors is not shape (batch_size, dimensions)

Returns:
  • ndarray

    np.ndarray: (batch_size, ) array of integer indices representing the flattened grid coordinates.

Source code in digneapy/archives/_cvt_archive.py
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
def index_of(self, descriptors) -> np.ndarray:
    """Computes the indices of a batch of descriptors.

    Each descriptor is mapped to its nearest centroid via the KD-tree.

    Args:
        descriptors (array-like): (batch_size, dimensions) array of descriptors for each instance

    Raises:
        ValueError: ``descriptors`` is not shape (batch_size, dimensions)

    Returns:
        np.ndarray:  (batch_size, ) array of integer indices representing the flattened grid coordinates.
    """
    descriptors = np.asarray(descriptors)

    if len(descriptors) == 0:
        # Bug fix: return an int32 array so the empty-batch path has the
        # same dtype as the normal path (np.empty(0) defaults to float64).
        return np.empty(0, dtype=np.int32)
    if (
        descriptors.ndim == 1
        and descriptors.shape[0] != self._dimensions
        or descriptors.ndim == 2
        and descriptors.shape[1] != self._dimensions
    ):
        raise ValueError(
            f"Expected descriptors to be an array with shape "
            f"(batch_size, dimensions) (i.e. shape "
            f"(batch_size, {self._dimensions})) but it had shape "
            f"{descriptors.shape}"
        )

    # Nearest-centroid lookup; query returns (batch, 1) neighbour indices
    indices = self._kdtree.query(descriptors, return_distance=False)
    indices = indices[:, 0]
    return indices.astype(np.int32)

load_from_json(filename) classmethod

Creates a CVTArchive object from the content of a previously created JSON file

Parameters:
  • filename (str) –

    Filename of the JSON file with the CVTArchive information

Raises:
  • ValueError

    If there's any error while loading the file. (IOError)

  • ValueError

    If the JSON file does not contain all the expected keys

Returns:
  • Self

    Returns a CVTArchive object

Source code in digneapy/archives/_cvt_archive.py
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
@classmethod
def load_from_json(cls, filename: str):
    """Creates a CVTArchive object from the content of a previously created JSON file

    Args:
        filename (str): Filename of the JSON file with the CVTArchive information

    Raises:
        ValueError: If there's any error while loading the file. (IOError)
        ValueError: If the JSON file does not contain all the expected keys

    Returns:
        Self: Returns a CVTArchive object
    """
    expected_keys = {
        "dimensions",
        "n_samples",
        "regions",
        "lbs",
        "ubs",
        "centroids",
        "samples",
    }
    # Keep the try body minimal: only the I/O and parsing can raise IOError
    try:
        with open(filename, "r") as file:
            json_data = json.load(file)
    except IOError as io:
        # Bug fix: report the offending filename instead of a hard-coded
        # placeholder, and chain the original error for debuggability.
        raise ValueError(
            f"Error opening file {filename}. Reason -> {io.strerror}"
        ) from io

    if expected_keys != json_data.keys():
        raise ValueError(
            f"The JSON file does not contain all the minimum expected keys. Expected keys are {expected_keys} and got {json_data.keys()}"
        )
    # Re-pair the lower/upper bounds into the (lb, ub) ranges format
    _ranges = [(l_i, u_i) for l_i, u_i in zip(json_data["lbs"], json_data["ubs"])]
    return cls(
        k=json_data["regions"],
        ranges=_ranges,
        n_samples=json_data["n_samples"],
        centroids=json_data["centroids"],
        samples=json_data["samples"],
    )

remove(iterable)

Removes all the instances in iterable from the grid

Source code in digneapy/archives/_cvt_archive.py
253
254
255
256
257
258
259
260
261
262
def remove(self, iterable: Iterable[Instance]):
    """Removes all the instances in iterable from the grid"""
    if any(not isinstance(item, Instance) for item in iterable):
        raise TypeError(
            "Only objects of type Instance can be removed from a CVTArchive"
        )

    # Map each instance to its cell and drop the occupied ones
    for cell in self.index_of([item.descriptor for item in iterable]):
        self._grid.pop(cell, None)

remove_unfeasible(attr='p')

Removes all the unfeasible instances from the grid

Source code in digneapy/archives/_cvt_archive.py
264
265
266
267
268
269
270
def remove_unfeasible(self, attr: str = "p"):
    """Removes all the unfeasible instances from the grid.

    An instance is unfeasible when its ``attr`` attribute is negative.
    """
    unfeasible = [
        key
        for key, instance in self._grid.items()
        if getattr(instance, attr) < 0
    ]
    for key in unfeasible:
        del self._grid[key]

to_file(file_pattern='CVTArchive')

Saves the centroids and the samples of the CVTArchive to .npy files Each attribute is saved in its own filename. Therefore, file_pattern is expected not to contain any extension

Parameters:
  • file_pattern (str, default: 'CVTArchive' ) –

    Pattern of the expected filenames. Defaults to "CVTArchive".

Source code in digneapy/archives/_cvt_archive.py
305
306
307
308
309
310
311
312
313
314
def to_file(self, file_pattern: str = "CVTArchive"):
    """Saves the centroids and the samples of the CVTArchive to .npy files
        Each attribute is saved in its own filename.
        Therefore, file_pattern is expected not to contain any extension

    Args:
        file_pattern (str, optional): Pattern of the expected filenames. Defaults to "CVTArchive".
    """
    for suffix, data in (("centroids", self._centroids), ("samples", self._samples)):
        np.save(f"{file_pattern}_{suffix}.npy", data)

to_json(filename=None)

Returns the content of the CVTArchive in JSON format.

Returns:
  • str( str ) –

    String in JSON format with the content of the CVTArchive

Source code in digneapy/archives/_cvt_archive.py
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
def to_json(self, filename: Optional[str] = None) -> str:
    """Returns the content of the CVTArchive in JSON format.

    Args:
        filename (Optional[str], optional): If given, the JSON content is also
            written to this file. A ``.json`` extension is appended when
            missing. Defaults to None.

    Returns:
        str: String in JSON format with the content of the CVTArchive
    """
    json_data = json.dumps(self.asdict(), indent=4)
    if filename is not None:
        # Bug fix: the caller's filename was previously discarded in favour
        # of a hard-coded placeholder name when the extension was missing.
        if not filename.endswith(".json"):
            filename = f"{filename}.json"
        with open(filename, "w") as f:
            f.write(json_data)

    return json_data

GridArchive

Bases: Archive

An archive that divides each dimension into a uniformly-sized cells. The source code of this class is inspired by the GridArchive class of pyribs https://github.com/icaros-usc/pyribs/blob/master/ribs/archives/_grid_archive.py This archive is the container described in Mouret 2015 <https://arxiv.org/pdf/1504.04909.pdf>_. It can be visualized as an n-dimensional grid in the measure space that is divided into a certain number of cells in each dimension. Each cell contains an elite, i.e. a solution that maximizes the objective function for the measures in that cell.

Source code in digneapy/archives/_grid_archive.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
class GridArchive(Archive):
    """An archive that divides each dimension into a uniformly-sized cells.
    The source code of this class is inspired by the GridArchive class of pyribs <https://github.com/icaros-usc/pyribs/blob/master/ribs/archives/_grid_archive.py>
    This archive is the container described in `Mouret 2015
    <https://arxiv.org/pdf/1504.04909.pdf>`_. It can be visualized as an
    n-dimensional grid in the measure space that is divided into a certain
    number of cells in each dimension. Each cell contains an elite, i.e. a
    solution that `maximizes` the objective function for the measures in that
    cell.
    """

    def __init__(
        self,
        dimensions: Sequence[int],
        ranges: Sequence[Tuple[float, float]],
        instances: Optional[Iterable[Instance]] = None,
        eps: float = 1e-6,
        dtype=np.float64,
    ):
        """Creates a GridArchive instance

        Args:
            dimensions (Sequence[int]): Number of cells in each dimension of the
                measure space, e.g. ``[20, 30, 40]`` indicates there should be 3
                dimensions with 20, 30, and 40 cells. (The number of dimensions is
                implicitly defined in the length of this argument).
            ranges (Sequence[Tuple[float, float]]): Upper and lower bound of each
                dimension of the measure space, e.g. ``[(-1, 1), (-2, 2)]``
                indicates the first dimension should have bounds :math:`[-1,1]`
                (inclusive), and the second dimension should have bounds
                :math:`[-2,2]` (inclusive). ``ranges`` should be the same length
                as ``dimensions``.
            instances (Optional[Iterable[Instance]], optional): Instances to pre-initialise the archive. Defaults to None.
            eps (float, optional): Due to floating point precision errors, a small
                epsilon is added when computing the archive indices in the
                :meth:`index_of` method. Defaults to 1e-6.
            dtype (str or data-type): Data type of the solutions, objectives,
                and measures.

        Raises:
            ValueError: ``dimensions`` and ``ranges`` are not the same length
        """
        # The sparseness threshold of the base Archive is effectively disabled
        # for grid archives (set to float32 max).
        Archive.__init__(self, threshold=np.finfo(np.float32).max, dtype=dtype)
        if len(ranges) == 0 or len(dimensions) == 0:
            raise ValueError("dimensions and ranges must have length >= 1")
        if len(ranges) != len(dimensions):
            raise ValueError(
                f"len(dimensions) = {len(dimensions)} != len(ranges) = {len(ranges)} in GridArchive.__init__()"
            )

        self._dimensions = np.asarray(dimensions)
        # Transpose [(lb, ub), ...] into ([lb, ...], [ub, ...])
        ranges = list(zip(*ranges))
        self._lower_bounds = np.array(ranges[0], dtype=dtype)
        self._upper_bounds = np.array(ranges[1], dtype=dtype)
        self._interval = self._upper_bounds - self._lower_bounds
        self._eps = eps
        # dtype=object keeps exact (arbitrary-precision) cell counts for very
        # large grids instead of overflowing a fixed-width integer.
        self._cells = np.prod(self._dimensions, dtype=object)
        self._grid: Dict[int, Instance] = {}

        _bounds = []
        for dimension, l_b, u_b in zip(
            self._dimensions, self._lower_bounds, self._upper_bounds
        ):
            _bounds.append(np.linspace(l_b, u_b, dimension))

        # NOTE(review): with heterogeneous dimension sizes this produces a
        # ragged (object) array; newer NumPy versions reject implicit object
        # array creation -- confirm against the supported NumPy range.
        self._boundaries = np.asarray(_bounds)

        if instances is not None:
            self.extend(instances)

    @property
    def dimensions(self):
        # Number of cells per dimension, as given at construction
        return self._dimensions

    @property
    def bounds(self):
        """list of numpy.ndarray: The boundaries of the cells in each dimension.

        Entry ``i`` in this list is an array with ``self.dims[i]`` evenly
        spaced entries between the lower and upper bound of dimension ``i``
        (both inclusive), as produced by ``np.linspace``.
        """
        return self._boundaries

    @property
    def n_cells(self):
        # Total number of cells (product of all dimension sizes)
        return self._cells

    @property
    def coverage(self):
        """Get the coverage of the hypercube space.
        The coverage is calculated as the number of cells filled over the total space available.

        Returns:
            float: Filled cells over the total available.
        """
        if len(self._grid) == 0:
            return 0.0

        return len(self._grid) / self._cells

    @property
    def filled_cells(self):
        # Flattened indices of the cells that currently hold an instance
        return self._grid.keys()

    @property
    def instances(self):
        return list(self._grid.values())

    def __str__(self):
        return f"GridArchive(dim={self._dimensions},cells={self._cells},bounds={self._boundaries})"

    def __repr__(self):
        return f"GridArchive(dim={self._dimensions},cells={self._cells},bounds={self._boundaries})"

    def __len__(self):
        return len(self._grid)

    def __getitem__(self, key):
        """Returns a dictionary with the descriptors as the keys. The values are the instances found.
        Note that some of the given keys may not be in the archive.

        Args:
            key (array-like or descriptor): Descriptors of the instances that want to retrieve.
            Valid examples are:
            -   archive[[0,11], [0,5]] --> Get the instances with the descriptors (0,11) and (0, 5)
            -   archive[0,11] --> Get the instance with the descriptor (0,11)

        Raises:
            TypeError: If the key is an slice. Not allowed.
            ValueError: If the shape of the keys are not valid.

        Returns:
            dict: Returns a dict with the found instances.
        """
        if isinstance(key, slice):
            raise TypeError(
                "Slicing is not available in GridArchive. Use 1D index or descriptor-type indeces"
            )
        descriptors = np.asarray(key)
        if descriptors.ndim == 1 and descriptors.shape[0] != len(self._dimensions):
            raise ValueError(
                f"Expected descriptors to be an array with shape "
                f"(batch_size, 1) or (batch_size, dimensions) (i.e. shape "
                f"(batch_size, {len(self._dimensions)})) but it had shape "
                f"{descriptors.shape}"
            )

        indeces = self.index_of(descriptors).tolist()
        if isinstance(indeces, int):
            # A single descriptor was given: normalise to batch form
            indeces = [indeces]
            descriptors = [descriptors]

        instances = {}
        for idx, desc in zip(indeces, descriptors):
            if idx not in self._grid:
                print(f"There is not any instance in the cell {desc}.")
            else:
                instances[tuple(desc)] = copy.copy(self._grid[idx])
        return instances

    def __iter__(self):
        """Iterates over the dictionary of instances

        Returns:
            Iterator: Yields position in the hypercube and instance located in such position
        """
        return iter(self._grid.values())

    def lower_i(self, i):
        """Returns the lower bound of dimension ``i``.

        Raises:
            ValueError: if ``i`` is outside the valid range of dimensions.
        """
        # Bug fix: the previous check (i > len) let i == len through and
        # caused an IndexError on the array access below.
        if i < 0 or i >= len(self._lower_bounds):
            msg = f"index {i} is out of bounds. Valid values are [0-{len(self._boundaries) - 1}]"
            raise ValueError(msg)
        return self._lower_bounds[i]

    def upper_i(self, i):
        """Returns the upper bound of dimension ``i``.

        Raises:
            ValueError: if ``i`` is outside the valid range of dimensions.
        """
        # Bug fix: the previous check (i > len) let i == len through and
        # caused an IndexError on the array access below.
        if i < 0 or i >= len(self._upper_bounds):
            msg = f"index {i} is out of bounds. Valid values are [0-{len(self._boundaries) - 1}]"
            raise ValueError(msg)
        return self._upper_bounds[i]

    def append(self, instance: Instance):
        """Inserts an Instance into the Grid

        Args:
            instance (Instance): Instance to be inserted

        Raises:
            TypeError: ``instance`` is not an instance of the class Instance.
        """
        if isinstance(instance, Instance):
            index = self.index_of(np.asarray(instance.descriptor))
            # Keep the incumbent unless the cell is empty or the newcomer wins
            if index not in self._grid or instance > self._grid[index]:
                self._grid[index] = instance.clone()

        else:
            msg = "Only objects of type Instance can be inserted into a GridArchive"
            raise TypeError(msg)

    def extend(self, iterable: Iterable[Instance], *args, **kwargs):
        """Includes all the instances in iterable into the Grid

        Args:
            iterable (Iterable[Instance]): Iterable of instances

        Raises:
            TypeError: any element of ``iterable`` is not an Instance.
        """
        if not all(isinstance(i, Instance) for i in iterable):
            msg = "Only objects of type Instance can be inserted into a GridArchive"
            raise TypeError(msg)

        indeces = self.index_of([i.descriptor for i in iterable])
        for idx, instance in zip(indeces, iterable, strict=True):
            # Replace only when the cell is empty or the newcomer is fitter
            if idx not in self._grid or instance.fitness > self._grid[idx].fitness:
                self._grid[idx] = instance.clone()

    def remove(self, iterable: Iterable[Instance]):
        """Removes all the instances in iterable from the grid

        Raises:
            TypeError: any element of ``iterable`` is not an Instance.
        """
        if not all(isinstance(i, Instance) for i in iterable):
            # Bug fix: the message previously referred to "CVTArchive"
            # (copy-paste from the sibling class).
            msg = "Only objects of type Instance can be removed from a GridArchive"
            raise TypeError(msg)

        indeces_to_remove = self.index_of([i.descriptor for i in iterable])
        for index in indeces_to_remove:
            if index in self._grid:
                del self._grid[index]

    def index_of(self, descriptors):
        """Computes the indeces of a batch of descriptors.

        Args:
            descriptors (array-like): (batch_size, dimensions) array of descriptors for each instance

        Raises:
            ValueError: ``descriptors`` is not shape (batch_size, dimensions)

        Returns:
            np.ndarray:  (batch_size, ) array of integer indices representing the flattened grid coordinates.
        """
        if len(descriptors) == 0:
            # Bug fix: return an integer array so the empty-batch path has a
            # dtype consistent with the normal path (np.empty defaults to float).
            return np.empty(0, dtype=int)

        descriptors = np.asarray(descriptors)
        if (
            descriptors.ndim == 1
            and descriptors.shape[0] != len(self._dimensions)
            or descriptors.ndim == 2
            and descriptors.shape[1] != len(self._dimensions)
        ):
            raise ValueError(
                f"Expected descriptors to be an array with shape "
                f"(batch_size, dimensions) (i.e. shape "
                f"(batch_size, {len(self._dimensions)})) but it had shape "
                f"{descriptors.shape}"
            )

        # Linearly map each measure value into its per-dimension cell index;
        # eps compensates for floating point precision at cell boundaries.
        grid_indices = (
            (self._dimensions * (descriptors - self._lower_bounds) + self._eps)
            / self._interval
        ).astype(int)

        # Clip the indexes to make sure they are in the expected range for each dimension
        clipped = np.clip(grid_indices, 0, self._dimensions - 1)
        return self._grid_to_int_index(clipped)

    def _grid_to_int_index(self, grid_indices) -> np.ndarray:
        """Flattens per-dimension grid coordinates into single integer indices."""
        grid_indices = np.asarray(grid_indices)
        if len(self._dimensions) > 64:
            # np.ravel_multi_index cannot handle > 64 dimensions, so the
            # flattening is done manually with Python (arbitrary precision) ints.
            strides = np.cumprod((1,) + tuple(self._dimensions[::-1][:-1]))[::-1]
            # Reshape strides to (1, num_dimensions) to make it broadcastable with grid_indices
            strides = strides.reshape(1, -1)
            flattened_indeces = np.sum(grid_indices * strides, axis=1, dtype=object)
        else:
            flattened_indeces = np.ravel_multi_index(
                grid_indices.T, self._dimensions
            ).astype(int)
        return flattened_indeces

    def int_to_grid_index(self, int_indices) -> np.ndarray:
        """Expands flattened integer indices back into per-dimension coordinates."""
        int_indices = np.asarray(int_indices)
        if len(self._dimensions) > 64:
            # Manually unravel the index for dimensions > 64
            unravel_indices = []
            remaining_indices = int_indices.astype(object)

            for dim_size in self._dimensions[::-1]:
                unravel_indices.append(remaining_indices % dim_size)
                remaining_indices //= dim_size

            unravel_indices = np.array(unravel_indices[::-1]).T
        else:
            unravel_indices = np.asarray(
                np.unravel_index(
                    int_indices,
                    self._dimensions,
                )
            ).T.astype(int)
        return unravel_indices

    def asdict(self) -> dict:
        """Returns a plain-dict representation of the archive (JSON-friendly)."""
        return {
            "dimensions": self._dimensions.tolist(),
            "lbs": self._lower_bounds.tolist(),
            "ubs": self._upper_bounds.tolist(),
            "n_cells": self._cells,
            "instances": {
                i: instance.asdict() for i, instance in enumerate(self._grid.values())
            },
        }

    def to_json(self) -> str:
        """Returns the content of the GridArchive in JSON format."""
        return json.dumps(self.asdict(), indent=4)

bounds property

list of numpy.ndarray: The boundaries of the cells in each dimension.

Entry i in this list is an array that contains the boundaries of the cells in dimension i. The array contains self.dims[i] + 1 entries laid out like this::

Archive cells:  | 0 | 1 |   ...   |    self.dims[i]    |
boundaries[i]:    0   1   2            self.dims[i] - 1  self.dims[i]

Thus, boundaries[i][j] and boundaries[i][j + 1] are the lower and upper bounds of cell j in dimension i. To access the lower bounds of all the cells in dimension i, use boundaries[i][:-1], and to access all the upper bounds, use boundaries[i][1:].

coverage property

Get the coverage of the hypercube space. The coverage is calculated as the number of cells filled over the total space available.

Returns:
  • float

    Filled cells over the total available.

__getitem__(key)

Returns a dictionary with the descriptors as the keys. The values are the instances found. Note that some of the given keys may not be in the archive.

Parameters:
  • key (array - like or descriptor) –

    Descriptors of the instances that want to retrieve.

  • Valid (examples are) –
Raises:
  • TypeError

    If the key is an slice. Not allowed.

  • ValueError

    If the shape of the keys are not valid.

Returns:
  • dict

    Returns a dict with the found instances.

Source code in digneapy/archives/_grid_archive.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
def __getitem__(self, key):
    """Retrieve instances by their descriptors.

    Builds a dict keyed by descriptor tuple with the instances found in the
    corresponding cells. Keys that do not map to an occupied cell are
    reported on stdout and omitted from the result.

    Args:
        key (array-like or descriptor): Descriptors of the instances to
        retrieve. Valid examples are:
        -   archive[[0,11], [0,5]] --> Get the instances with the descriptors (0,11) and (0, 5)
        -   archive[0,11] --> Get the instance with the descriptor (0,11)

    Raises:
        TypeError: If the key is a slice. Not allowed.
        ValueError: If the shape of the keys is not valid.

    Returns:
        dict: Returns a dict with the found instances.
    """
    if isinstance(key, slice):
        raise TypeError(
            "Slicing is not available in GridArchive. Use 1D index or descriptor-type indeces"
        )
    descriptors = np.asarray(key)
    is_single = descriptors.ndim == 1
    if is_single and descriptors.shape[0] != len(self._dimensions):
        raise ValueError(
            f"Expected descriptors to be an array with shape "
            f"(batch_size, 1) or (batch_size, dimensions) (i.e. shape "
            f"(batch_size, {len(self._dimensions)})) but it had shape "
            f"{descriptors.shape}"
        )

    indices = self.index_of(descriptors).tolist()
    if isinstance(indices, int):
        # A single descriptor was given; normalise to batch form.
        indices = [indices]
        descriptors = [descriptors]

    found = {}
    for cell_index, descriptor in zip(indices, descriptors):
        if cell_index in self._grid:
            # Shallow copy so callers cannot mutate the archived instance.
            found[tuple(descriptor)] = copy.copy(self._grid[cell_index])
        else:
            print(f"There is not any instance in the cell {descriptor}.")
    return found

__init__(dimensions, ranges, instances=None, eps=1e-06, dtype=np.float64)

Creates a GridArchive instance

Parameters:
  • dimensions (Sequence[int]) –

    (array-like of int): Number of cells in each dimension of the

  • ranges (Sequence[Tuple[float]]) –

    (array-like of (float, float)): Upper and lower bound of each
    dimension of the measure space, e.g. ``[(-1, 1), (-2, 2)]`` indicates
    the first dimension should have bounds :math:`[-1, 1]` (inclusive) and
    the second dimension :math:`[-2, 2]` (inclusive). ``ranges`` should be
    the same length as ``dimensions``.

  • instances (Optional[Iterable[Instance]], default: None ) –

    Instances to pre-initialise the archive. Defaults to None.

  • eps (float, default: 1e-06 ) –

    Due to floating point precision errors, a small epsilon is added when
    computing the archive indices in the :meth:`index_of` method.

  • dtype (str or data-type) –

    Data type of the solutions, objectives, and measures.

Raises:
  • ValueError

    dimensions and ranges are not the same length

Source code in digneapy/archives/_grid_archive.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def __init__(
    self,
    dimensions: Sequence[int],
    ranges: Sequence[Tuple[float, float]],
    instances: Optional[Iterable[Instance]] = None,
    eps: float = 1e-6,
    dtype=np.float64,
):
    """Creates a GridArchive instance

    Args:
        dimensions (Sequence[int]): Number of cells in each dimension of the
            measure space, e.g. ``[20, 30, 40]`` indicates there should be 3
            dimensions with 20, 30, and 40 cells. (The number of dimensions
            is implicitly defined by the length of this argument.)
        ranges (Sequence[Tuple[float, float]]): Upper and lower bound of each
            dimension of the measure space, e.g. ``[(-1, 1), (-2, 2)]``
            indicates the first dimension should have bounds :math:`[-1,1]`
            (inclusive) and the second :math:`[-2,2]` (inclusive).
            ``ranges`` should be the same length as ``dimensions``.
        instances (Optional[Iterable[Instance]], optional): Instances to
            pre-initialise the archive. Defaults to None.
        eps (float, optional): Due to floating point precision errors, a
            small epsilon is added when computing the archive indices in the
            :meth:`index_of` method. Defaults to 1e-6.
        dtype (str or data-type): Data type of the stored bounds.
            Defaults to np.float64.

    Raises:
        ValueError: ``dimensions`` and ``ranges`` are not the same length,
            or either of them is empty.
    """
    # The unstructured-Archive threshold is irrelevant for a grid; use a
    # huge sentinel so it never filters anything out.
    Archive.__init__(self, threshold=np.finfo(np.float32).max, dtype=dtype)
    if len(ranges) == 0 or len(dimensions) == 0:
        raise ValueError("dimensions and ranges must have length >= 1")
    if len(ranges) != len(dimensions):
        raise ValueError(
            f"len(dimensions) = {len(dimensions)} != len(ranges) = {len(ranges)} in GridArchive.__init__()"
        )

    self._dimensions = np.asarray(dimensions)
    ranges = list(zip(*ranges))
    self._lower_bounds = np.array(ranges[0], dtype=dtype)
    self._upper_bounds = np.array(ranges[1], dtype=dtype)
    self._interval = self._upper_bounds - self._lower_bounds
    self._eps = eps
    # dtype=object keeps the cell count exact even when the product of the
    # dimensions exceeds the int64 range.
    self._cells = np.prod(self._dimensions, dtype=object)
    self._grid: Dict[int, Instance] = {}

    # A dimension with D cells is delimited by D + 1 boundary points (see
    # the ``bounds`` property documentation). The previous code used
    # ``np.linspace(l_b, u_b, dimension)``, which yields only D points —
    # an off-by-one against that documented contract.
    _bounds = []
    for dimension, l_b, u_b in zip(
        self._dimensions, self._lower_bounds, self._upper_bounds
    ):
        _bounds.append(np.linspace(l_b, u_b, dimension + 1))

    # NOTE(review): np.asarray over ragged boundary lists (unequal dims)
    # fails on NumPy >= 1.24 — same behaviour as before this fix.
    self._boundaries = np.asarray(_bounds)

    if instances is not None:
        self.extend(instances)

__iter__()

Iterates over the dictionary of instances

Returns:
  • Iterator

    Yields position in the hypercube and instance located in such position

Source code in digneapy/archives/_grid_archive.py
194
195
196
197
198
199
200
def __iter__(self):
    """Iterates over the stored instances.

    Returns:
        Iterator: yields every instance currently held in the hypercube grid.
    """
    yield from self._grid.values()

append(instance)

Inserts an Instance into the Grid

Parameters:
  • instance (Instance) –

    Instance to be inserted

Raises:
  • TypeError

    instance is not an instance of the class Instance.

Source code in digneapy/archives/_grid_archive.py
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def append(self, instance: Instance):
    """Inserts an Instance into the Grid

    The instance is placed in the cell indexed by its descriptor. When the
    cell is already occupied, the incumbent is replaced only if the new
    instance has a strictly better fitness.

    Args:
        instance (Instance): Instance to be inserted

    Raises:
        TypeError: ``instance`` is not an instance of the class Instance.
    """
    if not isinstance(instance, Instance):
        msg = "Only objects of type Instance can be inserted into a GridArchive"
        raise TypeError(msg)

    index = self.index_of(np.asarray(instance.descriptor))
    # Compare by fitness explicitly so that append() and extend() apply
    # the same replacement criterion (extend() already compares .fitness).
    if index not in self._grid or instance.fitness > self._grid[index].fitness:
        # Clone so later mutations of the caller's object do not leak in.
        self._grid[index] = instance.clone()

extend(iterable, *args, **kwargs)

Includes all the instances in iterable into the Grid

Parameters:
  • iterable (Iterable[Instance]) –

    Iterable of instances

Source code in digneapy/archives/_grid_archive.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
def extend(self, iterable: Iterable[Instance], *args, **kwargs):
    """Includes all the instances in iterable into the Grid

    Args:
        iterable (Iterable[Instance]): Iterable of instances
    """
    if not all(isinstance(i, Instance) for i in iterable):
        msg = "Only objects of type Instance can be inserted into a GridArchive"
        raise TypeError(msg)

    indeces = self.index_of([i.descriptor for i in iterable])
    for idx, instance in zip(indeces, iterable, strict=True):
        if idx not in self._grid or instance.fitness > self._grid[idx].fitness:
            self._grid[idx] = instance.clone()

index_of(descriptors)

Computes the indices of a batch of descriptors.

Parameters:
  • descriptors (array-like) –

    (batch_size, dimensions) array of descriptors for each instance

Raises:
  • ValueError

    descriptors is not shape (batch_size, dimensions)

Returns:
  • np.ndarray: (batch_size, ) array of integer indices representing the flattened grid coordinates.

Source code in digneapy/archives/_grid_archive.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
def index_of(self, descriptors):
    """Computes the indices of a batch of descriptors.

    Each descriptor is mapped to its cell coordinates in the grid and the
    coordinates are flattened into a single integer index.

    Args:
        descriptors (array-like): (batch_size, dimensions) array of descriptors for each instance

    Raises:
        ValueError: ``descriptors`` is not shape (batch_size, dimensions)

    Returns:
        np.ndarray:  (batch_size, ) array of integer indices representing the flattened grid coordinates.
    """
    if len(descriptors) == 0:
        # Integral dtype so the (empty) result is still usable as indices;
        # the previous np.empty(0) defaulted to float64.
        return np.empty(0, dtype=int)

    descriptors = np.asarray(descriptors)
    bad_single = descriptors.ndim == 1 and descriptors.shape[0] != len(self._dimensions)
    bad_batch = descriptors.ndim == 2 and descriptors.shape[1] != len(self._dimensions)
    if bad_single or bad_batch:
        raise ValueError(
            f"Expected descriptors to be an array with shape "
            f"(batch_size, dimensions) (i.e. shape "
            f"(batch_size, {len(self._dimensions)})) but it had shape "
            f"{descriptors.shape}"
        )

    # Affine map from measure space to integer cell coordinates; eps
    # compensates floating point error at the cell boundaries.
    grid_indices = (
        (self._dimensions * (descriptors - self._lower_bounds) + self._eps)
        / self._interval
    ).astype(int)

    # Clip the indexes to make sure they are in the expected range for each dimension
    clipped = np.clip(grid_indices, 0, self._dimensions - 1)
    return self._grid_to_int_index(clipped)

remove(iterable)

Removes all the instances in iterable from the grid

Source code in digneapy/archives/_grid_archive.py
247
248
249
250
251
252
253
254
255
256
def remove(self, iterable: Iterable[Instance]):
    """Removes all the instances in iterable from the grid

    Instances whose descriptor does not map to an occupied cell are
    silently ignored.

    Raises:
        TypeError: any element of ``iterable`` is not an Instance.
    """
    # Materialise first: ``iterable`` may be a one-shot generator and it is
    # traversed twice below (validation and index computation).
    instances = list(iterable)
    if not all(isinstance(i, Instance) for i in instances):
        # Fixed copy-paste error: the message previously said "CVTArchive"
        # although this is GridArchive.
        msg = "Only objects of type Instance can be removed from a GridArchive"
        raise TypeError(msg)

    indices_to_remove = self.index_of([i.descriptor for i in instances])
    for index in indices_to_remove:
        if index in self._grid:
            del self._grid[index]