grid archive

`GridArchive`

Bases: Archive

An archive that divides each dimension into uniformly-sized cells. The source code of this class is inspired by the GridArchive class of pyribs (https://github.com/icaros-usc/pyribs/blob/master/ribs/archives/_grid_archive.py). This archive is the container described in Mouret 2015 (https://arxiv.org/pdf/1504.04909.pdf). It can be visualized as an n-dimensional grid in the measure space that is divided into a certain number of cells in each dimension. Each cell contains an elite, i.e. a solution that maximizes the objective function for the measures in that cell.

Source code in digneapy/archives/_grid_archive.py

class GridArchive(Archive):
    """An archive that divides each dimension into uniformly-sized cells.

    The source code of this class is inspired by the GridArchive class of
    pyribs
    <https://github.com/icaros-usc/pyribs/blob/master/ribs/archives/_grid_archive.py>.
    This archive is the container described in `Mouret 2015
    <https://arxiv.org/pdf/1504.04909.pdf>`_. It can be visualized as an
    n-dimensional grid in the measure space that is divided into a certain
    number of cells in each dimension. Each cell contains an elite, i.e. a
    solution that `maximizes` the objective function for the measures in that
    cell.
    """

    def __init__(
        self,
        dimensions: Sequence[int],
        ranges: Sequence[Tuple[float, float]],
        instances: Optional[Iterable[Instance]] = None,
        eps: float = 1e-6,
        dtype=np.float64,
    ):
        """Creates a GridArchive instance

        Args:
            dimensions (Sequence[int]): Number of cells in each dimension of
                the measure space, e.g. ``[20, 30, 40]`` indicates there
                should be 3 dimensions with 20, 30, and 40 cells. (The number
                of dimensions is implicitly defined by the length of this
                argument).
            ranges (Sequence[Tuple[float, float]]): Lower and upper bound of
                each dimension of the measure space, e.g.
                ``[(-1, 1), (-2, 2)]`` indicates the first dimension should
                have bounds :math:`[-1,1]` (inclusive), and the second
                dimension should have bounds :math:`[-2,2]` (inclusive).
                ``ranges`` should be the same length as ``dimensions``.
            instances (Optional[Iterable[Instance]], optional): Instances to
                pre-initialise the archive. Defaults to None.
            eps (float, optional): Small epsilon added when computing the
                archive indices in :meth:`index_of` to compensate for
                floating point precision errors. Defaults to 1e-6.
            dtype (str or data-type): Data type used for the lower and upper
                bounds; it is also forwarded to ``Archive.__init__``.

        Raises:
            ValueError: ``dimensions`` or ``ranges`` is empty, or they are
                not the same length.
        """
        Archive.__init__(self, threshold=np.finfo(np.float32).max, dtype=dtype)
        if len(ranges) == 0 or len(dimensions) == 0:
            raise ValueError("dimensions and ranges must have length >= 1")
        if len(ranges) != len(dimensions):
            raise ValueError(
                f"len(dimensions) = {len(dimensions)} != len(ranges) = {len(ranges)} in GridArchive.__init__()"
            )

        self._dimensions = np.asarray(dimensions)
        # Transpose [(lb_0, ub_0), (lb_1, ub_1), ...] into (lbs, ubs).
        ranges = list(zip(*ranges))
        self._lower_bounds = np.array(ranges[0], dtype=dtype)
        self._upper_bounds = np.array(ranges[1], dtype=dtype)
        self._interval = self._upper_bounds - self._lower_bounds
        self._eps = eps
        # dtype=object keeps the product an arbitrary-precision integer.
        self._cells = np.prod(self._dimensions, dtype=object)

        # Flat cell index -> descriptor / instance stored in that cell.
        self._grid: Dict[int, np.ndarray] = {}
        self._storage: Dict[int, Instance] = {}

        # A dimension with ``d`` cells is delimited by ``d + 1`` boundaries.
        # The result is kept as a list because the per-dimension arrays have
        # different lengths whenever the dimensions have different cell counts.
        self._boundaries = [
            np.linspace(l_b, u_b, dimension + 1)
            for dimension, l_b, u_b in zip(
                self._dimensions, self._lower_bounds, self._upper_bounds
            )
        ]

        if instances is not None:
            self.extend(instances)

    @property
    def dimensions(self):
        """numpy.ndarray: Number of cells in each dimension."""
        return self._dimensions

    @property
    def bounds(self):
        """list of numpy.ndarray: The boundaries of the cells in each dimension.

        Entry ``i`` in this list is an array that contains the boundaries of the
        cells in dimension ``i``. The array contains ``self.dimensions[i] + 1``
        entries laid out like this::

            Archive cells:  | 0 | 1 |   ...   | self.dimensions[i] - 1 |
            boundaries[i]:    0   1   2   ...   self.dimensions[i] - 1   self.dimensions[i]

        Thus, ``boundaries[i][j]`` and ``boundaries[i][j + 1]`` are the lower
        and upper bounds of cell ``j`` in dimension ``i``. To access the lower
        bounds of all the cells in dimension ``i``, use ``boundaries[i][:-1]``,
        and to access all the upper bounds, use ``boundaries[i][1:]``.
        """
        return self._boundaries

    @property
    def n_cells(self):
        """int: Total number of cells in the grid."""
        return self._cells

    @property
    def coverage(self):
        """Get the coverage of the hypercube space.
        The coverage is calculated as the number of cells filled over the total space available.

        Returns:
            float: Filled cells over the total available.
        """
        if len(self._grid) == 0:
            return 0.0

        return len(self._grid) / self._cells

    @property
    def filled_cells(self):
        """Flat indices of the cells that currently hold an instance."""
        return self._grid.keys()

    @property
    def instances(self) -> Sequence[Instance]:
        """list: The instances currently stored in the archive."""
        return list(self._storage.values())

    def __str__(self):
        return f"GridArchive(dim={self._dimensions},cells={self._cells},bounds={self._boundaries})"

    def __repr__(self):
        return f"GridArchive(dim={self._dimensions},cells={self._cells},bounds={self._boundaries})"

    def __len__(self):
        return len(self._grid)

    def __getitem__(self, key):
        """Returns the instances stored at the given flat indices or descriptors.

        Args:
            key (int, array-like of int or array-like of descriptors): A
                scalar or 1D key is interpreted as flat cell indices; a 2D key
                of shape (batch_size, dimensions) is interpreted as
                descriptors and mapped to indices with :meth:`index_of`.
                Valid examples are:
                -   archive[[0,11], [0,5]] --> Get the instances with the descriptors (0,11) and (0, 5)
                -   archive[0,11] --> Get the instances at indices 0 and 11
                -   archive[3] --> Get the instance at index 3

        Raises:
            TypeError: If the key is a slice. Not allowed.
            ValueError: If the shape of the key is not valid.
            KeyError: If any requested cell is empty.

        Returns:
            list: The instances found, in the order of the requested keys.
        """
        if isinstance(key, slice):
            raise TypeError(
                "Slicing is not available in GridArchive. Use 1D index or descriptor-type indices"
            )
        descriptors = np.asarray(key)
        if descriptors.ndim == 0:
            # A single scalar index, e.g. archive[3].
            indices = [descriptors.item()]
        elif descriptors.ndim == 1:
            indices = descriptors.tolist()
        elif descriptors.ndim == 2 and descriptors.shape[1] == len(self._dimensions):
            indices = self.index_of(descriptors).tolist()
        else:
            raise ValueError(
                f"Expected descriptors to be an array with shape "
                f"(batch_size, dimensions) (i.e. shape "
                f"(batch_size, {len(self._dimensions)})) but it had shape "
                f"{descriptors.shape}"
            )

        return [self._storage[idx] for idx in indices]

    def __iter__(self):
        """Iterates over the instances stored in the archive

        Returns:
            Iterator: Iterator over the stored instances
        """
        return iter(self._storage.values())

    def lower_i(self, i):
        """Returns the lower bound of dimension ``i``.

        Raises:
            ValueError: ``i`` is not in ``[0, number of dimensions - 1]``.
        """
        if i < 0 or i >= len(self._lower_bounds):
            msg = f"index {i} is out of bounds. Valid values are [0-{len(self._lower_bounds) - 1}]"
            raise ValueError(msg)
        return self._lower_bounds[i]

    def upper_i(self, i):
        """Returns the upper bound of dimension ``i``.

        Raises:
            ValueError: ``i`` is not in ``[0, number of dimensions - 1]``.
        """
        if i < 0 or i >= len(self._upper_bounds):
            msg = f"index {i} is out of bounds. Valid values are [0-{len(self._upper_bounds) - 1}]"
            raise ValueError(msg)
        return self._upper_bounds[i]

    def append(self, instance: Instance, descriptor: Optional[np.ndarray] = None):
        """Inserts an Instance into the Grid

        The instance is stored when its cell is empty or when its fitness is
        greater than the fitness of the instance currently in that cell.

        Args:
            instance (Instance): Instance to be inserted
            descriptor (Optional[np.ndarray], optional): Descriptor used to
                locate the cell. When None, ``instance.descriptor`` is used.

        Raises:
            TypeError: ``instance`` is not an instance of the class Instance.
        """
        if not isinstance(instance, Instance):
            msg = "Only objects of type Instance can be inserted into a GridArchive"
            raise TypeError(msg)
        descriptor = (
            np.asarray(instance.descriptor) if descriptor is None else descriptor
        )
        index = self.index_of([descriptor])[0]
        # Compare against the stored instance (not the stored descriptor),
        # using the same fitness criterion as ``extend``.
        if (
            index not in self._grid
            or instance.fitness > self._storage[index].fitness
        ):
            self._grid[index] = descriptor
            self._storage[index] = instance.clone()

    def extend(
        self,
        instances: Sequence[Instance],
        descriptors: Optional[np.ndarray] = None,
        *args,
        **kwargs,
    ):
        """Includes all the given instances into the Grid

        Args:
            instances (Iterable[Instance]): Instances to insert.
            descriptors (Optional[np.ndarray], optional): One descriptor per
                instance. When None, the ``descriptor`` attribute of each
                instance is used.

        Raises:
            TypeError: Some element is not an instance of the class Instance.
            AttributeError: ``descriptors`` is None and some instance has no
                ``descriptor`` attribute.
            ValueError: ``instances`` and ``descriptors`` differ in length.
        """
        # Materialise once so one-shot iterables survive the validation pass.
        instances = list(instances)
        if not all(isinstance(i, Instance) for i in instances):
            msg = "Only objects of type Instance can be inserted into a GridArchive"
            raise TypeError(msg)
        if descriptors is None:
            try:
                descriptors = np.asarray([i.descriptor for i in instances])
            except AttributeError as err:
                raise AttributeError(
                    "Instances do not have a descriptor yet and the value descriptor is None"
                ) from err

        indices = self.index_of(descriptors)
        for idx, instance, descriptor in zip(
            indices, instances, descriptors, strict=True
        ):
            if idx not in self._grid or instance.fitness > self._storage[idx].fitness:
                self._storage[idx] = instance.clone()
                self._grid[idx] = descriptor

    def remove(self, descriptors: np.ndarray):
        """Removes from the grid the instances stored in the cells of the given descriptors"""
        # index_of yields a scalar for a single 1D descriptor; normalise it
        # so that the loop below always receives an iterable.
        indices_to_remove = np.atleast_1d(self.index_of(descriptors))
        for index in indices_to_remove:
            if index in self._grid:
                del self._grid[index]
                del self._storage[index]

    def purge_unfeasible(self, attr: str = "p"):
        """Removes from the grid all the instances whose attribute ``attr`` is negative"""
        keys_to_remove = [
            key
            for key, instance in self._storage.items()
            if getattr(instance, attr) < 0
        ]
        for key in keys_to_remove:
            del self._grid[key]
            del self._storage[key]

    def index_of(self, descriptors):
        """Computes the indices of a batch of descriptors.

        Args:
            descriptors (array-like): (batch_size, dimensions) array of descriptors for each instance

        Raises:
            ValueError: ``descriptors`` is not shape (batch_size, dimensions)

        Returns:
            np.ndarray:  (batch_size, ) array of integer indices representing the flattened grid coordinates.
        """
        descriptors = np.asarray(descriptors)
        if descriptors.ndim >= 1 and descriptors.shape[0] == 0:
            # Integer dtype so that the empty result matches the non-empty one.
            return np.empty(0, dtype=int)
        if descriptors.ndim not in (1, 2) or descriptors.shape[-1] != len(
            self._dimensions
        ):
            raise ValueError(
                f"Expected descriptors to be an array with shape "
                f"(batch_size, dimensions) (i.e. shape "
                f"(batch_size, {len(self._dimensions)})) but it had shape "
                f"{descriptors.shape}"
            )

        grid_indices = (
            (self._dimensions * (descriptors - self._lower_bounds) + self._eps)
            / self._interval
        ).astype(int)

        # Clip the indexes to make sure they are in the expected range for each dimension
        clipped = np.clip(grid_indices, 0, self._dimensions - 1)
        return self._grid_to_int_index(clipped)

    def _grid_to_int_index(self, grid_indices) -> np.ndarray:
        """Flattens per-dimension grid coordinates into row-major integer indices."""
        grid_indices = np.asarray(grid_indices)
        if len(self._dimensions) > 64:
            # Python integers (object dtype) keep the cumulative products
            # exact; fixed-width integers overflow with this many dimensions.
            strides = np.cumprod(
                (1,) + tuple(int(d) for d in self._dimensions[::-1][:-1]),
                dtype=object,
            )[::-1]
            # Reshape strides to (1, num_dimensions) to make it broadcastable with grid_indices
            strides = strides.reshape(1, -1)
            flattened_indices = np.sum(grid_indices * strides, axis=1, dtype=object)
        else:
            flattened_indices = np.ravel_multi_index(
                grid_indices.T, self._dimensions
            ).astype(int)
        return flattened_indices

    def int_to_grid_index(self, int_indices) -> np.ndarray:
        """Converts flattened integer indices back into per-dimension grid coordinates."""
        int_indices = np.asarray(int_indices)
        if len(self._dimensions) > 64:
            # Manually unravel the index for dimensions > 64
            unravel_indices = []
            remaining_indices = int_indices.astype(object)

            # Peel off the fastest-varying (last) dimension first.
            for dim_size in self._dimensions[::-1]:
                unravel_indices.append(remaining_indices % dim_size)
                remaining_indices //= dim_size

            unravel_indices = np.array(unravel_indices[::-1]).T
        else:
            unravel_indices = np.asarray(
                np.unravel_index(
                    int_indices,
                    self._dimensions,
                )
            ).T.astype(int)
        return unravel_indices

    def asdict(self) -> dict:
        """Returns a dictionary with the grid configuration and the stored instances."""
        return {
            "dimensions": self._dimensions.tolist(),
            "lbs": self._lower_bounds.tolist(),
            "ubs": self._upper_bounds.tolist(),
            "n_cells": int(self._cells),
            "instances": {
                i: instance.asdict()
                for i, instance in enumerate(self._storage.values())
            },
        }

    def to_json(self) -> str:
        """Serialises :meth:`asdict` as a JSON string indented with 4 spaces."""
        return json.dumps(self.asdict(), indent=4)

`bounds` `property`

list of numpy.ndarray: The boundaries of the cells in each dimension.

Entry i in this list is an array that contains the boundaries of the cells in dimension i. The array contains self.dims[i] + 1 entries laid out like this::

Archive cells:  | 0 | 1 |   ...   |    self.dims[i]    |
boundaries[i]:    0   1   2            self.dims[i] - 1  self.dims[i]

Thus, boundaries[i][j] and boundaries[i][j + 1] are the lower and upper bounds of cell j in dimension i. To access the lower bounds of all the cells in dimension i, use boundaries[i][:-1], and to access all the upper bounds, use boundaries[i][1:].

`coverage` `property`

Get the coverage of the hypercube space. The coverage is calculated as the number of cells filled over the total space available.

Returns:	`float` – Filled cells over the total available.

`getitem(key)`

Returns a list with the instances stored at the given indices or descriptors. Note that some of the given keys may not be in the archive; looking up an empty cell raises a KeyError.

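Parameters:	`key` (`array-like or descriptor`) – Flat indices or descriptors of the instances to retrieve. Valid examples are: `archive[[0,11], [0,5]]` (get the instances with the descriptors (0, 11) and (0, 5)) and `archive[0,11]` (get the instances at indices 0 and 11).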

Raises:	`TypeError` – If the key is a slice. Not allowed. `ValueError` – If the shape of the key is not valid.

Returns:	`list` – Returns a list with the found instances.

Source code in digneapy/archives/_grid_archive.py

def __getitem__(self, key):
    """Returns the instances stored at the given flat indices or descriptors.

    Args:
        key (int, array-like of int or array-like of descriptors): A scalar
            or 1D key is interpreted as flat cell indices; a 2D key of shape
            (batch_size, dimensions) is interpreted as descriptors and mapped
            to indices with ``index_of``.
            Valid examples are:
            -   archive[[0,11], [0,5]] --> Get the instances with the descriptors (0,11) and (0, 5)
            -   archive[0,11] --> Get the instances at indices 0 and 11
            -   archive[3] --> Get the instance at index 3

    Raises:
        TypeError: If the key is a slice. Not allowed.
        ValueError: If the shape of the key is not valid.
        KeyError: If any requested cell is empty.

    Returns:
        list: The instances found, in the order of the requested keys.
    """
    if isinstance(key, slice):
        raise TypeError(
            "Slicing is not available in GridArchive. Use 1D index or descriptor-type indices"
        )
    descriptors = np.asarray(key)
    if descriptors.ndim == 0:
        # A single scalar index, e.g. archive[3].
        indices = [descriptors.item()]
    elif descriptors.ndim == 1:
        indices = descriptors.tolist()
    elif descriptors.ndim == 2 and descriptors.shape[1] == len(self._dimensions):
        indices = self.index_of(descriptors).tolist()
    else:
        raise ValueError(
            f"Expected descriptors to be an array with shape "
            f"(batch_size, dimensions) (i.e. shape "
            f"(batch_size, {len(self._dimensions)})) but it had shape "
            f"{descriptors.shape}"
        )

    return [self._storage[idx] for idx in indices]

`init(dimensions, ranges, instances=None, eps=1e-06, dtype=np.float64)`

Creates a GridArchive instance

Parameters:

dimensions (Sequence[int]) –

Number of cells in each dimension of the measure space, e.g. [20, 30, 40] indicates there should be 3 dimensions with 20, 30, and 40 cells. (The number of dimensions is implicitly defined by the length of this argument.)

ranges (Sequence[Tuple[float, float]]) –

Lower and upper bound of each dimension of the measure space, e.g. [(-1, 1), (-2, 2)] indicates the first dimension should have bounds [-1, 1] (inclusive), and the second dimension should have bounds [-2, 2] (inclusive). ranges should be the same length as dimensions.

instances (Optional[Iterable[Instance]], default: None ) –

Instances to pre-initialise the archive. Defaults to None.

eps (float, default: 1e-06 ) –

Due to floating point precision errors, a small epsilon is added when computing the archive indices in the index_of method.

dtype (str or data-type, default: float64 ) –

Data type of the solutions, objectives, and measures.

Raises:	`ValueError` – `dimensions` and `ranges` are not the same length

Source code in digneapy/archives/_grid_archive.py

def __init__(
    self,
    dimensions: Sequence[int],
    ranges: Sequence[Tuple[float, float]],
    instances: Optional[Iterable[Instance]] = None,
    eps: float = 1e-6,
    dtype=np.float64,
):
    """Creates a GridArchive instance

    Args:
        dimensions (Sequence[int]): Number of cells in each dimension of the
            measure space, e.g. ``[20, 30, 40]`` indicates there should be 3
            dimensions with 20, 30, and 40 cells. (The number of dimensions
            is implicitly defined by the length of this argument).
        ranges (Sequence[Tuple[float, float]]): Lower and upper bound of each
            dimension of the measure space, e.g. ``[(-1, 1), (-2, 2)]``
            indicates the first dimension should have bounds :math:`[-1,1]`
            (inclusive), and the second dimension should have bounds
            :math:`[-2,2]` (inclusive). ``ranges`` should be the same length
            as ``dimensions``.
        instances (Optional[Iterable[Instance]], optional): Instances to
            pre-initialise the archive. Defaults to None.
        eps (float, optional): Small epsilon added when computing the archive
            indices in :meth:`index_of` to compensate for floating point
            precision errors. Defaults to 1e-6.
        dtype (str or data-type): Data type used for the lower and upper
            bounds; it is also forwarded to ``Archive.__init__``.

    Raises:
        ValueError: ``dimensions`` or ``ranges`` is empty, or they are not
            the same length.
    """
    Archive.__init__(self, threshold=np.finfo(np.float32).max, dtype=dtype)
    if len(ranges) == 0 or len(dimensions) == 0:
        raise ValueError("dimensions and ranges must have length >= 1")
    if len(ranges) != len(dimensions):
        raise ValueError(
            f"len(dimensions) = {len(dimensions)} != len(ranges) = {len(ranges)} in GridArchive.__init__()"
        )

    self._dimensions = np.asarray(dimensions)
    # Transpose [(lb_0, ub_0), (lb_1, ub_1), ...] into (lbs, ubs).
    ranges = list(zip(*ranges))
    self._lower_bounds = np.array(ranges[0], dtype=dtype)
    self._upper_bounds = np.array(ranges[1], dtype=dtype)
    self._interval = self._upper_bounds - self._lower_bounds
    self._eps = eps
    # dtype=object keeps the product an arbitrary-precision integer.
    self._cells = np.prod(self._dimensions, dtype=object)

    # Flat cell index -> descriptor / instance stored in that cell.
    self._grid: Dict[int, np.ndarray] = {}
    self._storage: Dict[int, Instance] = {}

    # A dimension with ``d`` cells is delimited by ``d + 1`` boundaries.
    # The result is kept as a list because the per-dimension arrays have
    # different lengths whenever the dimensions have different cell counts.
    self._boundaries = [
        np.linspace(l_b, u_b, dimension + 1)
        for dimension, l_b, u_b in zip(
            self._dimensions, self._lower_bounds, self._upper_bounds
        )
    ]

    if instances is not None:
        self.extend(instances)

`iter()`

Iterates over the dictionary of instances

Returns:	`Iterator` – Iterator over the instances stored in the archive

Source code in digneapy/archives/_grid_archive.py

def __iter__(self):
    """Iterates over the instances held in the archive.

    Only the stored instances are produced; the cell indices under which
    they are stored are not part of the iteration.

    Returns:
        Iterator: Iterator over the values of the internal storage
    """
    stored_instances = self._storage.values()
    return iter(stored_instances)

`append(instance, descriptor=None)`

Inserts an Instance into the Grid

Parameters:	`instance` (`Instance`) – Instance to be inserted

Raises:	`TypeError` – `instance` is not an instance of the class Instance.

Source code in digneapy/archives/_grid_archive.py

def append(self, instance: Instance, descriptor: Optional[np.ndarray] = None):
    """Inserts an Instance into the Grid

    The instance is stored when its cell is empty or when its fitness is
    greater than the fitness of the instance currently in that cell.

    Args:
        instance (Instance): Instance to be inserted
        descriptor (Optional[np.ndarray], optional): Descriptor used to
            locate the cell. When None, ``instance.descriptor`` is used.

    Raises:
        TypeError: ``instance`` is not an instance of the class Instance.
    """
    if not isinstance(instance, Instance):
        msg = "Only objects of type Instance can be inserted into a GridArchive"
        raise TypeError(msg)
    descriptor = (
        np.asarray(instance.descriptor) if descriptor is None else descriptor
    )
    index = self.index_of([descriptor])[0]
    # Compare against the stored instance (not the stored descriptor),
    # using the same fitness criterion as ``extend``.
    if index not in self._grid or instance.fitness > self._storage[index].fitness:
        self._grid[index] = descriptor
        self._storage[index] = instance.clone()

`extend(instances, descriptors=None, *args, **kwargs)`

Includes all the instances in iterable into the Grid

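Parameters:	`instances` (`Iterable[Instance]`) – Instances to insert. `descriptors` (`Optional[np.ndarray]`) – One descriptor per instance; when None, the descriptor of each instance is used.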

Source code in digneapy/archives/_grid_archive.py

def extend(
    self,
    instances: Sequence[Instance],
    descriptors: Optional[np.ndarray] = None,
    *args,
    **kwargs,
):
    """Includes all the given instances into the Grid

    Args:
        instances (Iterable[Instance]): Instances to insert.
        descriptors (Optional[np.ndarray], optional): One descriptor per
            instance. When None, the ``descriptor`` attribute of each
            instance is used.

    Raises:
        TypeError: Some element is not an instance of the class Instance.
        AttributeError: ``descriptors`` is None and some instance has no
            ``descriptor`` attribute.
        ValueError: ``instances`` and ``descriptors`` differ in length.
    """
    # Materialise once so one-shot iterables survive the validation pass.
    instances = list(instances)
    if not all(isinstance(i, Instance) for i in instances):
        msg = "Only objects of type Instance can be inserted into a GridArchive"
        raise TypeError(msg)
    if descriptors is None:
        try:
            descriptors = np.asarray([i.descriptor for i in instances])
        except AttributeError as err:
            raise AttributeError(
                "Instances do not have a descriptor yet and the value descriptor is None"
            ) from err

    indices = self.index_of(descriptors)
    for idx, instance, descriptor in zip(
        indices, instances, descriptors, strict=True
    ):
        if idx not in self._grid or instance.fitness > self._storage[idx].fitness:
            self._storage[idx] = instance.clone()
            self._grid[idx] = descriptor

`index_of(descriptors)`

Computes the indices of a batch of descriptors.

Parameters:	`descriptors` (`array - like`) – (batch_size, dimensions) array of descriptors for each instance

Raises:	`ValueError` – `descriptors` is not shape (batch_size, dimensions)

Returns:	– np.ndarray: (batch_size, ) array of integer indices representing the flattened grid coordinates.

Source code in digneapy/archives/_grid_archive.py

def index_of(self, descriptors):
    """Computes the indices of a batch of descriptors.

    Args:
        descriptors (array-like): (batch_size, dimensions) array of descriptors for each instance

    Raises:
        ValueError: ``descriptors`` is not shape (batch_size, dimensions)

    Returns:
        np.ndarray:  (batch_size, ) array of integer indices representing the flattened grid coordinates.
    """
    descriptors = np.asarray(descriptors)
    if descriptors.ndim >= 1 and descriptors.shape[0] == 0:
        # Integer dtype so that the empty result matches the non-empty one.
        return np.empty(0, dtype=int)
    if descriptors.ndim not in (1, 2) or descriptors.shape[-1] != len(
        self._dimensions
    ):
        raise ValueError(
            f"Expected descriptors to be an array with shape "
            f"(batch_size, dimensions) (i.e. shape "
            f"(batch_size, {len(self._dimensions)})) but it had shape "
            f"{descriptors.shape}"
        )

    # Scale each descriptor to cell units; eps compensates for floating
    # point error before truncating to an integer coordinate.
    grid_indices = (
        (self._dimensions * (descriptors - self._lower_bounds) + self._eps)
        / self._interval
    ).astype(int)

    # Clip the indexes to make sure they are in the expected range for each dimension
    clipped = np.clip(grid_indices, 0, self._dimensions - 1)
    return self._grid_to_int_index(clipped)

`purge_unfeasible(attr='p')`

Removes all the unfeasible instances from the grid

Source code in digneapy/archives/_grid_archive.py

def purge_unfeasible(self, attr: str = "p"):
    """Drops from the grid every stored instance whose ``attr`` value is negative.

    Args:
        attr (str, optional): Name of the attribute read from each stored
            instance; entries with a value below zero are removed.
            Defaults to "p".
    """
    # Collect the cells first: the dictionaries cannot be resized while
    # they are being iterated.
    doomed_cells = []
    for cell, occupant in self._storage.items():
        if getattr(occupant, attr) < 0:
            doomed_cells.append(cell)

    for cell in doomed_cells:
        del self._grid[cell]
        del self._storage[cell]

`remove(descriptors)`

Removes all the instances with the matching descriptors in iterable from the grid

Source code in digneapy/archives/_grid_archive.py

def remove(self, descriptors: np.ndarray):
    """Removes from the grid the instances stored in the cells of the given descriptors

    Args:
        descriptors (np.ndarray): Descriptors whose cells must be emptied.
            They are mapped to cell indices with ``index_of``; cells that
            are already empty are ignored.
    """
    # index_of yields a scalar for a single 1D descriptor; normalise it so
    # that the loop below always receives an iterable.
    indices_to_remove = np.atleast_1d(self.index_of(descriptors))
    for index in indices_to_remove:
        if index in self._grid:
            del self._grid[index]
            del self._storage[index]

GridArchive

bounds property

coverage property

__getitem__(key)

__init__(dimensions, ranges, instances=None, eps=1e-06, dtype=np.float64)

__iter__()

append(instance, descriptor=None)

extend(instances, descriptors=None, *args, **kwargs)

index_of(descriptors)

purge_unfeasible(attr='p')

remove(descriptors)

`GridArchive`

`bounds` `property`

`coverage` `property`

`getitem(key)`

`init(dimensions, ranges, instances=None, eps=1e-06, dtype=np.float64)`

`iter()`

`append(instance, descriptor=None)`

`extend(instances, descriptors=None, *args, **kwargs)`

`index_of(descriptors)`

`purge_unfeasible(attr='p')`

`remove(descriptors)`