Index

`save_results_to_files(filename_pattern, result, only_instances=True, only_genotypes=False, solvers_names=None, features_names=None, vars_names=None, files_format='parquet')`

Saves the results of the generation to files (CSV or Parquet). Args: filename_pattern (str): Pattern for the filenames. result (GenResult): Result of the generation. only_instances (bool): Generate only the file with the resulting instances. Default True. If False, it also generates a history file and an archive_metrics file (both CSV). only_genotypes (bool): Extract only the genotype of each instance. Default False (extracts features and portfolio scores). solvers_names (Sequence[str]): Names of the solvers. features_names (Sequence[str]): Names of the features. vars_names (Sequence[str]): Names of the variables. files_format (Literal["csv", "parquet"]): Format used to store the resulting instances file. Default "parquet". Parquet is the most efficient for large datasets.

Source code in digneapy/utils/save_data.py

def save_results_to_files(
    filename_pattern: str,
    result: GenResult,
    only_instances: bool = True,
    only_genotypes: bool = False,
    solvers_names: Optional[Sequence[str]] = None,
    features_names: Optional[Sequence[str]] = None,
    vars_names: Optional[Sequence[str]] = None,
    files_format: Literal["csv", "parquet"] = "parquet",
):
    """Write the outcome of a generation run to disk.

    One row per instance in ``result.instances`` is collected into a
    DataFrame. When that frame is not empty, a leading ``target`` column
    holding ``result.target`` is added and the frame is written to
    ``<filename_pattern>_instances.<csv|parquet>``.

    Args:
        filename_pattern (str): Prefix shared by every generated file.
        result (GenResult): Result of the generation.
        only_instances (bool): When True (default) only the instances file is
            written. When False, ``<filename_pattern>_history.csv`` is written
            too and, if ``result.metrics`` is not None,
            ``<filename_pattern>_archive_metrics.csv`` as well.
        only_genotypes (bool): Forwarded to ``to_series`` as ``only_genotype``.
            Default False.
        solvers_names (Sequence[str]): Forwarded to ``to_series`` as
            ``score_names``.
        features_names (Sequence[str]): Forwarded to ``to_series`` as
            ``features_names``.
        vars_names (Sequence[str]): Forwarded to ``to_series`` as
            ``variables_names``.
        files_format ("csv" or "parquet"): Format of the instances file. Any
            other value prints a notice and falls back to "parquet".
    """
    known_formats = ("csv", "parquet")
    if files_format not in known_formats:
        print(f"Unrecognised file format: {files_format}. Selecting parquet.")
        files_format = "parquet"

    # One Series per generated instance; each becomes one row of the frame.
    rows = []
    for instance in result.instances:
        rows.append(
            instance.to_series(
                only_genotype=only_genotypes,
                variables_names=vars_names,
                features_names=features_names,
                score_names=solvers_names,
            )
        )
    instances_df = pd.DataFrame(rows)

    # Nothing is written for the instances when the frame has no data.
    if not instances_df.empty:
        instances_df.insert(0, "target", result.target)
        if files_format == "csv":
            instances_df.to_csv(f"{filename_pattern}_instances.csv", index=False)
        elif files_format == "parquet":
            instances_df.to_parquet(
                f"{filename_pattern}_instances.parquet", index=False
            )

    if only_instances:
        return

    # History and archive metrics are always stored as CSV.
    history_df = result.history.to_df()
    history_df.to_csv(f"{filename_pattern}_history.csv", index=False)
    if result.metrics is not None:
        result.metrics.to_csv(f"{filename_pattern}_archive_metrics.csv")

`sort_knapsack_instances(instances)`

sort_knapsack_instances(
    instances: np.ndarray,
) -> np.ndarray

sort_knapsack_instances(
    instances: Sequence[Instance],
) -> List[Instance]

Sorts a collection of Knapsack instance genotypes in lexicographic order by (w_i, p_i)

Parameters:	`instances` (`ndarray \| Sequence[Instance]`) – Instances to sort

Raises:	`ValueError` – If the dimension of the genotypes (minus Q) is not even. Note that KP instances should contain N pairs of values plus the capacity.

Returns:	`ndarray \| List[Instance]` – np.ndarray \| Sequence[Instance]: Sorted instances

Source code in digneapy/utils/sorting.py

def sort_knapsack_instances(
    instances: np.ndarray | Sequence[Instance],
) -> np.ndarray | List[Instance]:
    """Sorts a collection of Knapsack Instances Genotypes based on lexicograph order by (w_i, p_i)

    Args:
        instances (np.ndarray | Sequence[Instance]): Instances to sort

    Raises:
        ValueError: If the dimension of the genotypes (minus Q) is not even. Note that KP instances should contain N pairs of values plus the capacity.

    Returns:
        np.ndarray | Sequence[Instance]: Sorted instances
    """
    genotypes = np.empty(0)
    if isinstance(instances, np.ndarray) and (instances.shape[1] - 1) % 2 != 0:
        raise ValueError(
            f"Something is wrong with these KP instances. Shape 1 should be even and got {instances.shape[1]}"
        )
    elif dimension := (len(instances[0]) - 1) % 2 != 0:
        raise ValueError(
            f"Something is wrong with these KP instances. Shape 1 should be even and got {dimension}"
        )

    genotypes = np.asarray(instances, copy=True)
    M, N = genotypes.shape

    pairs = genotypes[:, 1:].reshape(M, -1, 2)
    order = np.lexsort((pairs[:, :, 1], pairs[:, :, 0]), axis=1)
    sorted_pairs = np.take_along_axis(pairs, order[:, :, None], axis=1)
    genotypes[:, 1:] = sorted_pairs.reshape(M, -1)

    if isinstance(instances, np.ndarray):
        return genotypes
    else:
        return [
            instances[i].clone_with(variables=genotypes[i])
            for i in range(len(instances))
        ]

`to_json(obj)`

Convert an object to a JSON string.

Source code in digneapy/utils/serializer.py

def to_json(obj):
    """
    Render an object as a JSON string.

    The object is first passed through ``serialize``; the result is then
    encoded by ``json.dumps`` with ``CustomJSONEncoder`` and a 4-space indent.
    """
    payload = serialize(obj)
    return json.dumps(payload, indent=4, cls=CustomJSONEncoder)