@File : init.py @Time : 2024/06/14 13:45:31 @Author : Alejandro Marrero @Version : 1.0 @Contact : amarrerd@ull.edu.es @License : (C)Copyright 2024, Alejandro Marrero @Desc : None

save_results_to_files(filename_pattern, result, only_instances=True, only_genotypes=False, solvers_names=None, features_names=None, vars_names=None, files_format='parquet')

Saves the results of the generation to CSV or Parquet files. Args: filename_pattern (str): Pattern for the filenames. result (GenResult): Result of the generation. only_instances (bool): Generate only the file with the resulting instances. Default True. If False, it also generates the history and archive_metrics files. only_genotypes (bool): Extract only the genotype of each instance. Default False (extracts features and portfolio scores). solvers_names (Sequence[str]): Names of the solvers. features_names (Sequence[str]): Names of the features. vars_names (Sequence[str]): Names of the variables. files_format (Literal["csv", "parquet"]): Format to store the resulting instances file. Parquet is the most efficient for large datasets.

Source code in digneapy/utils/save_data.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def save_results_to_files(
    filename_pattern: str,
    result: GenResult,
    only_instances: bool = True,
    only_genotypes: bool = False,
    solvers_names: Optional[Sequence[str]] = None,
    features_names: Optional[Sequence[str]] = None,
    vars_names: Optional[Sequence[str]] = None,
    files_format: Literal["csv", "parquet"] = "parquet",
):
    """Write the outcome of a generation run to disk.

    The instances are always exported (unless there are none) to
    ``<filename_pattern>_instances.<csv|parquet>``. The history and the
    archive metrics are exported as CSV only when ``only_instances`` is False.

    Args:
        filename_pattern (str): Prefix shared by every generated file name.
        result (GenResult): Result of the generation. Its ``instances``,
            ``target``, ``history`` and ``metrics`` attributes are read.
        only_instances (bool): When True (default) only the instances file is
            written. When False the ``_history.csv`` file is written as well,
            plus ``_archive_metrics.csv`` if ``result.metrics`` is not None.
        only_genotypes (bool): Forwarded to ``Instance.to_series`` as
            ``only_genotype``. Default False.
        solvers_names (Sequence[str]): Forwarded to ``to_series`` as ``score_names``.
        features_names (Sequence[str]): Forwarded to ``to_series`` as ``features_names``.
        vars_names (Sequence[str]): Forwarded to ``to_series`` as ``variables_names``.
        files_format ("csv" | "parquet"): Format of the instances file. Any
            other value prints a warning and falls back to "parquet".
    """
    # Unknown formats are not an error: warn and fall back to parquet.
    if files_format not in ("csv", "parquet"):
        print(f"Unrecognised file format: {files_format}. Selecting parquet.")
        files_format = "parquet"

    # One row per instance.
    rows = []
    for instance in result.instances:
        rows.append(
            instance.to_series(
                only_genotype=only_genotypes,
                variables_names=vars_names,
                features_names=features_names,
                score_names=solvers_names,
            )
        )
    instances_df = pd.DataFrame(rows)

    # An empty frame produces no instances file at all.
    if not instances_df.empty:
        instances_df.insert(0, "target", result.target)
        if files_format == "csv":
            instances_df.to_csv(f"{filename_pattern}_instances.csv", index=False)
        else:
            instances_df.to_parquet(
                f"{filename_pattern}_instances.parquet", index=False
            )

    if only_instances:
        return

    history_df = result.history.to_df()
    history_df.to_csv(f"{filename_pattern}_history.csv", index=False)

    metrics = result.metrics
    if metrics is not None:
        metrics.to_csv(f"{filename_pattern}_archive_metrics.csv")

sort_knapsack_instances(instances)

sort_knapsack_instances(
    instances: np.ndarray,
) -> np.ndarray
sort_knapsack_instances(
    instances: Sequence[Instance],
) -> List[Instance]

Sorts a collection of Knapsack Instances Genotypes based on lexicographic order by (w_i, p_i)

Parameters:
  • instances (ndarray | Sequence[Instance]) –

    Instances to sort

Raises:
  • ValueError

    If the dimension of the genotypes (minus Q) is not even. Note that KP instances should contain N pairs of values plus the capacity.

Returns:
  • ndarray | List[Instance]

    np.ndarray | Sequence[Instance]: Sorted instances

Source code in digneapy/utils/sorting.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def sort_knapsack_instances(
    instances: np.ndarray | Sequence[Instance],
) -> np.ndarray | List[Instance]:
    """Sorts a collection of Knapsack Instances Genotypes based on lexicographic order by (w_i, p_i)

    Each genotype is laid out as ``[Q, w_0, p_0, w_1, p_1, ...]``: the first
    value is kept in place and the remaining values are treated as pairs that
    are reordered, row by row, by first component and then by second component.
    The input is never modified; a sorted copy is returned.

    Args:
        instances (np.ndarray | Sequence[Instance]): Instances to sort

    Raises:
        ValueError: If the dimension of the genotypes (minus Q) is not even. Note that KP instances should contain N pairs of values plus the capacity.

    Returns:
        np.ndarray | List[Instance]: Sorted instances. An ndarray input yields
        an ndarray; any other sequence yields a list built with
        ``clone_with(variables=...)`` on each original element. Empty input
        yields an empty copy / empty list.
    """
    is_array = isinstance(instances, np.ndarray)

    # Nothing to validate or sort; avoids indexing instances[0] on empty input.
    if len(instances) == 0:
        return instances.copy() if is_array else []

    # Length of one genotype, including the leading capacity value.
    dimension = instances.shape[1] if is_array else len(instances[0])
    if (dimension - 1) % 2 != 0:
        # Message wording kept as-is; it now reports the real genotype length
        # for every input type.
        raise ValueError(
            f"Something is wrong with these KP instances. Shape 1 should be even and got {dimension}"
        )

    # np.array(..., copy=True) always copies, so the caller's data is untouched.
    genotypes = np.array(instances, copy=True)
    n_instances = genotypes.shape[0]

    # View everything after the capacity as (n_instances, n_items, 2) pairs.
    pairs = genotypes[:, 1:].reshape(n_instances, -1, 2)
    # lexsort uses the LAST key as primary: sort by pair[0], ties by pair[1].
    order = np.lexsort((pairs[:, :, 1], pairs[:, :, 0]), axis=1)
    sorted_pairs = np.take_along_axis(pairs, order[:, :, None], axis=1)
    genotypes[:, 1:] = sorted_pairs.reshape(n_instances, -1)

    if is_array:
        return genotypes
    return [
        instance.clone_with(variables=genotype)
        for instance, genotype in zip(instances, genotypes)
    ]

to_json(obj)

Convert an object to a JSON string.

Source code in digneapy/utils/serializer.py
75
76
77
78
79
def to_json(obj):
    """
    Return ``obj`` as a JSON string indented with four spaces.

    The object is first passed through ``serialize`` and the result is then
    encoded by ``json.dumps`` using ``CustomJSONEncoder``.
    """
    payload = serialize(obj)
    return json.dumps(payload, indent=4, cls=CustomJSONEncoder)