Saves the results of the generation to CSV or Parquet files.
Args:
filename_pattern (str): Pattern for the filenames.
result (GenResult): Result of the generation.
only_instances (bool): Generate only the files with the resulting instances. Default True. If False, it also generates the history and archive_metrics files.
only_genotypes (bool): Extract only the genotype of each instance. Default False (extracts features and portfolio scores).
solvers_names (Sequence[str]): Names of the solvers.
features_names (Sequence[str]): Names of the features.
vars_names (Sequence[str]): Names of the variables.
files_format (Literal["csv", "parquet"]): Format to store the resulting instances file. Default "parquet".
Parquet is the most efficient for large datasets.
Source code in digneapy/utils/save_data.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def save_results_to_files(
    filename_pattern: str,
    result: GenResult,
    only_instances: bool = True,
    only_genotypes: bool = False,
    solvers_names: Optional[Sequence[str]] = None,
    features_names: Optional[Sequence[str]] = None,
    vars_names: Optional[Sequence[str]] = None,
    files_format: Literal["csv", "parquet"] = "parquet",
):
    """Write the outcome of a generation run to disk.

    The instances are collected into a single table (one row per instance,
    with a leading ``target`` column) and stored as
    ``{filename_pattern}_instances.csv`` or
    ``{filename_pattern}_instances.parquet``. No instances file is written
    when the table is empty.

    Args:
        filename_pattern (str): Prefix shared by every generated filename.
        result (GenResult): Result of the generation.
        only_instances (bool): When True (default) only the instances file is
            produced. When False, ``{filename_pattern}_history.csv`` is also
            written and, if ``result.metrics`` is not None,
            ``{filename_pattern}_archive_metrics.csv`` as well.
        only_genotypes (bool): Forwarded to each instance's ``to_series`` as
            ``only_genotype``. Default False.
        solvers_names (Sequence[str]): Names of the solvers, forwarded to
            ``to_series`` as ``score_names``.
        features_names (Sequence[str]): Names of the features.
        vars_names (Sequence[str]): Names of the variables, forwarded to
            ``to_series`` as ``variables_names``.
        files_format (Literal["csv", "parquet"]): Format of the instances
            file. Default "parquet". Any other value is reported on stdout
            and replaced by "parquet".
    """
    supported_formats = ("csv", "parquet")
    if files_format not in supported_formats:
        # Best-effort behaviour: fall back to the default instead of failing.
        print(f"Unrecognised file format: {files_format}. Selecting parquet.")
        files_format = "parquet"

    rows = [
        instance.to_series(
            only_genotype=only_genotypes,
            variables_names=vars_names,
            features_names=features_names,
            score_names=solvers_names,
        )
        for instance in result.instances
    ]
    instances_df = pd.DataFrame(rows)

    if not instances_df.empty:
        # The target goes first so it is the leading column of the file.
        instances_df.insert(0, "target", result.target)
        if files_format == "csv":
            instances_path = f"{filename_pattern}_instances.csv"
            instances_df.to_csv(instances_path, index=False)
        elif files_format == "parquet":
            instances_path = f"{filename_pattern}_instances.parquet"
            instances_df.to_parquet(instances_path, index=False)

    if only_instances:
        return

    # History and archive metrics are always stored as CSV.
    history_df = result.history.to_df()
    history_df.to_csv(f"{filename_pattern}_history.csv", index=False)

    archive_metrics = result.metrics
    if archive_metrics is not None:
        archive_metrics.to_csv(f"{filename_pattern}_archive_metrics.csv")
|