Save data

`save_results_to_files(filename_pattern, result, only_instances=True, only_genotypes=False, solvers_names=None, features_names=None, vars_names=None, files_format='parquet')`

Saves the results of the generation to files: the instances are stored as CSV or Parquet, while the history and archive metrics are always stored as CSV. Args: filename_pattern (str): Pattern for the filenames. result (GenResult): Result of the generation. only_instances (bool): Generate only the file with the resulting instances. Default True. If False, it also generates a history file and an archive_metrics file. only_genotypes (bool): Extract only the genotype of each instance. Default False (extracts features and portfolio scores). solvers_names (Sequence[str]): Names of the solvers. features_names (Sequence[str]): Names of the features. vars_names (Sequence[str]): Names of the variables. files_format (Literal["csv", "parquet"]): Format used to store the resulting instances file. Default "parquet". Parquet is the most efficient for large datasets.

Source code in digneapy/utils/save_data.py

def save_results_to_files(
    filename_pattern: str,
    result: GenResult,
    only_instances: bool = True,
    only_genotypes: bool = False,
    solvers_names: Optional[Sequence[str]] = None,
    features_names: Optional[Sequence[str]] = None,
    vars_names: Optional[Sequence[str]] = None,
    files_format: Literal["csv", "parquet"] = "parquet",
):
    """Write the outcome of a generation run to disk.

    The instances are collected into a single table, prefixed with a
    ``target`` column, and stored as ``<filename_pattern>_instances.csv`` or
    ``<filename_pattern>_instances.parquet``. When the table is empty no
    instances file is written. Unless ``only_instances`` is set, the history
    is additionally stored as ``<filename_pattern>_history.csv`` and, if
    metrics are available, they go to
    ``<filename_pattern>_archive_metrics.csv``; these two files are always
    CSV, whatever ``files_format`` says.

    Args:
        filename_pattern (str): Prefix shared by every file that is written.
        result (GenResult): Result of the generation. Its ``instances``,
            ``target``, ``history`` and ``metrics`` attributes are read.
        only_instances (bool): Write only the instances file. Default True.
            If False, the history and archive metrics files are written too.
        only_genotypes (bool): Forwarded to each instance's ``to_series`` as
            ``only_genotype``. Default False.
        solvers_names (Sequence[str]): Forwarded to ``to_series`` as
            ``score_names``.
        features_names (Sequence[str]): Forwarded to ``to_series`` as
            ``features_names``.
        vars_names (Sequence[str]): Forwarded to ``to_series`` as
            ``variables_names``.
        files_format ("csv" or "parquet"): Format of the instances file.
            Default "parquet". Any other value is reported on standard
            output and replaced by "parquet".
    """
    supported_formats = ("csv", "parquet")
    if files_format not in supported_formats:
        print(f"Unrecognised file format: {files_format}. Selecting parquet.")
        files_format = "parquet"

    # Every instance is serialised with the same options, so gather them once.
    series_options = {
        "only_genotype": only_genotypes,
        "variables_names": vars_names,
        "features_names": features_names,
        "score_names": solvers_names,
    }
    instances_table = pd.DataFrame(
        [instance.to_series(**series_options) for instance in result.instances]
    )

    if not instances_table.empty:
        instances_table.insert(0, "target", result.target)
        # files_format is guaranteed to be "csv" or "parquet" at this point.
        if files_format == "parquet":
            instances_table.to_parquet(
                f"{filename_pattern}_instances.parquet", index=False
            )
        else:
            instances_table.to_csv(f"{filename_pattern}_instances.csv", index=False)

    if only_instances:
        return

    history_table = result.history.to_df()
    history_table.to_csv(f"{filename_pattern}_history.csv", index=False)
    if result.metrics is not None:
        result.metrics.to_csv(f"{filename_pattern}_archive_metrics.csv")