diff --git a/src/nsga_exp.py b/src/nsga_exp.py index 3c77629..93b9606 100644 --- a/src/nsga_exp.py +++ b/src/nsga_exp.py @@ -13,6 +13,7 @@ import shap from deap import base, creator, tools, algorithms from algorithms import lasso, random_forest, gradient_boosting, decision_tree_regressor, ridge_regressor, stacking_lasso +from multiprocessing import Pool, cpu_count import argparse creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0)) # Minimize both objectives @@ -162,28 +163,33 @@ def run_nsga_experiment(output_base_directory="./results_nsga", start_time = time.time() pop = toolbox.population(n=population_size) - # Evaluate the initial population - fits = toolbox.map(toolbox.evaluate, pop) +# Use a Pool to parallelize evaluations +with Pool(processes=cpu_count()) as pool: + # Evaluate the initial population in parallel + fits = list(pool.map(toolbox.evaluate, pop)) for fit, ind in zip(fits, pop): ind.fitness.values = fit[:2] ind.shap_values = fit[2] - # Use the number of parents and offspring in the evolution process + # Evolution loop for gen in range(n_generations): if time.time() - start_time > time_limit: print("Time limit exceeded, stopping evolution.") break offspring = algorithms.varAnd(pop, toolbox, cxpb=0.7, mutpb=0.2) - fits = toolbox.map(toolbox.evaluate, offspring) + # Evaluate offspring in parallel + fits = list(pool.map(toolbox.evaluate, offspring)) for fit, ind in zip(fits, offspring): - ind.fitness.values = fit[:2] # Ensure only the first two values are assigned to fitness + ind.fitness.values = fit[:2] ind.shap_values = fit[2] - # Select the next generation of parents from the combined pool of parents and offspring + # Select next generation pop = toolbox.select(pop + offspring, k=population_size) + + for ind in pop: for fold_idx, shap_values in enumerate(ind.shap_values): shap_output_path = os.path.join(output_directory, f"shap_values_{int(ind[0])}_fold_{fold_idx + 1}.npy")