Updated bash scripts. Added background.txt file. Appended dataset file with optional OpenML caching
This commit is contained in:
@@ -1,5 +1,10 @@
|
|||||||
https://gitlab.com/university-of-prince-edward-isalnd/explanation-aware-optimization-and-automl/-/tree/main/src?ref_type=heads
|
https://gitlab.com/university-of-prince-edward-isalnd/explanation-aware-optimization-and-automl/-/tree/main/src?ref_type=heads
|
||||||
|
|
||||||
|
Operation:
|
||||||
|
Specify working directory (local repo location), cache directory (dataset download location), and
|
||||||
|
|
||||||
|
|
||||||
|
WORK_DIR=
|
||||||
|
|
||||||
|
|
||||||
############################################################################################################################################################
|
############################################################################################################################################################
|
||||||
|
|||||||
12
setup.sh
Normal file
12
setup.sh
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
#!/bin/bash
# Node setup: install the NFS client, mount the shared dataset export at
# /mnt/data, and publish the working and OpenML cache directory locations.
# NOTE(review): the exports below only reach the calling shell when this file
# is sourced (`. setup.sh`) rather than executed — confirm intended usage.

sudo apt install nfs-common -y

# -p: do not fail if the mount point already exists (e.g. on a re-run).
mkdir -p /mnt/data

mount 192.168.2.69:/mnt/user/ml_datasets0 /mnt/data

# A shell assignment takes no leading `$` (`$WORK_DIR=...` is parsed as a
# command name, leaving the variable unset). Exported so that child processes
# referencing $WORK_DIR can see it.
export WORK_DIR=/mnt/data

mkdir -p /mnt/data/cache  # ensure directory exists

export OPENML_CACHE_DIR=/mnt/data/cache
|
||||||
50
src/dataset.py
Normal file
50
src/dataset.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import openml
|
||||||
|
|
||||||
|
# --- CACHE SETUP ---
|
||||||
|
# Change this path to your preferred local cache directory
|
||||||
|
#CACHE_DIR = os.path.expanduser("~/openml_cache")
|
||||||
|
#os.makedirs(CACHE_DIR, exist_ok=True)
|
||||||
|
#openml.config.cache_directory = CACHE_DIR
|
||||||
|
|
||||||
|
# OpenML CC18 classification tasks (task ids)
|
||||||
|
# Short dataset name -> OpenML *task* id; every entry is a classification task.
TASKS = {
    # Adult Income classification
    "adult": 7592,
    # Spambase classification
    "spambase": 43,
    # Optdigits classification
    "optdigits": 28,
}
|
||||||
|
|
||||||
|
# Regression dataset (dataset id)
|
||||||
|
# Short dataset name -> OpenML *dataset* id (not a task id); used for regression.
DATASETS = {
    "cal_housing": 44025,
}
|
||||||
|
|
||||||
|
def _load_task_dataframe(task_id: int):
    """Fetch the data behind an OpenML task as ``(X, y)``.

    The task is looked up by ``task_id``, its dataset is fetched, and the
    data is requested in dataframe format with the task's target name as
    the target column.

    When ``y`` comes back as a ``pandas.Series``, rows whose target is
    missing are dropped from both ``X`` and ``y``; otherwise both are
    returned exactly as received.
    """
    task = openml.tasks.get_task(task_id)
    dataset = openml.datasets.get_dataset(task.dataset_id)
    features, target, _categorical, _names = dataset.get_data(
        target=task.target_name,
        dataset_format="dataframe",
    )

    # Only a Series target can be NA-filtered; anything else passes through.
    if not isinstance(target, pd.Series):
        return features, target

    keep = target.notna()
    return features.loc[keep], target.loc[keep]
|
||||||
|
|
||||||
|
def load_dataset(name: str):
    """Load a named dataset from OpenML along with its problem type.

    Names found in ``TASKS`` are loaded via ``_load_task_dataframe`` and
    labelled ``"classification"``. Names found in ``DATASETS`` are fetched
    by dataset id, using the dataset's default target attribute as the
    target, have rows with a missing target removed, and are labelled
    ``"regression"``. ``TASKS`` is consulted first.

    Returns:
        A ``(X, y, problem_type)`` tuple.

    Raises:
        ValueError: If ``name`` is in neither ``TASKS`` nor ``DATASETS``.
    """
    if name in TASKS:
        features, target = _load_task_dataframe(TASKS[name])
        return features, target, "classification"

    if name not in DATASETS:
        raise ValueError(f"Unknown dataset {name}")

    dataset = openml.datasets.get_dataset(DATASETS[name])
    features, target, _categorical, _names = dataset.get_data(
        target=dataset.default_target_attribute,
        dataset_format="dataframe",
    )
    keep = target.notna()
    return features.loc[keep], target.loc[keep], "regression"
|
||||||
@@ -8,16 +8,16 @@
|
|||||||
#SBATCH --error=%x-%j.err # Separate error log
|
#SBATCH --error=%x-%j.err # Separate error log
|
||||||
|
|
||||||
# Load necessary modules
|
# Load necessary modules
|
||||||
module load python/3.8
|
#module load python/3.8
|
||||||
|
|
||||||
# Activate your virtual environment
|
# Activate your virtual environment
|
||||||
source ~/envs/workdir/bin/activate
|
source /env0/bin/activate
|
||||||
|
|
||||||
# Parameters
|
# Parameters
|
||||||
TIME=$1
|
TIME=$1
|
||||||
|
|
||||||
# Run the Python script with the specified time parameter
|
# Run the Python script with the specified time parameter
|
||||||
srun python /home/dvera/scratch/Framework_EXP/grid_search_exp.py --time $TIME
|
srun python $WORK_DIR/src/grid_search_exp.py --time $TIME
|
||||||
|
|
||||||
# Deactivate the virtual environment
|
# Deactivate the virtual environment
|
||||||
deactivate
|
deactivate
|
||||||
|
|||||||
@@ -8,13 +8,13 @@
|
|||||||
#SBATCH --error=%x-%j.err # Separate error log
|
#SBATCH --error=%x-%j.err # Separate error log
|
||||||
|
|
||||||
# Load necessary modules
|
# Load necessary modules
|
||||||
module load python/3.8
|
#module load python/3.8
|
||||||
|
|
||||||
# Activate your virtual environment
|
# Activate your virtual environment
|
||||||
source ~/envs/workdir/bin/activate
|
source /env0/bin/activate
|
||||||
|
|
||||||
# Run the Python script with the specified time parameter
|
# Run the Python script with the specified time parameter
|
||||||
srun python /home/dvera/scratch/Framework_EXP/nsga_exp.py
|
srun python $WORK_DIR/src/nsga_exp.py
|
||||||
|
|
||||||
# Deactivate the virtual environment
|
# Deactivate the virtual environment
|
||||||
deactivate
|
deactivate
|
||||||
|
|||||||
Reference in New Issue
Block a user