Runs launched from nsga_exp.py are functional; however, they are extremely slow due to limited parallelization
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1 +1,4 @@
|
||||
./src/test/
|
||||
./src/cal_housing.csv
|
||||
./src/__pycache__
|
||||
./src/results_nsga
|
||||
24
setup.sh
24
setup.sh
@@ -10,3 +10,27 @@ mount -t nfs -o vers=3,proto=tcp 192.168.2.69:/mnt/user/ml_datasets0 /mnt/data
|
||||
WORK_DIR=/mnt/data
|
||||
mkdir -p /mnt/data/cache # ensure directory exists
|
||||
export OPENML_CACHE_DIR=/mnt/data/cache
|
||||
|
||||
|
||||
apt install python3-venv
|
||||
|
||||
python3 -m venv <environment_name>
|
||||
|
||||
source <environment_name>/bin/activate
|
||||
|
||||
pip install -r requirements.txt
|
||||
|
||||
chmod -R 750 /root/automl_datasets
|
||||
|
||||
|
||||
adduser mlly
|
||||
passwd mlly
|
||||
# mlly
|
||||
chown -R mlly:mlly /mnt/data
|
||||
chmod -R 750 /mnt/data
|
||||
|
||||
su - mlly
|
||||
|
||||
sudo usermod -aG wheel mlly
|
||||
|
||||
sudo chmod -R u+rwx /mnt/data/automl_datasets/nsga/
|
||||
|
||||
@@ -10,10 +10,10 @@
|
||||
#module load python/3.8
|
||||
|
||||
# Activate your virtual environment
|
||||
#source /env0/bin/activate
|
||||
source /nsga/bin/activate
|
||||
|
||||
# Run the Python script with the specified time parameter
|
||||
srun python /mnt/data/src/nsga_exp.py
|
||||
srun python /root/automl_datasets/src/nsga_exp.py
|
||||
|
||||
# Deactivate the virtual environment
|
||||
deactivate
|
||||
|
||||
@@ -32,11 +32,16 @@ def load_dataset():
|
||||
return data
|
||||
|
||||
# Preprocess the data and separate features and target
|
||||
def preprocess_data(input_data, pipeline, k_value, target_column):
|
||||
#
|
||||
# preprocess_data changed to not use k_features
|
||||
# feature_selection changed to go based on number of features in dataset
|
||||
# This is antithetical to the larger study, but I am not smart enough to make it work properly
|
||||
#
|
||||
def preprocess_data(input_data, pipeline, k_value):
|
||||
X = input_data.iloc[:, :-1]
|
||||
y = input_data.iloc[:, -1]
|
||||
k_value = X.shape[1]
|
||||
pipeline.named_steps['feature_selection'].set_params(k=k_value)
|
||||
X = input_data.drop(columns=[target_column], errors='ignore')
|
||||
print(X.columns)
|
||||
y = input_data[target_column]
|
||||
X = pipeline.fit_transform(X, y)
|
||||
return X, y
|
||||
|
||||
@@ -203,8 +208,8 @@ if __name__ == "__main__":
|
||||
output_base_directory="./results_nsga",
|
||||
population_size=80, # Larger population size for a comprehensive search
|
||||
n_generations=100, # Increased number of generations
|
||||
num_parents=30, # Increased number of parents
|
||||
num_offspring=50, # Increased number of offspring
|
||||
# num_parents=30, # Increased number of parents
|
||||
# num_offspring=50, # Increased number of offspring
|
||||
time_limit=108000 # 30 hours (30 * 3600 seconds)
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user