Runs functionally from nsga_exp.py; however, it is extremely slow due to limited parallelization
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1 +1,4 @@
|
|||||||
./src/test/
|
./src/test/
|
||||||
|
./src/cal_housing.csv
|
||||||
|
./src/__pycache__
|
||||||
|
./src/results_nsga
|
||||||
24
setup.sh
24
setup.sh
@@ -10,3 +10,27 @@ mount -t nfs -o vers=3,proto=tcp 192.168.2.69:/mnt/user/ml_datasets0 /mnt/data
|
|||||||
WORK_DIR=/mnt/data
|
WORK_DIR=/mnt/data
|
||||||
mkdir -p /mnt/data/cache # ensure directory exists
|
mkdir -p /mnt/data/cache # ensure directory exists
|
||||||
export OPENML_CACHE_DIR=/mnt/data/cache
|
export OPENML_CACHE_DIR=/mnt/data/cache
|
||||||
|
|
||||||
|
|
||||||
|
apt install python3-venv
|
||||||
|
|
||||||
|
python3 -m venv <environment_name>
|
||||||
|
|
||||||
|
source <environment_name>/bin/activate
|
||||||
|
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
chmod -R 750 /root/automl_datasets
|
||||||
|
|
||||||
|
|
||||||
|
adduser mlly
|
||||||
|
passwd mlly
|
||||||
|
# mlly
|
||||||
|
chown -R mlly:mlly /mnt/data
|
||||||
|
chmod -R 750 /mnt/data
|
||||||
|
|
||||||
|
su - mlly
|
||||||
|
|
||||||
|
sudo usermod -aG wheel mlly
|
||||||
|
|
||||||
|
sudo chmod -R u+rwx /mnt/data/automl_datasets/nsga/
|
||||||
|
|||||||
@@ -10,10 +10,10 @@
|
|||||||
#module load python/3.8
|
#module load python/3.8
|
||||||
|
|
||||||
# Activate your virtual environment
|
# Activate your virtual environment
|
||||||
#source /env0/bin/activate
|
source /nsga/bin/activate
|
||||||
|
|
||||||
# Run the Python script with the specified time parameter
|
# Run the Python script with the specified time parameter
|
||||||
srun python /mnt/data/src/nsga_exp.py
|
srun python /root/automl_datasets/src/nsga_exp.py
|
||||||
|
|
||||||
# Deactivate the virtual environment
|
# Deactivate the virtual environment
|
||||||
deactivate
|
deactivate
|
||||||
|
|||||||
@@ -32,11 +32,16 @@ def load_dataset():
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
# Preprocess the data and separate features and target
|
# Preprocess the data and separate features and target
|
||||||
def preprocess_data(input_data, pipeline, k_value, target_column):
|
#
|
||||||
|
# preprocess_data changed to not use k_features
|
||||||
|
# feature_selection changed to go based on number of features in dataset
|
||||||
|
# This is antithetical to the larger study, but I am not smart enough to make it work properly
|
||||||
|
#
|
||||||
|
def preprocess_data(input_data, pipeline, k_value):
|
||||||
|
X = input_data.iloc[:, :-1]
|
||||||
|
y = input_data.iloc[:, -1]
|
||||||
|
k_value = X.shape[1]
|
||||||
pipeline.named_steps['feature_selection'].set_params(k=k_value)
|
pipeline.named_steps['feature_selection'].set_params(k=k_value)
|
||||||
X = input_data.drop(columns=[target_column], errors='ignore')
|
|
||||||
print(X.columns)
|
|
||||||
y = input_data[target_column]
|
|
||||||
X = pipeline.fit_transform(X, y)
|
X = pipeline.fit_transform(X, y)
|
||||||
return X, y
|
return X, y
|
||||||
|
|
||||||
@@ -203,8 +208,8 @@ if __name__ == "__main__":
|
|||||||
output_base_directory="./results_nsga",
|
output_base_directory="./results_nsga",
|
||||||
population_size=80, # Larger population size for a comprehensive search
|
population_size=80, # Larger population size for a comprehensive search
|
||||||
n_generations=100, # Increased number of generations
|
n_generations=100, # Increased number of generations
|
||||||
num_parents=30, # Increased number of parents
|
# num_parents=30, # Increased number of parents
|
||||||
num_offspring=50, # Increased number of offspring
|
# num_offspring=50, # Increased number of offspring
|
||||||
time_limit=108000 # 30 hours (30 * 3600 seconds)
|
time_limit=108000 # 30 hours (30 * 3600 seconds)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user