import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Baseline regression script (template): load a CSV, scale the features,
# train a one-hidden-layer network and report test MSE.
#
# Restored from a rendered copy of the notes: section headings are comments
# again and typographic quotes are replaced by ASCII quotes so the script
# parses.

# Load the CSV file
file_path = '/home/jmcphaul/qm9.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head())

# Assuming 'target' is the column you want to predict
# and the rest are features
features = data.drop(columns=['target'])
target = data['target']

# Split the dataset into training and test sets.
# The split happens BEFORE scaling so that the scaler statistics are learned
# from the training rows only (fitting on all rows leaks test information).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Data preprocessing (example): fit on train, apply the same transform to test
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Building a simple neural network model
input_layer = Input(shape=(X_train.shape[1],))
hidden_layer = Dense(64, activation='relu')(input_layer)
output_layer = Dense(1)(hidden_layer)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
history = model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32)

# Predict and evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Plotting the training loss
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Regression on the QM9 'gap' column with a one-hidden-layer network.
#
# Restored from a rendered copy of the notes: section headings are comments
# again, collapsed statements are back on their own lines, and typographic
# quotes are replaced by ASCII quotes so the script parses.

# Install required packages.
# "!pip ..." is Jupyter-only syntax; in a notebook cell run:
#   !pip install tensorflow pandas scikit-learn matplotlib
# and from a shell run the same command without the leading "!".

# Import libraries
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the CSV file
file_path = 'qm9.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head())

# Get basic information about the dataset
print(data.info())

# Check the basic statistics of the numerical features
print(data.describe())

# Check for missing values
print(data.isnull().sum())

# Selecting features and target
# NOTE(review): if this CSV also carries 'homo' and 'lumo' columns, 'gap' may
# be directly derivable from the remaining features - confirm before trusting
# the reported error.
features = data.drop(columns=['mol_id', 'smiles', 'gap'])
target = data['gap']

# Split the dataset into training and test sets.
# The split happens BEFORE scaling so that the scaler statistics are learned
# from the training rows only (fitting on all rows leaks test information).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Scale the features: fit on train, apply the same transform to test
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Display the shapes of the datasets to confirm splitting
print(f"Train Features: {X_train.shape}, Train Target: {y_train.shape}")
print(f"Test Features: {X_test.shape}, Test Target: {y_test.shape}")

# Define the neural network architecture
input_layer = Input(shape=(X_train.shape[1],))
hidden_layer = Dense(64, activation='relu')(input_layer)
output_layer = Dense(1)(hidden_layer)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
history = model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32)

# Plotting the training loss
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Predict and evaluate the model on test set
y_pred = model.predict(X_test)

# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Additional metrics if needed
# y_test is 1-D (n,) while model.predict returns (n, 1). Subtracting them
# directly broadcasts to an (n, n) matrix, so flatten the predictions first
# to get a single scalar MAE.
mae = np.mean(np.abs(np.asarray(y_test) - y_pred.ravel()))
print(f'Mean Absolute Error: {mae}')

# Autoencoder-based anomaly detection on the numeric columns of `data`.
#
# Restored from a rendered copy of the notes: section headings are comments
# again, collapsed statements are back on their own lines, and typographic
# quotes are replaced by ASCII quotes so the script parses.

# Drop non-numeric columns if any
data = data.select_dtypes(include=[np.number])

# Split the data into training and testing sets.
# The split happens BEFORE scaling so that the scaler statistics are learned
# from the training rows only (fitting on all rows leaks test information).
X_train_raw, X_test_raw = train_test_split(data, test_size=0.2, random_state=42)

# Standardize the data: fit on train, apply the same transform to test
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Define the autoencoder model
input_dim = X_train.shape[1]
encoding_dim = 32  # This can be adjusted
# NOTE(review): if input_dim is smaller than 32 the bottleneck is wider than
# the input and the network can learn a near-identity map - confirm the size.

input_layer = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation='relu')(input_layer)
# Linear output layer: the inputs are standardized (signed, unbounded), and a
# sigmoid output is confined to (0, 1) so it could never reconstruct them.
decoded = Dense(input_dim, activation='linear')(encoded)

autoencoder = Model(input_layer, decoded)

autoencoder.compile(optimizer='adam', loss='mse')

# Train the autoencoder (input and target are the same array)
history = autoencoder.fit(
    X_train,
    X_train,
    epochs=50,
    batch_size=256,
    shuffle=True,
    validation_data=(X_test, X_test),
    verbose=1,
)

# Get the reconstruction loss (per-sample mean squared error)
X_train_pred = autoencoder.predict(X_train)
train_loss = np.mean(np.square(X_train - X_train_pred), axis=1)

X_test_pred = autoencoder.predict(X_test)
test_loss = np.mean(np.square(X_test - X_test_pred), axis=1)

# Set the threshold for anomaly detection
threshold = np.percentile(train_loss, 95)  # 95th percentile

# Identify anomalies (boolean mask over the test rows)
anomalies = test_loss > threshold

# Plot the reconstruction loss
plt.figure(figsize=(10, 6))
plt.hist(test_loss, bins=50)
plt.axvline(threshold, color='r', linestyle='dashed', linewidth=2)
plt.xlabel('Reconstruction loss')
plt.ylabel('Number of samples')
plt.title('Reconstruction Loss for Test Data')
plt.show()

# Define a function to dynamically adjust the encoding dimension
def dynamic_encoding_dim(data_point):
    """Pick an encoding dimension from the mean of ``data_point``.

    Example heuristic: the encoding dimension is chosen from the overall
    mean of the values passed in.

    Args:
        data_point: Array-like of numbers; reduced with ``np.mean``.

    Returns:
        16 if the mean is below -1, 32 if it is in [-1, 0), otherwise 64.
    """
    mean_val = np.mean(data_point)
    if mean_val < -1:
        return 16
    if mean_val < 0:
        return 32
    return 64

# Define the autoencoder model with dynamic encoding dimension
def create_autoencoder(input_dim, encoding_dim, output_activation='linear'):
    """Build and compile a single-hidden-layer dense autoencoder.

    Args:
        input_dim: Number of input (and reconstructed output) features.
        encoding_dim: Width of the ReLU bottleneck layer.
        output_activation: Activation of the reconstruction layer. Defaults
            to ``'linear'`` because the surrounding notes feed standardized
            data (signed, unbounded), which a sigmoid output confined to
            (0, 1) cannot reconstruct. Pass ``'sigmoid'`` only for inputs
            scaled to [0, 1].

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and MSE loss.
    """
    input_layer = Input(shape=(input_dim,))
    encoded = Dense(encoding_dim, activation='relu')(input_layer)
    decoded = Dense(input_dim, activation=output_activation)(encoded)
    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder

# Train the autoencoder with dynamic encoding dimension
#
# One model is kept per encoding dimension and reused across batches and
# epochs. Building a fresh model for every batch would throw away the weights
# learned so far, so nothing would ever be trained beyond a single batch.
autoencoders = {}  # encoding_dim -> compiled autoencoder

for epoch in range(50):  # Number of epochs
    for batch_start in range(0, X_train.shape[0], 256):  # Batch size
        batch_end = min(batch_start + 256, X_train.shape[0])
        X_batch = X_train[batch_start:batch_end]

        # The per-feature batch mean is passed in; dynamic_encoding_dim then
        # reduces it to one scalar to choose the bottleneck width.
        encoding_dim = dynamic_encoding_dim(np.mean(X_batch, axis=0))

        if encoding_dim not in autoencoders:
            autoencoders[encoding_dim] = create_autoencoder(input_dim, encoding_dim)
        autoencoder = autoencoders[encoding_dim]

        autoencoder.fit(X_batch, X_batch, epochs=1, verbose=0)

Install Miniconda to the home directory for full package control:

wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh # run installer bash Miniconda3-latest-Linux-x86_64.sh # Initialize ~/miniconda3/bin/conda init # Restart terminal source ~/.bashrc # or ~/.zshrc if use zsh

conda init mamba init

set base to active or change:

conda config –set auto_activate_base false # change mind: conda init –reverse $SHELL

conda activate rapids-23.02

conda init source ~/.bashrc # or ~/.zshrc

environments

conda env list conda install conda list conda update conda remove conda deactivate

remove and uninstall are the same

conda remove numpy scipy pandas # remove all packages conda remove –all # remove env conda remove -n my_env numpy # force removal conda remove –force numpy # remove strict channel mamba config –set channel_priority flexible

check network issues:

export HTTPS_PROXY=http://yourproxy:port
export HTTP_PROXY=http://yourproxy:port
unset HTTPS_PROXY unset HTTP_PROXY

From page

$ enroot import docker://nvcr.io#nvidia/rapidsai/rapidsai:cuda11.2-runtime-centos7-py3.10 $ enroot create nvidia+rapidsai+rapidsai+cuda11.2-runtime-centos7-py3.10.sqsh

srun -N1 -G1 -c10 –mem=6G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+rapidsai+rapidsai+cuda11.2-runtime-centos7-py3.10.sqsh --container-mounts=\)WORK –pty $SHELL srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+rapidsai+rapidsai+cuda11.2-runtime-centos7-py3.10.sqsh --container-mounts=\)WORK –pty $SHELL

Restart terminal

source ~/.bashrc

conda config –set auto_activate_base true conda config –set auto_activate_base false


ENV DISAPPEARED

remake nano file nano environment2.yaml name: rapids-23.02
channels:
- rapidsai
- conda-forge
- nvidia
- defaults
dependencies:
- rapids=23.02
- python=3.10
- cudatoolkit=11.8

conda config –set channel_priority flexible mamba config –set channel_priority flexible mamba env create -f environment2.yaml

dmesg | grep -i “killed”

Remove the directory for the incomplete environment

rm -rf /users/jmcphaul/my_conda_envs/rapids23.02

swap space: dd if=/dev/zero of=myswapfile bs=1M count=8192

only me:

chmod 600 myswapfile mkswap myswapfile swapon myswapfile

source activate ~/my_conda_env conda install mamba -c conda-forge mamba –version

-or-

wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh

conda config –set channel_priority flexible mamba config –set channel_priority flexible

source ~/mambaforge/bin/activate
condayes

conda config –set auto_activate_base true mamba config –set auto_activate_base true

conda activate rapids23.02 conda remove jupyter_server conda clean –all
y

mamba install jupyter_server
mamba install jupyterlab

environment location: /users/jmcphaul/my_conda_envs/rapids23.02

conda activate base
conda create -n conda_update python=3.10 conda activate conda_update conda install -c conda-forge conda conda activate rapids23.02 conda activate conda_update
conda update conda conda update -n rapids23.02 –all -c conda-forge conda activate rapids23.02

and :

alias conda-deactivate-all=‘while [[ “$CONDA_SHLVL” -gt 0 ]]; do conda deactivate; done’ conda config –set auto_activate_base false conda deactivate source ~/.bashrc

lastly: conda-deactivate-all

Cleaning Up:

rm readline-8.1.tar.gz

NEXT SECTION

enroot import docker://nvcr.io#nvidia/nemo:22.09 enroot create nvidia+nemo+22.09.sqsh enroot import docker://nvcr.io#nvidia/nemo_bert_text_classification:20.07 enroot create nvidia+nemo_bert_text_classification+20.07.sqsh

cd $WORK mkdir nemo && cd nemo curl -s -O https://dl.fbaipublicfiles.com/glue/data/SST-2.zip\ && unzip -o SST-2.zip -d ./
&& sed 1d ./SST-2/train.tsv > ./train_nemo_format.tsv
&& sed 1d ./SST-2/dev.tsv > ./dev_nemo_format.tsv &

srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+nemo+22.09.sqsh --container-mounts=\)WORK –pty bash -i

cd $WORK/nemo/SST-2 python /workspace/nemo/examples/nlp/text_classification/text_classification_with_bert.py
model.dataset.num_classes=2
model.dataset.max_seq_length=256
model.train_ds.batch_size=64
model.validation_ds.batch_size=64
model.language_model.pretrained_model_name=‘bert-base-cased’
model.train_ds.file_path=train_nemo_format.tsv
model.validation_ds.file_path=dev_nemo_format.tsv
trainer.num_nodes=1
trainer.max_epochs=20
trainer.precision=16
model.optim.name=adam
model.optim.lr=1e-4 Check the GPU usage with nvidia-smi command

find / -name text_classification_with_bert.py 2>/dev/null

# Clone NeMo next to the SST-2 data and install PyTorch Lightning into the
# rapids23.02 environment (one command per line).
cd /work/users/jmcphaul/nemo
git clone https://github.com/NVIDIA/NeMo.git
cd NeMo/examples/nlp/text_classification
conda activate rapids23.02
# The PyPI package is "pytorch-lightning" (imported as pytorch_lightning);
# "pytorch_lightening" does not exist.
pip install pytorch-lightning

-or-

pip install -r /work/users/jmcphaul/nemo/NeMo/requirements.txt (Install package and Requirements)

VERIFY INSTALL:

python -c “import pytorch_lightning as pl; print(pl.__version__)”

python text_classification_with_bert.py
model.dataset.num_classes=2
model.dataset.max_seq_length=256
model.train_ds.batch_size=64
model.validation_ds.batch_size=64
model.language_model.pretrained_model_name=‘bert-base-cased’
model.train_ds.file_path=/work/users/jmcphaul/nemo/train_nemo_format.tsv
model.validation_ds.file_path=/work/users/jmcphaul/nemo/dev_nemo_format.tsv
trainer.num_nodes=1
trainer.max_epochs=20
trainer.precision=16
model.optim.name=adam
model.optim.lr=1e-4

curl –proto ‘=https’ –tlsv1.2 -sSf https://sh.rustup.rs | sh source $HOME/.cargo/env rustc –version

pip install –upgrade pip setuptools

pip freeze > pip_packages.txt nano pip_packages.txt cat pip_packages.txt | xargs pip uninstall -y mamba install -c conda-forge transformers==4.9.2 tokenizers==0.10.3 pytorch-lightning omegaconf cython pandas numpy scikit-learn scipy matplotlib jupyterlab conda activate /users/jmcphaul/my_conda_envs/rapids23.02

Step 1: Clean the pip_packages.txt file

sed -i ‘/@/d’ pip_packages.txt

Step 2: Uninstall all packages listed in the cleaned file

cat pip_packages.txt | xargs pip uninstall -y

Step 3: Reinstall necessary packages with mamba

mamba install -c conda-forge transformers tokenizers pytorch-lightning omegaconf

mamba install -c conda-forge cython rust mamba install -c conda-forge cython conda activate /users/jmcphaul/my_conda_envs/rapids23.02 mamba install -c conda-forge cython rust

mamba install -c conda-forge cython rust

mamba install -c conda-forge setuptools packaging

sed -i ‘/@/d’ pip_packages.txt

cat pip_packages.txt | xargs pip uninstall -y # Step 1: Clean the pip_packages.txt file sed -i ‘/@/d’ pip_packages.txt

Step 2: Uninstall all packages listed in the cleaned file

cat pip_packages.txt | xargs pip uninstall -y

Step 3: Reinstall necessary packages with mamba

mamba install -c conda-forge transformers tokenizers pytorch-lightning omegaconf

mamba install -c conda-forge numpy

mamba install -c nvidia -c conda-forge nemo_toolkit[all]

python -c “import nemo; print(nemo.__version__)”

*** NANO *** Steps to Select and Copy All Text in nano: Open the File in nano:

sh Copy code nano pip_packages.txt Navigate to the Start of the File: Press Ctrl + _ (underscore) then Ctrl + A to move the cursor to the start of the file.

Start Marking Text: Press Ctrl + ^ (Ctrl key and the caret key) to start marking text. This sets the starting point of the selection. cd Move to the End of the File: Press Ctrl + _ (underscore) then Ctrl + E to move the cursor to the end of the file. This will highlight all the text from the start to the end of the file.

Copy the Selected Text: Press Alt + 6 (Option key and 6 on Mac) to copy the selected text to the clipboard.

Close the File: Press Ctrl + X to exit nano.

git clone https://github.com/NVIDIA/NeMo.git

mamba uninstall pytorch

conda list | grep cudatoolkit mamba list | grep cudatoolkit


srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+nemo+22.09.sqsh --container-mounts=\)WORK –pty bash -i

srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+rapidsai+rapidsai+cuda11.2-runtime-centos7-py3.10.sqsh --container-mounts=\)WORK –pty $SHELL

*** NOTE NEXT TIME FROM HERE:** Success. Logging you in… Success. Logging you in… Last login: Tue Jul 16 23:18:07 2024 from 129.119.70.150 :~$ srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+nemo+22.09.sqsh --container-mounts=\)WORK –pty bash -i bash: /hpc/sys/apps/lmod/lmod/libexec/lmod: No such file or directory :/workspace/nemo$ $ srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+rapidsai+rapidsai+cuda11.2-runtime-centos7-py3.10.sqsh --container-mounts=\)WORK –pty $SHELL bash: \(: command not found jmcphaul@bcm-dgxa100-0003:/workspace/nemo\) srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+rapidsai+rapidsai+cuda11.2-runtime-centos7-py3.10.sqsh --container-mounts=\)WORK –pty \(SHELL bash: srun: command not found jmcphaul@bcm-dgxa100-0003:/workspace/nemo\) ls examples scripts start-jupyter.sh tests tutorials :/workspace/nemo$ cd \(WORK jmcphaul@bcm-dgxa100-0003:/work/users/jmcphaul\) ls environment.yml environment1st.yml environment2.yml modelCNN.sh model_CNN_CIFAR10.py ncurses-6.2 nemo readline-8.1 requirements2.txt sqsh submit_job1.sh :/work/users/jmcphaul$ cd sqsh :/work/users/jmcphaul/sqsh$ ls lua-5.3.5 nvidia+nemo+22.09.sqsh nvidia+tensorflow+22.12-tf2-py3.sqsh readline-8.1 tensorflow_22.12-tf2-py3.sif :/work/users/jmcphaul/sqsh$

REDO THIS Page:

                                        https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/07-Application%20NEMO%20for%20Sentiment%20Analysis/index.html  ######################################
                                        

source ~/mambaforge/bin/activate
condayes

conda config –set auto_activate_base true mamba config –set auto_activate_base true srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+nemo+22.09.sqsh --container-mounts=\)WORK –pty bash -i

%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%% NEXT SECTION NEXT SECTION %%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%% nano cifar100m_job.sh #!/bin/bash
#SBATCH -J CIFAR100M # job name to display in squeue
#SBATCH -c 16 –mem=750G # requested partition
#SBATCH -o output-%j.txt # standard output file
#SBATCH -e error-%j.txt # standard error file
#SBATCH –gres=gpu:8
#SBATCH -t 1440 # maximum runtime in minutes
#SBATCH -D /work/users/tuev/cv1/cifar100/multi
#SBATCH –exclusive
#SBATCH –mail-user
#SBATCH –mail-type=end

srun –container-image=\(WORK/sqsh/nvidia+tensorflow+22.02-tf2-py3.sqsh --container-mounts=\)WORK mpirun -np 8 –allow-run-as-root –oversubscribe python /work/users/tuev/cv1/cifar100/multi/cifar100spod-hvod.py chmod +x cifar100m_job.sh sbatch cifar100m_job.sh

basic run: srun -n1 –gres=gpu:1 -c2 –mem=12gb –time=12:00:00 –pty $SHELL

mkdir -p /users/jmcphaul/my_work_directory

BE ON LOGIN PAGE TO SUBMIT BATCH

Be on login node to submit the batch script:

try: import comet_ml # must be imported before torch (if installed) except ImportError: comet_ml = None

import numpy as np import torch import torch.distributed as dist import torch.nn as nn import yaml from torch.optim import lr_scheduler from tqdm import tqdm

FILE = Path(file).resolve() ROOT = FILE.parents[0] # YOLOv5 root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative

import val as validate # for end-of-epoch mAP from models.experimental import attempt_load from models.yolo import Model from utils.autoanchor import check_anchors from utils.autobatch import check_train_batch_size
from utils.callbacks import Callbacks from utils.dataloaders import create_dataloader from utils.downloads import attempt_download, is_url
from utils.general import (

** && (( &*&& &&& ** %% \[ ** ^& \]$$\[$ \]$$$$$$$$$$$$$$$$$$$$$$$$ %%%%%%%%%%%%%%%%% NEXT &*(&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& NEXT )))

git clone https://github.com/ultralytics/yolov5.git srun -n1 –gres=gpu:1 –container-image \(WORK/sqsh/nvidia+nemo+22.04.sqsh --container-mounts=\)WORK –time=12:00:00 –pty $SHELL

cd $WORK ls cd yolov5 pip install -r requirements.txt

** NEW RUN NODE: srun –partition=short –gres=gpu:1 –cpus-per-task=8 –mem=64GB –time=1:00:00 –pty $SHELL *** srun –partition=short –gres=gpu:1 –cpus-per-task=8 –mem=64GB –time=1:00:00 –pty $SHELL module load conda gcc module load cuda module load cudnn conda create -n nemo_text_classification python=3.8 conda activate nemo_text_classification pip install tensorflow==2.11 pip install ipykernel python3 -m ipykernel install –user -nemo_text_classification –display-name NeMo mamba install -c conda-forge numpy pandas matplotlib scikit-learn jupyter mamba install -c pytorch pytorch torchvision torchaudio cudatoolkit=11.1 pip install nemo_toolkit[‘nlp’] pytorch-lightning transformers mamba install cython

BRANCH=‘r2.0.0rc0’ python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]

https://chatgpt.com/c/23dae27c-b53d-4d23-9351-1f322b12c7c5

tensorgpu

# load_script.py
# Checks whether the CUDA runtime and cuDNN shared libraries can be loaded
# from their hard-coded install paths, printing the outcome for each.
import ctypes
import os

# Paths to the CUDA libraries
cuda_path = "/hpc/mp/repos/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-13.2.0/cuda-12.4.1-vz7djzzlmvr6dgtrfgqletmtahxhwkm6/lib64/libcudart.so"
cudnn_path = "/hpc/mp/repos/spack/opt/spack/linux-ubuntu22.04-zen2/gcc-13.2.0/cudnn-9.1.1.17-12-ld5h22cq2cbo6hpbi4dvlijb3mwenclg/lib/libcudnn.so"

# Try loading the CUDA library
try:
    ctypes.CDLL(cuda_path)
    print(f"Successfully loaded {cuda_path}")
except OSError as e:
    # ctypes raises OSError when the shared object is missing or unloadable.
    print(f"Error loading {cuda_path}: {e}")

# Try loading the cuDNN library
try:
    ctypes.CDLL(cudnn_path)
    print(f"Successfully loaded {cudnn_path}")
except OSError as e:
    print(f"Error loading {cudnn_path}: {e}")

$ srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+tensorflow+22.12-tf2-py3.sqsh --container-mounts=\)WORK –pty $SHELL :/workspace# jupyter lab –allow-root –no-browser –ip=0.0.0.0

cd $WORK python train_mnist.py

https://chatgpt.com/c/989a65f4-b75a-41d1-a025-22131aba3b7b https://gemini.google.com/app/221068a33333d467 https://chatgpt.com/g/g-phPzPLYLi-tensorflow-keras-gpt/c/8969e520-76fa-46b1-8148-4dd8b47359a3 https://chatgpt.com/c/989a65f4-b75a-41d1-a025-22131aba3b7b

srun -N1 -G1 -c10 –mem=64G –time=12:00:00 –container-image \(WORK/sqsh/nvidia+tensorflow+22.12-tf2-py3.sqsh --container-mounts=\)WORK –pty $SHELL conda –version

http://bcm-dgxa100-0008:8888/?token=6ceb3dee1350b0a72bf863516bb5733dc9a7f6ab1eb3c4af

http://bcm-dgxa100-0020:8888/?token=9cfcd29039a650d33396d64a4d623e18334a8d2fe1113eaa

qm9.csv

Great! Using mamba for managing your conda environment can be faster and more efficient. Here’s how you can resolve the issue using mamba:

  1. Activate your conda environment:

    conda activate /users/jmcphaul/my_conda_envs/rapids23.02
  2. Check the installed PyTorch version and its dependencies:

    python -c "import torch; print(torch.__version__)"

    If this command fails, it indicates an issue with the PyTorch installation.

  3. Reinstall PyTorch using mamba:

    First, uninstall the current PyTorch installation:

    mamba uninstall pytorch

    Then, reinstall PyTorch with the appropriate CUDA version (assuming you’re using CUDA 11.1):

    mamba install pytorch torchvision torchaudio cudatoolkit=11.1 -c pytorch -c nvidia
  4. Verify the installation again:

    python -c "import torch; print(torch.__version__)"

    Ensure that this command runs without any errors and prints the PyTorch version.

  5. Run your script again:

    python /work/users/jmcphaul/nemo/NeMo/examples/nlp/text_classification/text_classification_with_bert.py \
    model.dataset.num_classes=2 \
    model.dataset.max_seq_length=256 \
    model.train_ds.batch_size=64 \
    model.validation_ds.batch_size=64 \
    model.language_model.pretrained_model_name='bert-base-cased' \
    model.train_ds.file_path=/work/users/jmcphaul/nemo/train_nemo_format.tsv \
    model.validation_ds.file_path=/work/users/jmcphaul/nemo/dev_nemo_format.tsv \
    trainer.num_nodes=1 \
    trainer.max_epochs=20 \
    trainer.precision=16 \
    model.optim.name=adam \
    model.optim.lr=1e-4

By following these steps using mamba, you should be able to resolve the import error and run your script successfully. If you encounter any further issues, please let me know!

nvcr.io/nvidia/nemo_bert_text_classification:20.07 nvcr.io/nvidia/nemo:23.08 enroot import docker://nvcr.io#nvidia/nemo:23.08 enroot import docker://nvcr.io#nvidia/nemo_bert_text_classification:20.07

  1. a) Read the following links. b) Create an organized PDF booklet for me to use and have for reference
  2. walk me through setting up access via my computer via terminal (linux/wsl/command)

https://southernmethodistuniversity.github.io/hpc_docs/accounts.html

https://southernmethodistuniversity.github.io/hpc_docs/index.html

https://southernmethodistuniversity.github.io/hpc_docs/access.html

https://southernmethodistuniversity.github.io/hpc_docs/portal.html

https://southernmethodistuniversity.github.io/hpc_docs/m3_migration.html

https://southernmethodistuniversity.github.io/hpc_docs/mp_update.html

https://southernmethodistuniversity.github.io/hpc_docs/examples/conda/README.html

https://southernmethodistuniversity.github.io/hpc_docs/examples/torch/README.html

https://southernmethodistuniversity.github.io/hpc_docs/tutorials/unix/unix.html

https://southernmethodistuniversity.github.io/hpc_docs/tutorials/unix/scripting.html

https://southernmethodistuniversity.github.io/hpc_docs/tutorials/modules/modules.html

https://southernmethodistuniversity.github.io/hpc_docs/tutorials/slurm/slurm.html

https://southernmethodistuniversity.github.io/hpc_docs/tutorials/slurm/best_practices.html

https://southernmethodistuniversity.github.io/hpc_docs/tutorials/containers/containers.html

https://southernmethodistuniversity.github.io/hpc_docs/tutorials/cpp/intro.html

https://southernmethodistuniversity.github.io/hpc_docs/tutorials/cpp/session_1.html https://southernmethodistuniversity.github.io/hpc_docs/tutorials/cpp/session_2.html https://southernmethodistuniversity.github.io/hpc_docs/tutorials/cpp/session_3.html https://southernmethodistuniversity.github.io/hpc_docs/tutorials/cpp/session_4.html https://southernmethodistuniversity.github.io/hpc_docs/tutorials/cpp/session_5.html

https://southernmethodistuniversity.github.io/hpc_docs/tutorials/r/overview.html https://southernmethodistuniversity.github.io/hpc_docs/tutorials/r/environments.html https://southernmethodistuniversity.github.io/hpc_docs/tutorials/r/slurm.html

https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/index.html https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/04-Using%20JupterLab/index.html

https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/02-Working%20with%20Conda/index.html

https://carpentries-incubator.github.io/introduction-to-conda-for-data-scientists/aio/index.html#:~:text=Conda%20is%20a%20platform%20agnostic,and%20targets%20multiple%20programming%20languages.

https://vpn.smu.edu/+CSCOE+/portal.html

https://www.tensorflow.org/install https://github.com/NVIDIA/NeMo/blob/main/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb https://carpentries.org/community-lessons/ https://www.sciencedirect.com/science/article/pii/S0165027020303848 https://nlp.stanford.edu/sentiment/index.html https://gluebenchmark.com/tasks/ https://github.com/NVIDIA/NeMo/tree/main/scripts/checkpoint_converters https://docs.nvidia.com/nemo-framework/user-guide/latest/sft_peft/index.html https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit/nlp/text_classification.html https://www.google.com/search?q=raster-based+spatial+analysis&rlz=1C1ONGR_enUS1116US1116&oq=raster-based+spatial+analysis&gs_lcrp=EgZjaHJvbWUyBggAEEUYOTIICAEQABgWGB4yDQgCEAAYhgMYgAQYigUyDQgDEAAYhgMYgAQYigUyDQgEEAAYhgMYgAQYigUyCggFEAAYgAQYogQyCggGEAAYgAQYogQyCggHEAAYgAQYogQyCggIEAAYgAQYogTSAQgxMTU2ajBqN6gCALACAA&sourceid=chrome&ie=UTF-8 https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo https://docs.rapids.ai/visualization https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/08-Applications%20of%20Horovod%20for%20MultiGPUs/index.html https://rapids.ai/ https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/06-RAPIDS/index.html https://carpentries-incubator.github.io/introduction-to-conda-for-data-scientists/aio/index.html#:~:text=Conda%20is%20a%20platform%20agnostic,and%20targets%20multiple%20programming%20languages. 
https://slurm.schedmd.com/ https://www.smu.edu/oit/services/qualtrics https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/index.html https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/12-Pipeline%20with%20pretrained%20Hugging%20Face/index.html https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/03-Using%20container/index.html https://education.github.com/pack/offers https://southernmethodistuniversity.github.io/SMU_SuperPOD_101/06-RAPIDS/index.html https://www.smu.edu/oit/training https://catalog.ngc.nvidia.com/orgs/nvidia/teams/rapidsai/containers/rapidsai