Spring 2025
sklearn
pip install scikit-learn
import sklearn.datasets as skds

# Load the breast cancer dataset that ships with scikit-learn.
cancer = skds.load_breast_cancer()

# Summarize the dataset: class names, data matrix shape, and column names.
for label, value in (
    ("Target Values: ", cancer.target_names),
    ("Shape of data: ", cancer.data.shape),
    ("Attribute Variables: ", cancer.feature_names),
):
    print(label, value)
## Target Values: ['malignant' 'benign']
## Shape of data: (569, 30)
## Attribute Variables: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
##  'mean smoothness' 'mean compactness' 'mean concavity'
##  'mean concave points' 'mean symmetry' 'mean fractal dimension'
##  'radius error' 'texture error' 'perimeter error' 'area error'
##  'smoothness error' 'compactness error' 'concavity error'
##  'concave points error' 'symmetry error' 'fractal dimension error'
##  'worst radius' 'worst texture' 'worst perimeter' 'worst area'
##  'worst smoothness' 'worst compactness' 'worst concavity'
##  'worst concave points' 'worst symmetry' 'worst fractal dimension']
from sklearn.model_selection import train_test_split

# Hold out 40% of the rows for testing; the fixed seed makes the split repeatable.
trainX, testX, trainY, testY = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.4,
    random_state=1,
)

# Report the shape of each of the four resulting arrays.
for label, split in (
    ("Training data shape: ", trainX),
    ("Training target shape: ", trainY),
    ("Testing data shape: ", testX),
    ("Testing target shape: ", testY),
):
    print(label, split.shape)
Support vector machines are machine learning models that try to find the optimal decision surface between positive and negative points. This one is a linear surface:
import sklearn.metrics as metrics  # needed for accuracy_score below
import sklearn.svm as svm

# Build then fit the model: a support vector classifier with a linear kernel.
model = svm.SVC(kernel='linear', C=1000)
fit = model.fit(trainX, trainY)

# Predict with the model (computed once and reused for the evaluation).
predictions = model.predict(testX)

# Evaluate the model: fraction of test points classified correctly.
# accuracy_score expects the true labels first, then the predicted labels.
metrics.accuracy_score(testY, predictions)
Decision Trees use information theory to build a tree that decides how to classify based on the most informative variable values.
import sklearn.metrics as metrics  # needed for accuracy_score below
import sklearn.tree as dt

# Build then fit the model: a decision tree classifier with default settings.
model = dt.DecisionTreeClassifier()
fit = model.fit(trainX, trainY)

# Predict with the model (computed once and reused for the evaluation).
predictions = model.predict(testX)

# Evaluate the model: fraction of test points classified correctly.
# accuracy_score expects the true labels first, then the predicted labels.
metrics.accuracy_score(testY, predictions)
# Create a virtual environment for TensorFlow that can still see system-wide packages
python3 -m venv --system-site-packages ~/tensorflow

# Do this every time you want to use TF
source ~/tensorflow/bin/activate

# Do this the first time, it will take a while
pip3 install --upgrade tensorflow
import sklearn.datasets as skds
import tensorflow as tf  # tf is used below, so it must be imported in this snippet
from sklearn.model_selection import train_test_split

# Load the breast cancer data and hold out 40% of it for testing.
cancer = skds.load_breast_cancer()
trainX, testX, trainY, testY = train_test_split(
    cancer.data, cancer.target, test_size=0.4, random_state=1
)

# Turn the targets into one-hot representation
trainY = tf.keras.utils.to_categorical(trainY)
testY = tf.keras.utils.to_categorical(testY)
# Feed-forward network: 30 input attributes, three hidden layers of 50 ReLU
# units each, and a 2-unit softmax output (one unit per class).
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Input(shape=(30,)),
        tf.keras.layers.Dense(50, activation="relu"),
        tf.keras.layers.Dense(50, activation="relu"),
        tf.keras.layers.Dense(50, activation="relu"),
        tf.keras.layers.Dense(2, activation="softmax"),
    ]
)
# Configure training: Adam optimizer, cross-entropy loss, and accuracy reporting.
model.compile(
    optimizer="adam",
    loss="BinaryCrossentropy",
    metrics=["accuracy"],
)

# Learn the model: 50 passes over the training data.
trainingHistory = model.fit(x=trainX, y=trainY, epochs=50)

# Evaluate test-set performance on the held-out data.
model.evaluate(x=testX, y=testY)
import tensorflow as tf  # Ignore the warnings it will spew

# Get the MNIST data, convert them to float and scale the attribute data
# to be between 0 and 1 (the raw pixel values are divided by 255).
(trainX, trainY), (testX, testY) = tf.keras.datasets.mnist.load_data()
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0
# Make each image a 28x28x1 cube; -1 lets the number of images be inferred.
trainX = trainX.reshape(-1, 28, 28, 1)
testX = testX.reshape(-1, 28, 28, 1)

# Turn the targets into one-hot representation, each label array converted
# independently.
trainY, testY = (
    tf.keras.utils.to_categorical(labels) for labels in (trainY, testY)
)
# Small convolutional network for 28x28x1 images: one 3x3 convolution layer
# with 70 filters, 2x2 max pooling, then a 70-unit dense layer and a
# 10-unit softmax output.
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Input(shape=(28, 28, 1)),
        tf.keras.layers.Conv2D(70, (3, 3), activation="relu"),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(70, activation="relu"),
        tf.keras.layers.Dense(10, activation="softmax"),
    ]
)
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reporting.
model.compile(
    optimizer="adam",
    loss="CategoricalCrossentropy",
    metrics=["accuracy"],
)

# Learn the model: 10 passes over the training images.
trainingHistory = model.fit(x=trainX, y=trainY, epochs=10)

# Evaluate test-set performance on the held-out images.
model.evaluate(x=testX, y=testY)