Spring 2025
sklearn)
pip install scikit-learn
import sklearn.datasets as skds
# Load the breast-cancer dataset that ships with scikit-learn.
cancer = skds.load_breast_cancer()

# Report the class labels, the dimensions of the data matrix, and the feature names.
for heading, value in (
    ("Target Values: ", cancer.target_names),
    ("Shape of data: ", cancer.data.shape),
    ("Attribute Variables: ", cancer.feature_names),
):
    print(heading, value)
## Target Values: ['malignant' 'benign']
## Shape of data: (569, 30)
## Attribute Variables: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
##  'mean smoothness' 'mean compactness' 'mean concavity'
##  'mean concave points' 'mean symmetry' 'mean fractal dimension'
##  'radius error' 'texture error' 'perimeter error' 'area error'
##  'smoothness error' 'compactness error' 'concavity error'
##  'concave points error' 'symmetry error' 'fractal dimension error'
##  'worst radius' 'worst texture' 'worst perimeter' 'worst area'
##  'worst smoothness' 'worst compactness' 'worst concavity'
##  'worst concave points' 'worst symmetry' 'worst fractal dimension']
from sklearn.model_selection import train_test_split

# Hold out 40% of the samples for testing; the fixed random_state keeps the
# split the same from run to run.
splits = train_test_split(cancer.data, cancer.target, test_size=0.4, random_state=1)
trainX, testX, trainY, testY = splits

# Show the dimensions of every piece of the split.
for heading, piece in (
    ("Training data shape: ", trainX),
    ("Training target shape: ", trainY),
    ("Testing data shape: ", testX),
    ("Testing target shape: ", testY),
):
    print(heading, piece.shape)
Support vector machines are machine learning models that try to find the optimal decision surface between positive and negative points. This one is a linear surface:
import sklearn.svm as svm
import sklearn.metrics as metrics

# Build then fit the model: a support vector classifier with a linear kernel
model = svm.SVC(kernel='linear', C=1000)
fit = model.fit(trainX, trainY)

# Predict with the model (computed once and reused for the evaluation below)
predictions = model.predict(testX)

# Evaluate the model: accuracy_score takes the true labels first, then the predictions
metrics.accuracy_score(testY, predictions)
To do
import sklearn.naive_bayes as nb

# Build then fit the model: a Gaussian naive Bayes classifier
model = nb.GaussianNB()
fit = model.fit(trainX, trainY)

# Predict with the model (computed once and reused for the evaluation below)
predictions = model.predict(testX)

# Evaluate the model: accuracy_score takes the true labels first, then the predictions
metrics.accuracy_score(testY, predictions)
Decision Trees use information theory to build a tree that decides how to classify based on the most informative variable values.
import sklearn.tree as dt

# Build then fit the model: a decision tree classifier with default settings
model = dt.DecisionTreeClassifier()
fit = model.fit(trainX, trainY)

# Predict with the model (computed once and reused for the evaluation below)
predictions = model.predict(testX)

# Evaluate the model: accuracy_score takes the true labels first, then the predictions
metrics.accuracy_score(testY, predictions)
source /data/shared-venvs/tensorflow-standard/bin/activate
import sklearn.datasets as skds
import tensorflow as tf  # needed here: tf is used below for the one-hot conversion
from sklearn.model_selection import train_test_split

# Reload the breast-cancer data and hold out 40% of it for testing
cancer = skds.load_breast_cancer()
trainX, testX, trainY, testY = train_test_split(
    cancer.data, cancer.target, test_size=0.4, random_state=1
)

# Turn the targets into one-hot representation
trainY = tf.keras.utils.to_categorical(trainY)
testY = tf.keras.utils.to_categorical(testY)
# Fully connected network: 30 input attributes, three hidden layers of 50 ReLU
# units each, and a 2-unit softmax output (one unit per class).
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Input(shape=(30,)))
model.add(tf.keras.layers.Dense(50, activation="relu"))
model.add(tf.keras.layers.Dense(50, activation="relu"))
model.add(tf.keras.layers.Dense(50, activation="relu"))
model.add(tf.keras.layers.Dense(2, activation="softmax"))
# Configure training: Adam optimizer, binary cross-entropy loss, track accuracy
model.compile(optimizer="adam", loss="BinaryCrossentropy", metrics=["accuracy"])

# Learn the model
trainingHistory = model.fit(trainX, trainY, epochs=50)

# Evaluate test-set performance
model.evaluate(testX, testY)
import tensorflow as tf  # Ignore the warnings it will spew

# Get the MNIST data, convert them to float and scale the attribute data to be between 0 and 1
(trainX, trainY), (testX, testY) = tf.keras.datasets.mnist.load_data()
trainX = trainX.astype('float32') / 255.0
testX = testX.astype('float32') / 255.0
# Make each image a 28x28x1 cube (adds an explicit single-channel axis)
trainX = trainX.reshape((-1, 28, 28, 1))
testX = testX.reshape((-1, 28, 28, 1))

# Turn the targets into one-hot representation
trainY = tf.keras.utils.to_categorical(trainY)
testY = tf.keras.utils.to_categorical(testY)
# Small convolutional network: one 3x3 convolution with 70 filters, 2x2 max
# pooling, then a 70-unit dense layer and a 10-unit softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Input(shape=(28, 28, 1)))
model.add(tf.keras.layers.Conv2D(70, (3, 3), activation="relu"))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Flatten())  # Flatten the feature maps for the dense layers
model.add(tf.keras.layers.Dense(70, activation="relu"))
model.add(tf.keras.layers.Dense(10, activation="softmax"))
# Configure training: Adam optimizer, categorical cross-entropy loss, track accuracy
model.compile(optimizer="adam", loss="CategoricalCrossentropy", metrics=["accuracy"])

# Learn the model
trainingHistory = model.fit(trainX, trainY, epochs=10)

# Evaluate test-set performance
model.evaluate(testX, testY)
More Info: https://juliapackages.com/c/machine-learning
using MLJ
using DataFrames

# Load the iris data as a DataFrame
iris = DataFrames.DataFrame(load_iris());

# Strip the target column off
y, X = unpack(iris, ==(:target); rng=123);

# Test/train split: the first 75% of rows go to training, the rest to testing
(trainX, testX), (trainY, testY) = MLJ.partition((X, y), 0.75, multi=true);

# Show all models that might be applied to this data
models(matching(X, y))
More Info: https://juliaai.github.io/MLJ.jl/dev/getting_started/#Getting-Started
@load annotation to load a specific component from that library
Pkg.add("MLJLIBSVMInterface") # Just need to do this once for anything in LIBSVM
SVC = @load SVC pkg=LIBSVM  # To get access to the SVM classifier modeling tool
import LIBSVM  # To have access to LIBSVM mechanisms, such as different kernels, etc

# Create a specific SVM model
model = SVC(kernel=LIBSVM.Kernel.Polynomial)

# Using training data to learn
fit_model = machine(model, trainX, trainY) |> fit!

# Predict classes for test set
predict(fit_model, testX)

# Estimate accuracy with 2-fold cross-validation.
# NOTE(review): this call receives the unfitted `model` and the test split, so it
# cross-validates on testX/testY rather than scoring `fit_model` — confirm intended.
evaluate(model, testX, testY; resampling=CV(nfolds=2, rng=888), measure=[accuracy])
DecisionTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree

# Create a decision tree model
model = DecisionTreeClassifier()

# Using training data to learn
fit_model = machine(model, trainX, trainY) |> fit!

# Predict for test set
# NOTE(review): if this classifier is probabilistic in MLJ, `predict` yields
# distributions and `predict_mode` would give class labels — confirm which is wanted.
predict(fit_model, testX)

# Estimate accuracy with 2-fold cross-validation.
# NOTE(review): this call receives the unfitted `model` and the test split, so it
# cross-validates on testX/testY rather than scoring `fit_model` — confirm intended.
evaluate(model, testX, testY; resampling=CV(nfolds=2, rng=888), measure=[accuracy])
using Flux, Statistics
using ProgressMeter, CUDA  # optional

device = gpu_device()  # If using the GPU

# Label each column of a 2-row matrix with the XOR of "coordinate > 0.5".
xorlabels(X) = [xor(col[1] > 0.5, col[2] > 0.5) for col in eachcol(X)]

# Make some fake data:
trainX = rand(Float32, 2, 500);  # 2×500 Matrix
trainY = xorlabels(trainX);      # XOR t/f for each of those
testX = rand(Float32, 2, 250);   # 2×250 Matrix
testY = xorlabels(testX);        # XOR t/f for each of those
More Info: https://fluxml.ai/Flux.jl/stable/guide/models/quickstart/
# Build the network — a small MLP with 2 inputs, 3 hidden nodes and 2 outputs —
# then hand it to `device` (the GPU when one is in use).
model = device(
    Chain(
        Dense(2 => 3, tanh),  # hidden layer: 2 inputs -> 3 nodes, tanh activation
        BatchNorm(3),         # batch normalization over the 3 hidden features
        Dense(3 => 2),        # output layer: 2 raw scores, no activation
    ),
)
# Prepare the pieces Flux's training loop needs.

# One-hot encode the true/false labels (label order: true, then false).
trainYoh = Flux.onehotbatch(trainY, [true, false]);

# Batch loader: shuffled mini-batches of 64 (input, one-hot label) samples.
loader = Flux.DataLoader((trainX, trainYoh); batchsize=64, shuffle=true);

# Optimizer state for `model`, using Adam with learning rate 0.01.
optimizer = Flux.setup(Flux.Adam(0.01), model);
# Train for 1000 passes over the loader, logging the loss of every mini-batch.
losses = Float32[]  # One loss value per mini-batch, in the order they were seen
@showprogress for epoch in 1:1_000  # Omit the @showprogress if not using ProgressMeter
    for batch_sample in loader
        # Grab samples from batch, put them onto the device (GPU if in use)
        x, y = batch_sample |> device
        loss, gradients = Flux.withgradient(model) do midstep_model
            y_hat = midstep_model(x)  # Apply the current model to x
            Flux.logitcrossentropy(y_hat, y)  # Collect crossentropy loss over the batch
        end
        Flux.update!(optimizer, model, gradients[1])  # Use optimizer to adjust wts
        push!(losses, loss)  # Record this batch's loss (logged outside the gradient call)
    end
end
# How accurate is the model on training data?
trainOut = model(trainX |> device);    # Training data to the device, apply model, get output
trainProb = softmax(trainOut) |> cpu;  # Convert output to probabilities, put back on CPU
mean((trainProb[1, :] .> 0.5) .== trainY)  # Row 1 is P(true); compute accuracy of prediction

# How accurate is the model on testing data?
testOut = model(testX |> device);      # Testing data to the device, apply model, get output
testProb = softmax(testOut) |> cpu;    # Convert output to probabilities, put back on CPU
mean((testProb[1, :] .> 0.5) .== testY)  # Row 1 is P(true); compute accuracy of prediction