rm(list = ls())
library(ggplot2)
library(dplyr)
##
## 载入程辑包:'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
sigmoid <- function(z){1/(1+exp(-z))}
cost <- function(theta, X, y){
m <- length(y) # number of training examples
h <- sigmoid(X %*% theta)
J <- (t(-y)%*%log(h)-t(1-y)%*%log(1-h))/m
J
}
grad <- function(theta, X, y){
m <- length(y)
h <- sigmoid(X%*%theta)
grad <- (t(X)%*%(h - y))/m
grad
}
logisticReg <- function(X, y){
#remove NA rows
X <- na.omit(X)
y <- na.omit(y)
#add bias term and convert to matrix
X <- mutate(X, bias =1)
#move the bias column to col1
X <- as.matrix(X[, c(ncol(X), 1:(ncol(X)-1))])
y <- as.matrix(y)
#initialize theta
theta <- matrix(rep(0, ncol(X)), nrow = ncol(X))
#use the optim function to perform gradient descent
costOpti <- optim(theta, fn = cost, gr = grad, X = X, y = y)
#return coefficients
return(costOpti$par)
}
logisticProb <- function(theta, X){
X <- na.omit(X)
#add bias term and convert to matrix
X <- mutate(X, bias =1)
X <- as.matrix(X[,c(ncol(X), 1:(ncol(X)-1))])
return(sigmoid(X%*%theta))
}
# y prediction
logisticPred <- function(prob){
return(round(prob, 0))
}
set.seed(1024)
r1 = sqrt(runif(1500, min=0, max=1))
a1 = runif(1500, min=0, max=2*pi)
x1 = r1 * cos(a1) + rnorm(1500,mean=0,sd=0.2)
x2 = r1 * sin(a1) + rnorm(1500,mean=0,sd=0.2)
dt1 = data.frame(x1,x2)
dt1$y = 1
r2 = sqrt(runif(3500, min=2.5, max=4))
a2 = runif(3500, min=0, max=2*pi)
x1 = r2 * cos(a2)
x2 = r2 * sin(a2)
dt2 = data.frame(x1,x2)
dt2$y = 0
dtf = rbind(dt1,dt2)
#head(dt1)
ggplot(dtf) +
geom_point(aes(x=x1, y=x2,color = as.character(y) ), size = 2) +
scale_colour_discrete(name ="Label") +
xlim(-2.5, 2.5) +
ylim(-2.5, 2.5) + coord_fixed(ratio = 1) +
ggtitle('Data to be classified') +
theme_bw(base_size = 12)
Xd = dtf
X = Xd[,c(1,2)]
y = Xd$y
\(h_\theta(x) = g(\theta+\theta_1x_1+\theta_2x_2)\)
# training
Xm1 = X
theta1 <- logisticReg(Xm1, y)
# generate a grid for decision boundary, this is the test set
grid <- expand.grid(seq(-2.5, 2.5, length.out = 100), seq(-2.5, 2.5, length.out = 100))
colnames(grid) = c('x1','x2')
# predict the probability
probZ <- logisticProb(theta1, grid)
# predict the label
Z <- logisticPred(probZ)
gridPred = cbind(grid, Z)
# decision boundary visualization
M1_plot <- ggplot() +
geom_point(data = X, aes(x=x1, y=x2, color = as.character(y)), size = 2, show.legend = F) +
geom_tile(data = gridPred, aes(x = grid[, 1],y = grid[, 2], fill=as.character(Z)),
alpha = 0.3, show.legend = F) + ylim(-3, 3)+ xlim(-3, 3) +
ggtitle('Decision Boundary for Model I') +
coord_fixed(ratio = 1) +
theme_bw(base_size = 12)
M1_plot
\(h_\theta(x) = g(\theta+\theta_1x_1+\theta_2x_2 + \theta_3x_1^2 + \theta_4x_2^2)\)
Xm2 = X
Xm2$x3 = Xm2$x1^2
Xm2$x4 = Xm2$x2^2
# training
theta2 <- logisticReg(Xm2, y)
# generate a grid for decision boundary, this is the test set
grid <- expand.grid(seq(-2.5, 2.5, length.out = 100), seq(-2.5, 2.5, length.out = 100))
colnames(grid) = c('x1','x2')
grid$x3 = grid$x1^2
grid$x4 = grid$x2^2
# predict the probability
probZ <- logisticProb(theta2, grid)
# predict the label
Z <- logisticPred(probZ)
gridPred = cbind(grid, Z)
# decision boundary visualization
M2_plot <- ggplot() +
geom_point(data = X, aes(x=x1, y=x2, color = as.character(y)), size = 2, show.legend = F) +
geom_tile(data = gridPred, aes(x = grid[, 1],y = grid[, 2], fill=as.character(Z)),
alpha = 0.3, show.legend = F) + ylim(-3, 3)+ xlim(-3, 3) +
ggtitle('Decision Boundary for Model II') +
coord_fixed(ratio = 1) +
theme_bw(base_size = 12)
M2_plot
probZ1 <- logisticProb(theta1, Xm1)
Z1 <- logisticPred(probZ1)
mean(Z1==y)
## [1] 0.7
probZ2 <- logisticProb(theta2, Xm2)
Z2 <- logisticPred(probZ2)
mean(Z2==y)
## [1] 0.9994
We can see that with the quadratic term, the boundary is better.