#Make sure to pick a good bin width for the histogram.
#Make sure to pick a good bandwith for the kernel smoother.
kcTemp <- c(43.8,40.1,49.2,41.8,34.0,49.1,47.8,48.1,37.6,42.0,
43.7,47.1,47.7,46.9,36.5,45.0,48.0,37.6,42.2,38.7,
45.2,42.5,43.1,36.0,47.4,48.5,47.1,43.2,43.8,45.7)
n<-100
x = rnorm(n)
hist(kcTemp, main="Delta=1.5", freq=FALSE)
dens<-density(kcTemp, bw=1.5)
points(dens$x, dens$y, type="l", col="cyan4", lwd=3)
dat2 <- read.csv("https://www.dropbox.com/s/s4cp6q7xpdli7na/HW6stateData.csv?dl=1")
# Initial plot
plot(dat2$HS.Grad, dat2$Income)
lin.mod <- lm(dat2$Income ~ dat2$HS.Grad)
plot(dat2$HS.Grad, dat2$Income, main="Graduation Rate v. Income", ylab="Income", xlab="Graduation Rate")
par(new=T)
abline(lin.mod, col='cyan4', lwd=2)
dat2$HS.Grad.2 = dat2$HS.Grad^2
quad.mod = lm(Income~HS.Grad + HS.Grad.2, data=dat2)
x <- seq(25, 75, 0.1)
pred = predict(quad.mod, data.frame(HS.Grad=x, HS.Grad.2=x^2))
plot(dat2$HS.Grad, dat2$Income, main="Graduation Rate v. Income", ylab="Income", xlab="Graduation Rate")
par(new=T)
abline(lin.mod, col="cyan4", lwd=2)
lines(x, pred, col="salmon", lwd=2, lty=3)
legend("topleft", lwd=2, lty=c(1, 3), col=c("cyan4", "salmon"), legend=c("Linear", "Quadratic"))
ls<-loess(Income~HS.Grad, span=1, degree=2, data=dat2)
pred2 = predict(ls, data.frame(HS.Grad = x))
plot(dat2$HS.Grad,dat2$Income, main="Graduation Rate v. Income", ylab="Income", xlab="Graduation Rate")
par(new=T)
abline(lin.mod, col="cyan4", lwd=2, lty=1)
lines(x, pred, col="salmon", lwd=2, lty=2)
lines(x, pred2, col="darkorchid4", lwd=2, lty=5)
legend("topleft", lwd=2, lty=c(1, 2, 5), col=c("cyan4", "salmon", "darkorchid4"), legend=c("Linear", "Quadratic", "Loess"))
Well, it was clear immediately that the linear model was going to have a poor fit. With just an initial plot of the data you can tell that the slope is increasing at a different speed as graduation rate and income increase. The quadratic model fits the data better and forms a sort of concave-down shape that you suspect is present just by looking at the plotted data points. The loess model definitely fits the data best, as it captured a lot of the intricacies going on with the data at the 55 - 65 graduation rate that the quadratic model missed.
set.seed(1234)
x <- runif(100,0,10)
y <- sin(x)+rnorm(100,0,.3)
ls<-loess(y~x, span=0.15, degree=2)
plot(x, y)
par(new=T)
j=order(x)
points(x[j], ls$fitted[j], col="firebrick", lwd=2, type="l")
ls<-loess(y~x, span=0.2, degree=2)
plot(x, y)
par(new=T)
j=order(x)
points(x[j], ls$fitted[j], col="firebrick", lwd=2, type="l")
ls<-loess(y~x, span=0.5, degree=2)
plot(x, y)
par(new=T)
j=order(x)
points(x[j], ls$fitted[j], col="firebrick", lwd=2, type="l")
A reasonable span range for this data would be somewhere between 0.2 and 0.6.
As sigma increased and more noise was introduced, the lower bound of our span range kept increasing. Aka, we needed a higher span to produce the same level of “smoothness” compared to data with a lower value of σ/less noise
#sigma is the amount of noise added.
set.seed(1234)
sigma.a<-.1
sigma.b<-.5
sigma.c<-1
sigma.d<-1.5
x <- runif(100,0,10)
y <- sin(x)+rnorm(100,0,sigma.a)
ls0.1<-loess(y~x, span=.15, degree=2)
plot(x,y, main="σ = 0.1")
par(new=T)
j=order(x)
points(x[j], ls0.1$fitted[j], col="cyan4", lwd=3, type="l")
Reasonable span range for σ=0.1 is 0.15 to 0.45
set.seed(1234)
x <- runif(100,0,10)
y <- sin(x)+rnorm(100,0,sigma.b)
ls0.5<-loess(y~x, span=.2, degree=2)
plot(x,y, main="σ = 0.5")
par(new=T)
j=order(x)
points(x[j], ls0.5$fitted[j], col="cyan4", lwd=3, type="l")
Reasonable span range for σ=0.5 is 0.2 to 0.6
set.seed(1234)
x <- runif(100,0,10)
y <- sin(x)+rnorm(100,0,sigma.c)
ls1<-loess(y~x, span=.35, degree=2)
plot(x,y, main="σ = 1.0")
par(new=T)
j=order(x)
points(x[j], ls1$fitted[j], col="cyan4", lwd=3, type="l")
Reasonable span range for σ=1 is 0.35 to 0.75
set.seed(1234)
x <- runif(100,0,10)
y <- sin(x)+rnorm(100,0,sigma.d)
ls1.5<-loess(y~x, span=.5, degree=2)
plot(x,y, main="σ = 1.5")
par(new=T)
j=order(x)
points(x[j], ls1.5$fitted[j], col="cyan4", lwd=3, type="l")
Reasonable span range for σ=1.5 is 0.5 to 0.9