488 Homework #6

Question #1

#Make sure to pick a good bin width for the histogram.
#Make sure to pick a good bandwith for the kernel smoother.
kcTemp <- c(43.8,40.1,49.2,41.8,34.0,49.1,47.8,48.1,37.6,42.0,
43.7,47.1,47.7,46.9,36.5,45.0,48.0,37.6,42.2,38.7,
45.2,42.5,43.1,36.0,47.4,48.5,47.1,43.2,43.8,45.7)

n<-100
x = rnorm(n) 

hist(kcTemp, main="Delta=1.5", freq=FALSE)  
dens<-density(kcTemp, bw=1.5) 
points(dens$x, dens$y, type="l", col="cyan4", lwd=3)

Question #2

dat2 <- read.csv("https://www.dropbox.com/s/s4cp6q7xpdli7na/HW6stateData.csv?dl=1")

# Initial plot
plot(dat2$HS.Grad, dat2$Income)

Plot HS graduation rate (x-axis) vs per capita income (y-axis)
Fit a simple linear regression model to this data. Plot this line over the points.

lin.mod <- lm(dat2$Income ~ dat2$HS.Grad)

plot(dat2$HS.Grad, dat2$Income, main="Graduation Rate v. Income", ylab="Income", xlab="Graduation Rate")
par(new=T)
abline(lin.mod, col='cyan4', lwd=2)

Fit a linear regression model with a quadratic effect for HS graduation rate to this data. Plot this line over the points.

dat2$HS.Grad.2 = dat2$HS.Grad^2
quad.mod = lm(Income~HS.Grad + HS.Grad.2, data=dat2)

x <- seq(25, 75, 0.1)
pred = predict(quad.mod, data.frame(HS.Grad=x, HS.Grad.2=x^2))

plot(dat2$HS.Grad, dat2$Income, main="Graduation Rate v. Income", ylab="Income", xlab="Graduation Rate")
par(new=T)
abline(lin.mod, col="cyan4", lwd=2)
lines(x, pred, col="salmon", lwd=2, lty=3)
legend("topleft", lwd=2, lty=c(1, 3), col=c("cyan4", "salmon"), legend=c("Linear", "Quadratic"))

Fit a loess model to this data with an appropriate bandwidth. Plot this line over the points.

ls<-loess(Income~HS.Grad, span=1, degree=2, data=dat2)
pred2 = predict(ls, data.frame(HS.Grad = x))

plot(dat2$HS.Grad,dat2$Income, main="Graduation Rate v. Income", ylab="Income", xlab="Graduation Rate")
par(new=T)
abline(lin.mod, col="cyan4", lwd=2, lty=1)
lines(x, pred, col="salmon", lwd=2, lty=2)
lines(x, pred2, col="darkorchid4", lwd=2, lty=5)
legend("topleft", lwd=2, lty=c(1, 2, 5), col=c("cyan4", "salmon", "darkorchid4"), legend=c("Linear", "Quadratic", "Loess"))

Comments on the similarities and differences between these three different fits.

Well, it was clear immediately that the linear model was going to have a poor fit. With just an initial plot of the data you can tell that the slope is increasing at a different speed as graduation rate and income increase. The quadratic model fits the data better and forms a sort of concave-down shape that you suspect is present just by looking at the plotted data points. The loess model definitely fits the data best, as it captured a lot of the intricacies going on with the data at the 55 - 65 graduation rate that the quadratic model missed.

Question #3

set.seed(1234)
x <- runif(100,0,10)
y <- sin(x)+rnorm(100,0,.3)

ls<-loess(y~x, span=0.15, degree=2)
plot(x, y)
par(new=T)
j=order(x)
points(x[j], ls$fitted[j], col="firebrick", lwd=2, type="l")

ls<-loess(y~x, span=0.2, degree=2)
plot(x, y)
par(new=T)
j=order(x)
points(x[j], ls$fitted[j], col="firebrick", lwd=2, type="l")

ls<-loess(y~x, span=0.5, degree=2)
plot(x, y)
par(new=T)
j=order(x)
points(x[j], ls$fitted[j], col="firebrick", lwd=2, type="l")

A reasonable span range for this data would be somewhere between 0.2 and 0.6.

Question #4

As sigma increased and more noise was introduced, the lower bound of our span range kept increasing. Aka, we needed a higher span to produce the same level of “smoothness” compared to data with a lower value of σ/less noise

#sigma is the amount of noise added.
set.seed(1234)
sigma.a<-.1
sigma.b<-.5
sigma.c<-1
sigma.d<-1.5
x <- runif(100,0,10)

σ equal to 0.1

y <- sin(x)+rnorm(100,0,sigma.a)
ls0.1<-loess(y~x, span=.15, degree=2)
plot(x,y, main="σ = 0.1")
par(new=T)
j=order(x)
points(x[j], ls0.1$fitted[j], col="cyan4", lwd=3, type="l")

Reasonable span range for σ=0.1 is 0.15 to 0.45

σ equal to 0.5

set.seed(1234)
x <- runif(100,0,10)
y <- sin(x)+rnorm(100,0,sigma.b)

ls0.5<-loess(y~x, span=.2, degree=2)
plot(x,y, main="σ = 0.5")
par(new=T)
j=order(x)
points(x[j], ls0.5$fitted[j], col="cyan4", lwd=3, type="l")

Reasonable span range for σ=0.5 is 0.2 to 0.6

σ equal to 1

set.seed(1234)
x <- runif(100,0,10)
y <- sin(x)+rnorm(100,0,sigma.c)

ls1<-loess(y~x, span=.35, degree=2)
plot(x,y, main="σ = 1.0")
par(new=T)
j=order(x)
points(x[j], ls1$fitted[j], col="cyan4", lwd=3, type="l")

Reasonable span range for σ=1 is 0.35 to 0.75

σ equal to 1.5

set.seed(1234)
x <- runif(100,0,10)
y <- sin(x)+rnorm(100,0,sigma.d)

ls1.5<-loess(y~x, span=.5, degree=2)
plot(x,y, main="σ = 1.5")
par(new=T)
j=order(x)
points(x[j], ls1.5$fitted[j], col="cyan4", lwd=3, type="l")

Reasonable span range for σ=1.5 is 0.5 to 0.9

488 Homework #6

Sofia Bello

4/14/2019