**Load data**
# change the working directory to the folder that holds faithful.csv
setwd("~/..")
# read the Old Faithful geyser data; the first row of the file is the header
oldfaithful <- read.csv(file = "faithful.csv", header = TRUE)
# show the first three rows as a quick sanity check
head(oldfaithful, n = 3)
## X eruptions waiting
## 1 1 3.600 79
## 2 2 1.800 54
## 3 3 3.333 74
2. Discuss some of the advantages and disadvantages of LOWESS from a computational standpoint.
# Scatter plot of waiting against eruptions; each point's colour is taken
# from its eruptions value.
plot(
  waiting ~ eruptions,
  data = oldfaithful,
  col = eruptions,
  main = "Scatter Plot for Waiting ~ Eruptions"
)
# Overlay the ordinary least-squares fit (waiting ~ eruptions) in black.
abline(reg = lm(waiting ~ eruptions, data = oldfaithful), col = "black")
# Overlay LOWESS smooths of waiting on eruptions, drawn in this order:
# 20% span (magenta), 50% span (blue), 80% span (brown).
invisible(Map(
  function(span, colour) {
    lines(
      lowess(oldfaithful$eruptions, oldfaithful$waiting, f = span),
      col = colour
    )
  },
  c(0.20, 0.50, 0.80),
  c("magenta", "blue", "brown")
))
setwd("~/..")
# load Credit Card Default
credit.card <- read.csv("Default.csv", header = TRUE)
# Clean up the data.
# Recode the "No"/"Yes" columns as numeric 1/2. as.numeric() ignores `levels`
# and `labels`, and on a character column (what read.csv() returns by default
# since R 4.0) it yields NA for every row, so go through an explicit factor.
credit.card$student <- as.numeric(factor(credit.card$student, levels = c("No", "Yes"))) # "No" -> 1, "Yes" -> 2
credit.card$default <- as.numeric(factor(credit.card$default, levels = c("No", "Yes"))) # "No" -> 1, "Yes" -> 2
credit.card <- credit.card[complete.cases(credit.card), ] # remove rows that contain NA
credit.card <- subset(credit.card, select = c(default, student, balance, income)) # keep only these four columns
# preview the final credit.card data
head(credit.card, 3)
## default student balance income
## 1 1 1 729.5265 44361.63
## 2 1 2 817.1804 12106.13
## 3 1 1 1073.5492 31767.14
# install scatterplot3d only when it cannot be loaded
if (!require("scatterplot3d")) {
  install.packages("scatterplot3d")
}
## Loading required package: scatterplot3d
# attach scatterplot3d for the 3D plot
library("scatterplot3d")
# fit a linear model of default on balance, student and income
mr.model <- lm(
  formula = default ~ balance + student + income,
  data = credit.card
)
# summary of the model
summary(object = mr.model)
##
## Call:
## lm(formula = default ~ balance + student + income, data = credit.card)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24610 -0.06979 -0.02645 0.02018 0.98542
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.292e-01 1.308e-02 71.052 <2e-16 ***
## balance 1.327e-04 3.547e-06 37.412 <2e-16 ***
## student -1.033e-02 5.663e-03 -1.824 0.0682 .
## income 1.992e-07 1.917e-07 1.039 0.2990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.168 on 9996 degrees of freedom
## Multiple R-squared: 0.124, Adjusted R-squared: 0.1238
## F-statistic: 471.7 on 3 and 9996 DF, p-value: < 2.2e-16
# 3D scatter plot of the predictors: balance (x), student (y), income (z)
mr.model.3dplot <- scatterplot3d(
  x = credit.card$balance,
  y = credit.card$student,
  z = credit.card$income,
  highlight.3d = TRUE,
  main = "Multiple Regression 3D Scatterplot",
  xlab = "balance",
  ylab = "student",
  zlab = "income"
)
# plane3d() draws z = intercept + x.coef * x + y.coef * y on the plotted axes.
# mr.model (default ~ balance + student + income) does not define a plane in
# this balance/student/income space, and adding its intercept to hand-picked
# constants does not give a fitted surface. Overlay the least-squares plane of
# income on balance and student instead, which matches the x, y and z axes.
mr.model.3dplot$plane3d(
  lm(income ~ balance + student, data = credit.card),
  col = "blue"
)