**Load data**

# Directory that holds faithful.csv. The path is built explicitly with
# file.path() so the session-wide working directory is left untouched
# (no setwd() side effect on whatever runs afterwards).
data_dir <- "~/.."

# Load the Old Faithful geyser data (first row of the file is the header)
oldfaithful <- read.csv(file.path(data_dir, "faithful.csv"), header = TRUE)

# Preview the first three rows of the Old Faithful data
head(oldfaithful, 3)
##   X eruptions waiting
## 1 1     3.600      79
## 2 2     1.800      54
## 3 3     3.333      74

2. Discuss some of the advantages and disadvantages of LOWESS from a computational standpoint.

# Scatter plot of waiting ~ eruptions; each point's colour is taken from its
# eruption value so that clusters of eruptions stand out.
plot(
  waiting ~ eruptions,
  data = oldfaithful,
  col = oldfaithful$eruptions,
  main = "Scatter Plot for Waiting  ~ Eruptions"
)

# Ordinary least-squares fit of waiting on eruptions, drawn in black
ols_fit <- lm(waiting ~ eruptions, data = oldfaithful)
abline(ols_fit, col = "black")

# LOWESS smoothers of waiting against eruptions, one per smoothing span:
# 20% (magenta), 50% (blue) and 80% (brown), drawn in that order
lowess_spans <- c(magenta = 0.20, blue = 0.50, brown = 0.80)
for (span_col in names(lowess_spans)) {
  smooth_fit <- lowess(
    oldfaithful$eruptions,
    oldfaithful$waiting,
    f = lowess_spans[[span_col]]
  )
  lines(smooth_fit, col = span_col)
}

Loading data

# Directory that holds Default.csv. The path is built explicitly with
# file.path() so the session-wide working directory is left untouched.
data_dir <- "~/.."

# Load the credit card default data (first row of the file is the header)
credit.card <- read.csv(file.path(data_dir, "Default.csv"), header = TRUE)

# Recode the "No"/"Yes" columns as the numeric codes 1/2.
# as.numeric() has no `levels`/`labels` arguments and silently ignores them,
# and when read.csv() returns these columns as character (the default since
# R 4.0) as.numeric("No") is NA for every row. Going through factor() with
# explicit levels yields No -> 1, Yes -> 2 regardless of how the column was
# read in; any other value becomes NA and is dropped by the filter below.
yes_no_levels <- c("No", "Yes")
credit.card$student <- as.numeric(factor(credit.card$student, levels = yes_no_levels))
credit.card$default <- as.numeric(factor(credit.card$default, levels = yes_no_levels))

# Drop every row that contains at least one NA
credit.card <- credit.card[complete.cases(credit.card), ]

# Keep only the modelling columns (drops any other column in the file)
credit.card <- subset(credit.card, select = c(default, student, balance, income))

# Preview the first three rows of the cleaned credit.card data
head(credit.card, 3)
##   default student   balance   income
## 1       1       1  729.5265 44361.63
## 2       1       2  817.1804 12106.13
## 3       1       1 1073.5492 31767.14
# Install scatterplot3d only when it is missing. requireNamespace() checks
# availability without attaching the package, so it is attached exactly once
# by the library() call below (and library() errors loudly if that fails).
if (!requireNamespace("scatterplot3d", quietly = TRUE)) {
  install.packages("scatterplot3d")
}
# Attach scatterplot3d
library(scatterplot3d)

# Fit the multiple regression of default on balance, student and income.
# NOTE(review): default is recoded from "No"/"Yes" during clean-up, so this is
# a linear fit to a two-level response; confirm that is intended rather than
# glm(..., family = binomial).
mr.model <- lm(default ~ balance + student + income, data = credit.card)

# Summary of the fitted model
summary(mr.model)
## 
## Call:
## lm(formula = default ~ balance + student + income, data = credit.card)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24610 -0.06979 -0.02645  0.02018  0.98542 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.292e-01  1.308e-02  71.052   <2e-16 ***
## balance      1.327e-04  3.547e-06  37.412   <2e-16 ***
## student     -1.033e-02  5.663e-03  -1.824   0.0682 .  
## income       1.992e-07  1.917e-07   1.039   0.2990    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.168 on 9996 degrees of freedom
## Multiple R-squared:  0.124,  Adjusted R-squared:  0.1238 
## F-statistic: 471.7 on 3 and 9996 DF,  p-value: < 2.2e-16
# 3D scatter plot of the predictors: balance (x), student (y), income (z)
mr.model.3dplot <- scatterplot3d(
  credit.card$balance,
  credit.card$student,
  credit.card$income,
  highlight.3d = TRUE,
  main = "Multiple Regression 3D Scatterplot",
  xlab = "balance",
  ylab = "student",
  zlab = "income"
)

# plane3d() draws z = Intercept + x.coef * x + y.coef * y in the plotted
# coordinates. The response of mr.model (default) is not on any axis of this
# plot, so its coefficients cannot describe a plane here; the previous call
# passed an ad-hoc vector (intercept + c(balance coef, 2, 8000)) that was not
# a fitted surface. Fit the plane in the plotted coordinates instead and hand
# the lm object straight to plane3d(), which extracts the coefficients itself.
plane.model <- lm(income ~ balance + student, data = credit.card)
mr.model.3dplot$plane3d(plane.model, col = "blue")