# Set working directory (machine-specific; the relative Excel path below
# is resolved against it).
setwd("~/Desktop/Labs")
library(readxl)

# Load the team statistics workbook into a data frame.
NBATeam <- read_excel("16-17 NBA Stats.xlsx")

# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(NBATeam)
}
# Name the variables
# Use FG% as a predictor variable
won <- NBATeam$W
# `FG%` is not a syntactic R name, so it has to be looked up by string
# (or quoted with backticks) rather than written bare after `$`.
FieldGoalPrecent <- NBATeam[["FG%"]]
# Combine the variables in columns and print a five-number summary of each.
summary(cbind(won, FieldGoalPrecent))

# Distribution of the response variable.
boxplot(won, xlab = "Wins", ylab = "# of Wins")
hist(won)

# Distribution of the predictor, followed by a Shapiro-Wilk normality test.
boxplot(FieldGoalPrecent, ylab = "FG%")
hist(FieldGoalPrecent)
shapiro.test(FieldGoalPrecent) # normality test
# Simple linear regression of `won` on `FieldGoalPrecent`; summary() prints
# the coefficient table and fit statistics.
model1 <- lm(won ~ FieldGoalPrecent)
summary(model1)
## Create a linear regression model of our data
# Predicted value for predictor value(s) `x` (vectorised over `x`).
# The intercept and slope are hard-coded; presumably they were copied from
# the summary(model1) output -- verify against the fitted model.
yHat <- function(x) {
  intercept <- -163.15
  slope <- 446.51
  intercept + x * slope
}
## Plot of our data with linear regression line
plot(FieldGoalPrecent, won)
# Overlay the line computed by yHat() at the observed predictor values.
lines(FieldGoalPrecent, yHat(FieldGoalPrecent))

# Standard lm diagnostic plots for the fitted model.
plot(model1)
### Using FG
FieldGoal <- NBATeam$FG
summary(cbind(won, FieldGoal))

# Distribution of the response variable.
boxplot(won, xlab = "Wins", ylab = "# of Wins")
hist(won)

# Distribution of the predictor, followed by a Shapiro-Wilk normality test.
boxplot(FieldGoal)
hist(FieldGoal)
shapiro.test(FieldGoal) # normality test
# Simple linear regression of `won` on `FieldGoal`; summary() prints the
# coefficient table and fit statistics.
model2 <- lm(won ~ FieldGoal)
summary(model2)
# Predicted value for predictor value(s) `x` (vectorised over `x`).
# The intercept and slope are hard-coded; presumably they were copied from
# the summary(model2) output -- verify against the fitted model.
YHat <- function(x) {
  intercept <- -192.76528
  slope <- 0.07301
  intercept + x * slope
}
# Scatter plot of the data with the line computed by YHat() overlaid.
plot(FieldGoal, won)
lines(FieldGoal, YHat(FieldGoal))

# Standard lm diagnostic plots for the fitted model.
plot(model2)
#### P-Value too high FG% is better ####
### Offensive Rebounds
OffensiveRebounds <- NBATeam$ORB
summary(cbind(won, OffensiveRebounds))

# Simple linear regression of `won` on `OffensiveRebounds`.
model3 <- lm(won ~ OffensiveRebounds)
summary(model3)

# Predicted value for predictor value(s) `x` (vectorised over `x`).
# The intercept and slope are hard-coded; presumably they were copied from
# the summary(model3) output -- verify against the fitted model.
yHat <- function(x) {
  intercept <- 29.36118
  slope <- 0.01400
  intercept + x * slope
}

# Scatter plot of the data with the line computed by yHat() overlaid,
# then the standard lm diagnostic plots.
plot(OffensiveRebounds, won)
lines(OffensiveRebounds, yHat(OffensiveRebounds))
plot(model3)
### Defensive Rebounds
DefensiveRebounds <- NBATeam$DRB
summary(cbind(won, DefensiveRebounds))

# Simple linear regression of `won` on `DefensiveRebounds`.
model4 <- lm(won ~ DefensiveRebounds)
summary(model4)
# Predicted value for predictor value(s) `x` (vectorised over `x`).
# The intercept and slope are hard-coded; presumably they were copied from
# the summary(model4) output -- verify against the fitted model.
yHat <- function(x) {
  intercept <- -58.69678
  slope <- 0.03643
  intercept + x * slope
}
# Scatter plot of the data with the line computed by yHat() overlaid.
plot(DefensiveRebounds, won)
lines(DefensiveRebounds, yHat(DefensiveRebounds))

# Standard lm diagnostic plots for the fitted model.
plot(model4)
## Total Rebounds
TotalRebounds <- NBATeam$TRB
summary(cbind(won, TotalRebounds))

# Simple linear regression of `won` on `TotalRebounds`.
model5 <- lm(won ~ TotalRebounds)
summary(model5)
# Predicted value for predictor value(s) `x` (vectorised over `x`).
# The intercept and slope are hard-coded; presumably they were copied from
# the summary(model5) output -- verify against the fitted model.
yHat <- function(x) {
  intercept <- -46.02463
  slope <- 0.02439
  intercept + x * slope
}
# Scatter plot of the data with the line computed by yHat() overlaid.
plot(TotalRebounds, won)
lines(TotalRebounds, yHat(TotalRebounds))

# Standard lm diagnostic plots for the fitted model.
plot(model5)

# Root-mean-square error of the fit. This must be computed from the model
# residuals; squaring the raw predictor values (as before) only measures
# the magnitude of TotalRebounds, not the prediction error.
rmse <- sqrt(mean(residuals(model5)^2))
########## SIMPLIFIED ##########
# Read the data
NBATeam <- read_excel("16-17 NBA Stats.xlsx")

# Pull the response and each candidate predictor out as plain vectors.
won <- NBATeam$W
# `FG%` is not a syntactic R name, so it is looked up by string.
FieldGoalPrecent <- NBATeam[["FG%"]]
FieldGoal <- NBATeam$FG
OffensiveRebounds <- NBATeam$ORB
DefensiveRebounds <- NBATeam$DRB
TotalRebounds <- NBATeam$TRB
# Perform linear regression for each predictor variable.
# NOTE: the formula variables (won, FieldGoalPrecent, ...) are not columns
# of NBATeam; lm() falls back to the formula's environment to find them, so
# `data = NBATeam` has no effect on which values are used.
model1 <- lm(won ~ FieldGoalPrecent, data = NBATeam)
model2 <- lm(won ~ FieldGoal, data = NBATeam)
model3 <- lm(won ~ OffensiveRebounds, data = NBATeam)
model4 <- lm(won ~ DefensiveRebounds, data = NBATeam)
model5 <- lm(won ~ TotalRebounds, data = NBATeam)

# Multiple regression on both rebound predictors together.
multmodel <- lm(won ~ OffensiveRebounds + DefensiveRebounds, data = NBATeam)
# Set up a 2 x 3 grid so all six QQ plots share one page.
par(mfrow = c(2, 3))

# `which = 2` selects the normal QQ plot of the residuals for each model.
plot(model1, which = 2, main = "Field Goal Precent") # QQ plot for residuals of model1
plot(model2, which = 2, main = "Field Goal") # QQ plot for residuals of model2
plot(model3, which = 2, main = "Offensive Rebounds") # QQ plot for residuals of model3
plot(model4, which = 2, main = "Defensive Rebounds") # QQ plot for residuals of model4
plot(model5, which = 2, main = "Total Rebounds") # QQ plot for residuals of model5
plot(multmodel, which = 2) # QQ plot for residuals of multmodel