# Setup: point R at the lab folder, load readxl, and read the team stats
# workbook into NBATeam.
# NOTE(review): the working directory below is machine-specific; adjust it (or
# run the script from the folder holding the workbook) on other machines.
setwd("~/Desktop/Labs")
library(readxl)
NBATeam <- read_excel("16-17 NBA Stats.xlsx")

# View() opens a data viewer, which only makes sense in an interactive session.
if (interactive()) {
  View(NBATeam)
}

# Name the variables
# Use FG% as a predictor variable
won <- NBATeam$W
# "FG%" is not a syntactic R name, so it is extracted with [[ ]] rather than $.
FieldGoalPrecent <- NBATeam[["FG%"]]

# Combine the variables in columns and summarise them
summary(cbind(won, FieldGoalPrecent))

# Distribution of the response
boxplot(won, xlab = "Wins", ylab = "# of Wins")
hist(won)

# Distribution of the predictor
boxplot(FieldGoalPrecent, ylab = "FG%")
hist(FieldGoalPrecent)
shapiro.test(FieldGoalPrecent) # normality test

# Simple linear regression of wins on field goal percentage
model1 <- lm(won ~ FieldGoalPrecent)
summary(model1)

## Create a linear regression model of our data
# Predicted value for x using a hard-coded intercept (-163.15) and slope
# (446.51).
# NOTE(review): the coefficients are presumably copied from summary(model1);
# confirm they still match if the data changes.
# x: numeric vector. Returns a numeric vector of the same length.
yHat <- function(x) {
  -163.15 + x * 446.51
}

## Plot of our data with the linear regression line
plot(FieldGoalPrecent, won)
lines(FieldGoalPrecent, yHat(FieldGoalPrecent))

# Standard lm diagnostic plots for model1
plot(model1)

### Using FG
FieldGoal <- NBATeam$FG
summary(cbind(won, FieldGoal))

# Distribution of the response
boxplot(won, xlab = "Wins", ylab = "# of Wins")
hist(won)

# Distribution of the predictor
boxplot(FieldGoal)
hist(FieldGoal)
shapiro.test(FieldGoal) # normality test

# Simple linear regression of wins on field goals
model2 <- lm(won ~ FieldGoal)
summary(model2)

# Predicted value for x using a hard-coded intercept (-192.76528) and slope
# (0.07301).
# NOTE(review): the coefficients are presumably copied from summary(model2);
# confirm they still match if the data changes.
# x: numeric vector. Returns a numeric vector of the same length.
YHat <- function(x) {
  -192.76528 + x * 0.07301
}

# Scatter plot of wins against field goals with the regression line
plot(FieldGoal, won)
lines(FieldGoal, YHat(FieldGoal))

# Standard lm diagnostic plots for model2
plot(model2)
#### P-Value too high FG% is better ####

### Offensive Rebounds
OffensiveRebounds <- NBATeam$ORB
summary(cbind(won, OffensiveRebounds))

# Simple linear regression of wins on offensive rebounds
model3 <- lm(won ~ OffensiveRebounds)
summary(model3)

# Predicted value for x using a hard-coded intercept (29.36118) and slope
# (0.01400).
# NOTE(review): the coefficients are presumably copied from summary(model3);
# confirm they still match if the data changes.
yHat <- function(x) {
  29.36118 + x * 0.01400
}

# Scatter plot with the regression line, then the lm diagnostic plots
plot(OffensiveRebounds, won)
lines(OffensiveRebounds, yHat(OffensiveRebounds))
plot(model3)

### Defensive Rebounds
DefensiveRebounds <- NBATeam$DRB
summary(cbind(won, DefensiveRebounds))

# Simple linear regression of wins on defensive rebounds
model4 <- lm(won ~ DefensiveRebounds)
summary(model4)

# Predicted value for x using a hard-coded intercept (-58.69678) and slope
# (0.03643).
# NOTE(review): the coefficients are presumably copied from summary(model4);
# confirm they still match if the data changes.
# x: numeric vector. Returns a numeric vector of the same length.
yHat <- function(x) {
  -58.69678 + x * 0.03643
}

# Scatter plot of wins against defensive rebounds with the regression line
plot(DefensiveRebounds, won)
lines(DefensiveRebounds, yHat(DefensiveRebounds))

# Standard lm diagnostic plots for model4
plot(model4)

## Total Rebounds
TotalRebounds <- NBATeam$TRB
summary(cbind(won, TotalRebounds))

# Simple linear regression of wins on total rebounds
model5 <- lm(won ~ TotalRebounds)
summary(model5)

# Predicted value for x using a hard-coded intercept (-46.02463) and slope
# (0.02439).
# NOTE(review): the coefficients are presumably copied from summary(model5);
# confirm they still match if the data changes.
# x: numeric vector. Returns a numeric vector of the same length.
yHat <- function(x) {
  -46.02463 + x * 0.02439
}

# Scatter plot of wins against total rebounds with the regression line
plot(TotalRebounds, won)
lines(TotalRebounds, yHat(TotalRebounds))

# Standard lm diagnostic plots for model5
plot(model5)

# Root-mean-square error of model5: computed from the model residuals, not
# from the raw predictor values.
rmse <- sqrt(mean(residuals(model5)^2))

########## SIMPLIFIED ##########
# Read the data (namespace-qualified so this section does not rely on an
# earlier library(readxl) call; the path is relative to the working directory)
NBATeam <- readxl::read_excel("16-17 NBA Stats.xlsx")

# Define the response (won) and the predictor variables
won <- NBATeam$W
# "FG%" is not a syntactic R name, so it is extracted with [[ ]] rather than $.
FieldGoalPrecent <- NBATeam[["FG%"]]
FieldGoal <- NBATeam$FG
OffensiveRebounds <- NBATeam$ORB
DefensiveRebounds <- NBATeam$DRB
TotalRebounds <- NBATeam$TRB

# Perform linear regression for each predictor variable.
# NOTE(review): the formula terms are standalone vectors, not columns of
# NBATeam, so lm() presumably resolves them from the calling environment and
# `data = NBATeam` only matters if NBATeam has columns with these names.
model1 <- lm(won ~ FieldGoalPrecent, data = NBATeam)
model2 <- lm(won ~ FieldGoal, data = NBATeam)
model3 <- lm(won ~ OffensiveRebounds, data = NBATeam)
model4 <- lm(won ~ DefensiveRebounds, data = NBATeam)
model5 <- lm(won ~ TotalRebounds, data = NBATeam)

# Perform multiple linear regression: wins on offensive and defensive rebounds
multmodel <- lm(won ~ OffensiveRebounds + DefensiveRebounds, data = NBATeam)

# Plotting residuals QQ plots for each model

# Set up a 2 x 3 grid for multiple plots, remembering the previous graphics
# settings so they can be restored afterwards.
old_par <- par(mfrow = c(2, 3))

# which = 2 selects the normal Q-Q plot of the residuals from plot.lm
plot(model1, which = 2, main = "Field Goal Precent") # QQ plot for model1
plot(model2, which = 2, main = "Field Goal") # QQ plot for model2
plot(model3, which = 2, main = "Offensive Rebounds") # QQ plot for model3
plot(model4, which = 2, main = "Defensive Rebounds") # QQ plot for model4
plot(model5, which = 2, main = "Total Rebounds") # QQ plot for model5
plot(multmodel, which = 2) # QQ plot for multmodel

# Restore the layout so later plots are not drawn into the grid.
par(old_par)