Loading Packages

library(graphics)
library(ggplot2)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## v purrr   0.3.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(knitr)
## Warning: package 'knitr' was built under R version 3.6.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.3
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.3
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(rstatix)
## Warning: package 'rstatix' was built under R version 3.6.3
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter

Preparing Data

# Load the 2017-18 NBA player statistics.
# The file is read relative to the current working directory (for an R Markdown
# document that is the folder containing the .Rmd), so the analysis no longer
# depends on a machine-specific absolute path set through setwd().
nba <- read.csv("nba2017-18.csv", header = TRUE)

# Keep only the columns used below: points, total rebounds, minutes played, age.
data <- subset(nba, select = c(PTS, TRB, MP, Age))

Question 1: Plot rebounds as a function of points, and add a regression line. Is there a strong relationship?

# Rebounds as a function of points: points (PTS) is the predictor on the x-axis
# and rebounds (TRB) is the response on the y-axis, so the fitted line is the
# regression of TRB on PTS, matching the title.
rebounds_fun_points <- ggplot(data, aes(x = PTS, y = TRB)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Rebounds as a function of Points") +
  xlab("Points") +
  ylab("Rebounds") +
  theme_bw()
rebounds_fun_points
## `geom_smooth()` using formula 'y ~ x'

Conclusion: From the above graph, we can see a strong relationship

Question 2: Plot rebounds per minute played as a function of points per minute played. Is there a strong relationship?

# Rebounds per minute as a function of points per minute: the per-minute
# scoring rate (PTS / MP) is the predictor on the x-axis and the per-minute
# rebounding rate (TRB / MP) is the response on the y-axis, matching the title.
rbpm_fun_ppm <- ggplot(data, aes(x = PTS / MP, y = TRB / MP)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Rebounds per minute as a function of Points per minute") +
  xlab("Points per minute") +
  ylab("Rebounds per minute") +
  theme_bw()
rbpm_fun_ppm
## `geom_smooth()` using formula 'y ~ x'

Conclusion: From the graph, we can see a weak relationship

Question 3: Can you find a way of showing (graphically or mathematically) that there really is a weak but meaningful relationship between points per minute played and rebounds per minute played? Is this relationship positive or negative?

# Per-minute rates: total rebounds and points, each divided by minutes played.
Rebounds_per_minute <- with(data, TRB / MP)
Points_per_minute <- with(data, PTS / MP)

# Store both rates as columns so the model can refer to them through `data`.
data[["Rebounds_per_minute"]] <- Rebounds_per_minute
data[["Points_per_minute"]] <- Points_per_minute

# Simple linear regression of points per minute on rebounds per minute.
fitting_lm <- lm(
  formula = Points_per_minute ~ Rebounds_per_minute,
  data = data
)

# Draw the model's diagnostic plots.
plot(fitting_lm)

# Coefficient table, residual summary and R-squared for the fit.
summary(fitting_lm)
## 
## Call:
## lm(formula = Points_per_minute ~ Rebounds_per_minute, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44670 -0.09754 -0.01050  0.07975  2.61653 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.38347    0.01697  22.594   <2e-16 ***
## Rebounds_per_minute  0.06323    0.08355   0.757     0.45    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1897 on 538 degrees of freedom
## Multiple R-squared:  0.001063,   Adjusted R-squared:  -0.0007935 
## F-statistic: 0.5727 on 1 and 538 DF,  p-value: 0.4495

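Conclusion: For each one-unit increase in rebounds per minute, the model estimates an increase of about 0.063 points per minute (coefficient 0.06323), so the fitted relationship is positive. However, with p = 0.45 and an R-squared of about 0.001, this slope is not statistically distinguishable from zero in this simple fit.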

Question 4: Plot minutes played as a function of age, and add TWO different curves (not lines). Describe, in words, how you think the average minutes played among NBA players changes with age.

# Minutes played as a function of age: age is the predictor on the x-axis and
# minutes played (MP) is the response on the y-axis, matching the title and the
# MP ~ Age model fitted afterwards.
# The jitter amounts are swapped along with the axes, so each variable keeps
# the amount it had before (0.25 on Age, 0.5 on minutes played).
# Two loess curves with different spans (0.1 = wigglier, 0.3 = smoother) are
# overlaid; both axes stay on a log10 scale as before.
# NOTE(review): if Age takes only a few distinct values, span = 0.1 may make
# loess warn about near-singular local fits - widen the span if that happens.
min_age <- ggplot(data, aes(x = Age, y = MP)) +
  geom_jitter(width = 0.25, height = 0.5, size = 0.7) +
  geom_smooth(method = "loess", span = 0.1, se = FALSE) +
  geom_smooth(method = "loess", span = 0.3, se = FALSE) +
  scale_x_log10() +
  scale_y_log10() +
  ggtitle("Minutes Played as a function of Age") +
  xlab("Age") +
  ylab("Minutes Played") +
  theme_bw()
min_age
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# Linear regression of minutes played (MP) on age, followed by its summary
# (coefficients, residual spread and R-squared).
fitting_min_age <- lm(formula = MP ~ Age, data = data)
summary(fitting_min_age)
## 
## Call:
## lm(formula = MP ~ Age, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1273.86  -819.15   -77.57   703.77  1971.55 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  511.130    232.496   2.198   0.0283 *
## Age           22.560      8.799   2.564   0.0106 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 852.3 on 538 degrees of freedom
## Multiple R-squared:  0.01207,    Adjusted R-squared:  0.01023 
## F-statistic: 6.573 on 1 and 538 DF,  p-value: 0.01062

Conclusion: For each additional year of age, the model estimates an increase of about 22.6 minutes played on average (coefficient 22.56).

Question 5: Plot minutes played as a function of age, and this time add a regression line. What does this line tell you?

# Minutes played as a function of age with a straight regression line: age is
# the predictor on the x-axis and minutes played (MP) is the response on the
# y-axis, matching the title.
# The jitter amounts are swapped along with the axes, so each variable keeps
# the amount it had before (0.25 on Age, 0.5 on minutes played).
# Both axes stay on a log10 scale, so the line is fitted to the log-scaled
# values.
ggplot(data, aes(x = Age, y = MP)) +
  geom_jitter(width = 0.25, height = 0.5, size = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_x_log10() +
  scale_y_log10() +
  ggtitle("Minutes Played as a function of Age") +
  xlab("Age") +
  ylab("Minutes Played") +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

Conclusion: There are many players who have played the game for more than 100 minutes. The line gives us an understanding about the relationship between the two variables. As we keep increasing the number of minutes the player plays, the relationship with age becomes stronger. There is a slight increase in the age, when there is an increase in the number of minutes played.