Loading Packages
library(graphics)
library(ggplot2)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## v purrr 0.3.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(knitr)
## Warning: package 'knitr' was built under R version 3.6.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.3
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(rstatix)
## Warning: package 'rstatix' was built under R version 3.6.3
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
Preparing Data
# Load the 2017-18 NBA player table and keep only the columns the plots and
# the regression below use.
# NOTE(review): setwd() with an absolute, machine-specific path makes this
# script non-portable. It is kept here so the working directory seen by any
# later code is unchanged; prefer a project-relative path.
setwd("C:/Users/Daivik/Desktop/EDA/Assignments/Assignment 3")
# Spell the separator out explicitly: the original `sep =,` passed an empty
# argument rather than the intended comma separator.
nba <- read.csv("nba2017-18.csv", header = TRUE, sep = ",")
# PTS, TRB and MP are plotted below as points, rebounds and minutes played.
data <- nba[, c("PTS", "TRB", "MP", "Age")]
Question 1: Plot rebounds as a function of points, and add a regression line. Is there a strong relationship?
# Question 1: rebounds as a function of points, with a least-squares line.
# "Rebounds as a function of points" means rebounds are the response, so TRB
# goes on the y axis and PTS on the x axis; the axis labels match that.
rebounds_fun_points <- ggplot(data, aes(x = PTS, y = TRB)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Rebounds as a function of Points") +
  xlab("Points") +
  ylab("Rebounds") +
  theme_bw()
rebounds_fun_points
## `geom_smooth()` using formula 'y ~ x'

Conclusion: The scatter plot and the fitted regression line show a strong relationship between rebounds and points.
Question 2: Plot rebounds per minute played as a function of points per minute played. Is there a strong relationship?
# Question 2: rebounds per minute as a function of points per minute, with a
# least-squares line. Rebounds per minute is the response, so it is mapped to
# the y axis and points per minute to the x axis, matching the title.
rbpm_fun_ppm <- ggplot(data, aes(x = PTS / MP, y = TRB / MP)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Rebounds per minute as a function of Points per minute") +
  xlab("Points per minute") +
  ylab("Rebounds per minute") +
  theme_bw()
rbpm_fun_ppm
## `geom_smooth()` using formula 'y ~ x'

Conclusion: The scatter plot and the fitted regression line show only a weak relationship between rebounds per minute and points per minute.
Question 4: Plot minutes played as a function of age, and add TWO different curves (not lines). Describe, in words, how you think the average minutes played among NBA players changes with age.
# Question 4: minutes played as a function of age, with two loess curves of
# different smoothness.
# Minutes played is the response, so MP is on the y axis and Age on the x
# axis, matching the title and the MP ~ Age model fitted below.
min_age <- ggplot(data, aes(x = Age, y = MP)) +
  # Jitter is applied after scale transformations, so Age stays on a linear
  # scale and the 0.25 offset is in years; MP is not jittered at all
  # (height = 0) so no noise is added in log10 units.
  geom_jitter(width = 0.25, height = 0, size = 0.7) +
  # Two smoothers with distinct colours so they can be told apart.
  # NOTE(review): spans were widened from 0.1/0.3 because Age appears to take
  # few distinct (integer) values, which makes very narrow loess
  # neighbourhoods unstable; confirm against the data.
  geom_smooth(method = "loess", span = 0.5, se = FALSE, colour = "blue") +
  geom_smooth(method = "loess", span = 0.9, se = FALSE, colour = "red") +
  # Minutes played keeps its log10 scale, now on the y axis.
  scale_y_log10() +
  ggtitle("Minutes Played as a function of Age") +
  xlab("Age") +
  ylab("Minutes Played") +
  theme_bw()
min_age
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# Ordinary least-squares fit of minutes played (MP) on Age, followed by the
# standard coefficient / goodness-of-fit summary.
fitting_min_age <- lm(
  formula = MP ~ Age,
  data = data
)
summary(fitting_min_age)
##
## Call:
## lm(formula = MP ~ Age, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1273.86 -819.15 -77.57 703.77 1971.55
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 511.130 232.496 2.198 0.0283 *
## Age 22.560 8.799 2.564 0.0106 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 852.3 on 538 degrees of freedom
## Multiple R-squared: 0.01207, Adjusted R-squared: 0.01023
## F-statistic: 6.573 on 1 and 538 DF, p-value: 0.01062
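Conclusion: The fitted slope indicates that each additional year of age is associated with about 22.6 more minutes played on average (p ≈ 0.011). However, the R-squared is only about 0.012, so age explains roughly 1% of the variation in minutes played.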
Question 5: Plot minutes played as a function of age, and this time add a regression line. What does this line tell you?
# Question 5: minutes played as a function of age, with a least-squares line.
# Minutes played is the response, so MP is on the y axis and Age on the x
# axis, matching the title.
ggplot(data, aes(x = Age, y = MP)) +
  # Jitter is applied after scale transformations, so Age stays on a linear
  # scale and the 0.25 offset is in years; MP is not jittered (height = 0).
  geom_jitter(width = 0.25, height = 0, size = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  # Minutes played keeps its log10 scale, now on the y axis.
  scale_y_log10() +
  ggtitle("Minutes Played as a function of Age") +
  xlab("Age") +
  ylab("Minutes Played") +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

Conclusion: Many players have played more than 100 minutes. The regression line summarizes the relationship between the two variables: as the number of minutes played increases, the relationship with age appears to become stronger, and age increases slightly as minutes played increase.