#Correlation is a statistical concept that measures the degree to which two or more variables tend to fluctuate together. A positive correlation indicates that both variables tend to increase or decrease in tandem, while a negative correlation signifies that one variable tends to increase as the other decreases. The Pearson correlation coefficient, represented by ‘r’, is a widely used statistical measure that quantifies the strength and direction of a linear relationship between two continuous variables. This coefficient ranges from -1 (perfect negative correlation) to +1 (perfect positive correlation), with 0 indicating no linear correlation.
library(readr)
library(visdat)

# Folder that holds the two playoff CSV files. Paths are built with
# file.path() instead of calling setwd(), so the script does not change the
# session's working directory as a side effect. Adjust this one value when
# the data lives somewhere else.
data_dir <- "C:/Users/user/Downloads/week12"

# Jordan playoff data, read from jordan_playoffs.csv.
MData <- read.csv(file.path(data_dir, "jordan_playoffs.csv"))

# LeBron playoff data, read from lebron_playoffs.csv.
LData <- read.csv(file.path(data_dir, "lebron_playoffs.csv"))

# Plot each dataset's column classes and missing values before merging.
vis_dat(MData)
vis_dat(LData)
# Join the Jordan data (x) and the LeBron data (y) on the shared "trb" column.
# Every other column that exists in both inputs receives merge()'s default
# ".x" (from MData) or ".y" (from LData) suffix.
# NOTE(review): "trb" looks like a per-game statistic rather than a unique key,
# so this pairs every MData row with every LData row that has the same "trb"
# value (the merged table has 3,606 rows) -- confirm this many-to-many join is
# intended.
Merged <- merge(x = MData, y = LData, by = "trb")

# Plot the merged dataset's column classes and missing values.
vis_dat(Merged)
# 4. Create a summary statistics table of the merged dataset, and the
# two unmerged datasets (so that one can see the before/after datasets).
# You can use the stargazer package (you will have to install and load
# the package first if you do not already have it). This will
# give the reader some idea about the variables in your data, and their
# distribution.
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Print a plain-text summary-statistics table (N, mean, standard deviation,
# min, max) for the merged data, titled "Summary".
stargazer(Merged, type = "text", title = "Summary")
##
## Summary
## =================================================
## Statistic N Mean St. Dev. Min Max
## -------------------------------------------------
## trb 3,606 7.244 2.035 1 19
## game.x 3,606 8.792 5.399 1 22
## series_game.x 3,606 3.069 1.649 1 7
## mp.x 3,606 41.761 3.849 29 57
## fg.x 3,606 12.286 3.970 3 24
## fga.x 3,606 25.288 6.129 8 45
## fgp.x 3,606 0.484 0.099 0.167 0.833
## three.x 3,606 0.773 1.011 0 6
## threeatt.x 3,606 2.344 2.153 0 11
## threep.x 2,909 0.323 0.323 0.000 1.000
## ft.x 3,606 8.039 3.984 0 23
## fta.x 3,606 9.798 4.575 0 28
## ftp.x 3,595 0.814 0.160 0.250 1.000
## orb.x 3,606 1.906 1.367 0 8
## drb.x 3,606 5.338 1.837 1 15
## ast.x 3,606 5.837 2.972 1 14
## stl.x 3,606 2.142 1.416 0 6
## blk.x 3,606 0.907 1.056 0 5
## tov.x 3,606 3.103 1.759 0 8
## pts.x 3,606 33.384 8.888 15 63
## game_score.x 3,606 25.361 8.002 2.600 49.800
## game.y 3,606 9.748 5.587 1 23
## series_game.y 3,606 3.134 1.640 1 7
## mp.y 3,606 40.979 4.737 24 53
## fg.y 3,606 9.996 3.321 2 20
## fga.y 3,606 19.940 4.997 10 38
## fgp.y 3,606 0.503 0.118 0.111 0.846
## three.y 3,606 1.491 1.329 0 7
## threeatt.y 3,606 4.446 2.163 0 12
## threep.y 3,544 0.314 0.245 0.000 1.000
## ft.y 3,606 6.503 3.724 0 18
## fta.y 3,606 8.763 4.328 0 24
## ftp.y 3,591 0.727 0.190 0.000 1.000
## orb.y 3,606 1.160 1.175 0 6
## drb.y 3,606 6.083 2.017 1 16
## ast.y 3,606 6.730 2.675 1 14
## stl.y 3,606 1.771 1.227 0 6
## blk.y 3,606 0.935 1.030 0 5
## tov.y 3,606 3.531 2.020 0 10
## pts.y 3,606 27.984 8.035 7 51
## game_score.y 3,606 22.417 8.259 -0.700 44.700
## plus_minus.y 3,606 4.692 15.812 -32 46
## -------------------------------------------------
# Per-column overview of the merged data: quartiles and mean for numeric
# columns, length/class for character columns, and NA counts where present.
summary(Merged)
## trb game.x date.x series.x
## Min. : 1.000 Min. : 1.000 Length:3606 Length:3606
## 1st Qu.: 6.000 1st Qu.: 4.000 Class :character Class :character
## Median : 7.000 Median : 9.000 Mode :character Mode :character
## Mean : 7.244 Mean : 8.792
## 3rd Qu.: 8.000 3rd Qu.:13.000
## Max. :19.000 Max. :22.000
##
## series_game.x team.x opp.x result.x
## Min. :1.000 Length:3606 Length:3606 Length:3606
## 1st Qu.:2.000 Class :character Class :character Class :character
## Median :3.000 Mode :character Mode :character Mode :character
## Mean :3.069
## 3rd Qu.:4.000
## Max. :7.000
##
## mp.x fg.x fga.x fgp.x
## Min. :29.00 Min. : 3.00 Min. : 8.00 Min. :0.1670
## 1st Qu.:40.00 1st Qu.: 9.00 1st Qu.:21.00 1st Qu.:0.4230
## Median :42.00 Median :12.00 Median :25.00 Median :0.4860
## Mean :41.76 Mean :12.29 Mean :25.29 Mean :0.4838
## 3rd Qu.:44.00 3rd Qu.:15.00 3rd Qu.:29.00 3rd Qu.:0.5450
## Max. :57.00 Max. :24.00 Max. :45.00 Max. :0.8330
##
## three.x threeatt.x threep.x ft.x
## Min. :0.0000 Min. : 0.000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.: 1.000 1st Qu.:0.0000 1st Qu.: 5.000
## Median :1.0000 Median : 2.000 Median :0.3330 Median : 8.000
## Mean :0.7726 Mean : 2.344 Mean :0.3229 Mean : 8.039
## 3rd Qu.:1.0000 3rd Qu.: 4.000 3rd Qu.:0.5000 3rd Qu.:11.000
## Max. :6.0000 Max. :11.000 Max. :1.0000 Max. :23.000
## NA's :697
## fta.x ftp.x orb.x drb.x
## Min. : 0.000 Min. :0.2500 Min. :0.000 Min. : 1.000
## 1st Qu.: 6.000 1st Qu.:0.7140 1st Qu.:1.000 1st Qu.: 4.000
## Median :10.000 Median :0.8460 Median :2.000 Median : 5.000
## Mean : 9.798 Mean :0.8142 Mean :1.906 Mean : 5.338
## 3rd Qu.:13.000 3rd Qu.:0.9330 3rd Qu.:3.000 3rd Qu.: 6.000
## Max. :28.000 Max. :1.0000 Max. :8.000 Max. :15.000
## NA's :11
## ast.x stl.x blk.x tov.x
## Min. : 1.000 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.: 4.000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:2.000
## Median : 5.000 Median :2.000 Median :1.0000 Median :3.000
## Mean : 5.837 Mean :2.142 Mean :0.9074 Mean :3.103
## 3rd Qu.: 8.000 3rd Qu.:3.000 3rd Qu.:1.0000 3rd Qu.:4.000
## Max. :14.000 Max. :6.000 Max. :5.0000 Max. :8.000
##
## pts.x game_score.x plus_minus.x game.y
## Min. :15.00 Min. : 2.60 Mode:logical Min. : 1.000
## 1st Qu.:28.00 1st Qu.:20.00 NA's:3606 1st Qu.: 5.000
## Median :32.00 Median :24.60 Median : 9.000
## Mean :33.38 Mean :25.36 Mean : 9.748
## 3rd Qu.:38.00 3rd Qu.:31.00 3rd Qu.:14.000
## Max. :63.00 Max. :49.80 Max. :23.000
##
## date.y series.y series_game.y team.y
## Length:3606 Length:3606 Min. :1.000 Length:3606
## Class :character Class :character 1st Qu.:2.000 Class :character
## Mode :character Mode :character Median :3.000 Mode :character
## Mean :3.134
## 3rd Qu.:4.000
## Max. :7.000
##
## opp.y result.y mp.y fg.y
## Length:3606 Length:3606 Min. :24.00 Min. : 2.000
## Class :character Class :character 1st Qu.:39.00 1st Qu.: 8.000
## Mode :character Mode :character Median :41.00 Median : 9.000
## Mean :40.98 Mean : 9.996
## 3rd Qu.:44.00 3rd Qu.:12.000
## Max. :53.00 Max. :20.000
##
## fga.y fgp.y three.y threeatt.y
## Min. :10.00 Min. :0.1110 Min. :0.000 Min. : 0.000
## 1st Qu.:16.00 1st Qu.:0.4290 1st Qu.:0.000 1st Qu.: 3.000
## Median :19.00 Median :0.5000 Median :1.000 Median : 4.000
## Mean :19.94 Mean :0.5026 Mean :1.491 Mean : 4.446
## 3rd Qu.:23.00 3rd Qu.:0.5880 3rd Qu.:2.000 3rd Qu.: 6.000
## Max. :38.00 Max. :0.8460 Max. :7.000 Max. :12.000
##
## threep.y ft.y fta.y ftp.y
## Min. :0.000 Min. : 0.000 Min. : 0.000 Min. :0.0000
## 1st Qu.:0.125 1st Qu.: 4.000 1st Qu.: 6.000 1st Qu.:0.6250
## Median :0.333 Median : 6.000 Median : 9.000 Median :0.7500
## Mean :0.314 Mean : 6.503 Mean : 8.763 Mean :0.7272
## 3rd Qu.:0.500 3rd Qu.: 9.000 3rd Qu.:11.000 3rd Qu.:0.8750
## Max. :1.000 Max. :18.000 Max. :24.000 Max. :1.0000
## NA's :62 NA's :15
## orb.y drb.y ast.y stl.y
## Min. :0.00 Min. : 1.000 Min. : 1.00 Min. :0.000
## 1st Qu.:0.00 1st Qu.: 5.000 1st Qu.: 5.00 1st Qu.:1.000
## Median :1.00 Median : 6.000 Median : 7.00 Median :2.000
## Mean :1.16 Mean : 6.083 Mean : 6.73 Mean :1.771
## 3rd Qu.:2.00 3rd Qu.: 7.000 3rd Qu.: 8.00 3rd Qu.:2.000
## Max. :6.00 Max. :16.000 Max. :14.00 Max. :6.000
##
## blk.y tov.y pts.y game_score.y
## Min. :0.0000 Min. : 0.000 Min. : 7.00 Min. :-0.70
## 1st Qu.:0.0000 1st Qu.: 2.000 1st Qu.:22.00 1st Qu.:16.60
## Median :1.0000 Median : 3.000 Median :27.00 Median :22.30
## Mean :0.9348 Mean : 3.531 Mean :27.98 Mean :22.42
## 3rd Qu.:1.0000 3rd Qu.: 5.000 3rd Qu.:32.00 3rd Qu.:27.40
## Max. :5.0000 Max. :10.000 Max. :51.00 Max. :44.70
##
## plus_minus.y
## Min. :-32.000
## 1st Qu.: -6.000
## Median : 6.000
## Mean : 4.692
## 3rd Qu.: 16.000
## Max. : 46.000
##
# Pearson correlation (cor()'s default method) between game_score.x and
# three.x, i.e. the columns that came from MData, the x side of the merge.
correlation <- cor(Merged$game_score.x, Merged$three.x)
correlation
## [1] 0.1747846

# Sample covariance of the same two columns.
covariance <- cov(Merged$game_score.x, Merged$three.x)
covariance
## [1] 1.41332
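# The correlation of 0.1748 between game_score.x and three.x indicates a weak positive linear relationship: when one variable increases, the other tends to increase slightly as well. The covariance of 1.41332 is also positive, which confirms that the two variables tend to move in the same direction; its magnitude, however, depends on the variables' units and so does not by itself indicate the strength of the relationship.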