Prepping Data for EDA
library(readxl)
# Load the NCEA workbook from the Desktop into a tibble.
Conc <- readxl::read_excel(path = "~/Desktop/NCEA/NCEA.xlsx")
## New names:
## * `` -> ...7
## * `` -> ...14
## * `` -> ...19
## * `` -> ...23
## * `` -> ...30
#View(Conc)
# read_excel() names header-less columns "...<position>" (see the "New names"
# messages it prints). Drop those columns by name pattern rather than by
# hard-coded position, so the step still works if the sheet gains or loses
# columns.
Conc <- Conc[, !grepl("^\\.{3}[0-9]+$", names(Conc))]
# Inspect the remaining columns and their types.
str(Conc)
## tibble [123 × 28] (S3: tbl_df/tbl/data.frame)
## $ Entry # : num [1:123] 198 197 196 195 194 193 192 191 190 189 ...
## $ BMI : num [1:123] 26 20.6 22 20.2 21 ...
## $ BMI Ordinal classification : num [1:123] 3 2 2 2 2 2 3 2 2 2 ...
## $ Concussion Raw : chr [1:123] "1" "0" "0" "0" ...
## $ Concussion Binary : num [1:123] 1 2 2 2 2 2 2 1 2 1 ...
## $ Concussion Ordinal : num [1:123] 1 0 0 0 0 0 0 1 0 1 ...
## $ P! Arth Nk Binary : num [1:123] 2 2 2 2 2 1 2 2 2 1 ...
## $ P! Arth Nk Ordinal : num [1:123] 0 0 0 0 0 1 0 0 0 1 ...
## $ P! Athr TS Binary : num [1:123] 1 2 2 2 2 2 2 2 2 2 ...
## $ P! Athr TS Ordinal : num [1:123] 2 0 0 0 0 0 0 0 0 0 ...
## $ P! Arth LS/Tailbone Binary : num [1:123] 2 2 2 2 2 2 2 2 2 1 ...
## $ P! Arth LS/Tailbone Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 1 ...
## $ R Sh RC P! Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 1 ...
## $ L Sh RC P! Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 1 ...
## $ R Wrist Fx. Binary : num [1:123] 1 0 0 0 0 0 0 0 0 0 ...
## $ L Wrist Fx. Binary : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ R. Generalized kn P! Ordinal (non arth): num [1:123] 0 1 0 0 0 0 0 0 0 0 ...
## $ L. Generalized kn P! Ordinal (non arth): num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ B. Kn P! Binary : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ R. Ankle Spr Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ L. Ankle Spr Ordinal: : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ B. Ankle Sprain Binary : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ R. Ankle P! Ordinal : num [1:123] 0 0 0 0 0 3 0 0 0 0 ...
## $ L. Ankle P! Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ B. Ankle P! Binary : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ R. Hp P!/Impingement Ordinal : num [1:123] 0 0 1 0 0 0 0 0 0 1 ...
## $ L. Hp P!/Impingement Ordinal : num [1:123] 0 0 1 0 0 0 0 0 0 1 ...
## $ B. Hip P! Binary : num [1:123] 0 0 1 0 0 0 0 0 0 1 ...
# The entry number is an identifier, not a quantity: store it as text.
Conc[["Entry #"]] <- as.character(Conc[["Entry #"]])
# The raw concussion count was read as text; convert it to numbers.
# Any value that is not a plain number becomes NA (R warns when this happens).
Conc[["Concussion Raw"]] <- as.numeric(Conc[["Concussion Raw"]])
## Warning: NAs introduced by coercion
# Treat the coded classification columns as categorical (factor) variables.
Conc[["BMI Ordinal classification"]] <-
  as.factor(Conc[["BMI Ordinal classification"]])
Conc[["Concussion Binary"]] <-
  as.factor(Conc[["Concussion Binary"]])
Conc[["Concussion Ordinal"]] <-
  as.factor(Conc[["Concussion Ordinal"]])
Conc[["P! Arth Nk Ordinal"]] <-
  as.factor(Conc[["P! Arth Nk Ordinal"]])
# Confirm the new column types.
str(Conc)
## tibble [123 × 28] (S3: tbl_df/tbl/data.frame)
## $ Entry # : chr [1:123] "198" "197" "196" "195" ...
## $ BMI : num [1:123] 26 20.6 22 20.2 21 ...
## $ BMI Ordinal classification : Factor w/ 4 levels "1","2","3","4": 3 2 2 2 2 2 3 2 2 2 ...
## $ Concussion Raw : num [1:123] 1 0 0 0 0 0 0 1 0 1 ...
## $ Concussion Binary : Factor w/ 2 levels "1","2": 1 2 2 2 2 2 2 1 2 1 ...
## $ Concussion Ordinal : Factor w/ 4 levels "0","1","2","3": 2 1 1 1 1 1 1 2 1 2 ...
## $ P! Arth Nk Binary : num [1:123] 2 2 2 2 2 1 2 2 2 1 ...
## $ P! Arth Nk Ordinal : Factor w/ 4 levels "0","1","2","3": 1 1 1 1 1 2 1 1 1 2 ...
## $ P! Athr TS Binary : num [1:123] 1 2 2 2 2 2 2 2 2 2 ...
## $ P! Athr TS Ordinal : num [1:123] 2 0 0 0 0 0 0 0 0 0 ...
## $ P! Arth LS/Tailbone Binary : num [1:123] 2 2 2 2 2 2 2 2 2 1 ...
## $ P! Arth LS/Tailbone Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 1 ...
## $ R Sh RC P! Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 1 ...
## $ L Sh RC P! Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 1 ...
## $ R Wrist Fx. Binary : num [1:123] 1 0 0 0 0 0 0 0 0 0 ...
## $ L Wrist Fx. Binary : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ R. Generalized kn P! Ordinal (non arth): num [1:123] 0 1 0 0 0 0 0 0 0 0 ...
## $ L. Generalized kn P! Ordinal (non arth): num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ B. Kn P! Binary : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ R. Ankle Spr Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ L. Ankle Spr Ordinal: : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ B. Ankle Sprain Binary : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ R. Ankle P! Ordinal : num [1:123] 0 0 0 0 0 3 0 0 0 0 ...
## $ L. Ankle P! Ordinal : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ B. Ankle P! Binary : num [1:123] 0 0 0 0 0 0 0 0 0 0 ...
## $ R. Hp P!/Impingement Ordinal : num [1:123] 0 0 1 0 0 0 0 0 0 1 ...
## $ L. Hp P!/Impingement Ordinal : num [1:123] 0 0 1 0 0 0 0 0 0 1 ...
## $ B. Hip P! Binary : num [1:123] 0 0 1 0 0 0 0 0 0 1 ...
# Keep only the rows that have no missing value in any column.
Conc <- Conc[complete.cases(Conc), ]
# Print numbers with 3 significant digits to make the output more readable.
options(digits = 3)
# The formula interface used from here on (mean(~ x, data = ...), favstats(),
# tally(), gf_histogram()) is provided by mosaic, so attach it before the
# first such call instead of further down the script.
library(mosaic)
# Compute summary statistics: mean BMI over the remaining rows.
mean(~ BMI, data = Conc, na.rm = TRUE)
## [1] 22.5
# Other summary statistics work the same way.
# One-row table for BMI: min, quartiles, max, mean, sd, n and missing count.
favstats(
  ~BMI,
  data = Conc,
  na.rm = TRUE
)
## min Q1 median Q3 max mean sd n missing
## 18 21.1 22.2 23.8 33.5 22.5 2.47 115 0
# The same BMI summary, computed separately for each concussion grade.
favstats(
  BMI ~ `Concussion Ordinal`,
  data = Conc,
  na.rm = TRUE
)
## Concussion Ordinal min Q1 median Q3 max mean sd n missing
## 1 0 18.0 21.0 22.0 23.4 30.1 22.4 2.35 62 0
## 2 1 18.1 21.1 22.5 23.8 33.5 22.6 2.62 46 0
## 3 2 20.4 21.8 23.8 25.4 27.5 23.7 2.58 7 0
## 4 3 NA NA NA NA NA NaN NA 0 0
# Quantiles of BMI at the 25th, 50th and 80th percentiles.
# The argument is spelled out as `probs`; the abbreviated `prob` only worked
# through R's partial argument matching.
quantile(~BMI, data = Conc, probs = c(0.25, 0.5, 0.8), na.rm = TRUE)
## 25% 50% 80%
## 21.1 22.2 24.3
# Histogram of BMI using 5-unit-wide bins, with one bin centred on 2.5.
gf_histogram(
  ~BMI,
  data = Conc,
  binwidth = 5,
  center = 2.5
)

# Frequency table of concussion grades, including a Total entry
# (the pie chart itself is drawn further below).
# library() stops with an error if the package is missing, whereas require()
# only returns FALSE and lets the script fail later.
library(kableExtra)
t <- tally(~`Concussion Ordinal`, data = Conc, margins = TRUE)
# Render the counts as a styled table.
as.table(t) %>%
  kbl() %>%
  kable_styling()
## | Concussion.Ordinal | Freq |
## |:-------------------|-----:|
## | 0                  |   62 |
## | 1                  |   46 |
## | 2                  |    7 |
## | 3                  |    0 |
## | Total              |  115 |
###########pie chart############
library(plotrix)
##
## Attaching package: 'plotrix'
## The following object is masked from 'package:mosaic':
##
## rescale
# Take the slice sizes from the data instead of typing the counts in by hand,
# so the chart stays correct if the data or the filtering above changes.
conc_counts <- table(Conc$`Concussion Ordinal`)
# Grades with no cases are left out of the chart.
has_cases <- as.vector(conc_counts) > 0
slices <- as.vector(conc_counts)[has_cases]
lbls <- names(conc_counts)[has_cases]
# Exploded 3D pie chart, one slice per concussion grade.
pie3D(slices, labels = lbls, explode = 0.1,
      main = "Pie Chart of Concussions ")

# Attach the packages with library(), which fails immediately if a package is
# not installed (require() would only return FALSE).
library(kableExtra)
library(mosaic)
# Contingency table with margins:
# rows are BMI class, columns are concussion grade.
t1 <- tally(~`BMI Ordinal classification` + `Concussion Ordinal`,
            data = Conc, margins = TRUE)
kbl(as.table(t1))
## |       |  0 |  1 |  2 |  3 | Total |
## |:------|---:|---:|---:|---:|------:|
## | 1     |  1 |  2 |  0 |  0 |     3 |
## | 2     | 51 | 39 |  5 |  0 |    95 |
## | 3     |  9 |  4 |  2 |  0 |    15 |
## | 4     |  1 |  1 |  0 |  0 |     2 |
## | Total | 62 | 46 |  7 |  0 |   115 |
# Percentages by column: the share of each BMI class within every
# concussion grade.
t2 <- tally(
  ~ `BMI Ordinal classification` | `Concussion Ordinal`,
  data = Conc,
  format = "percent"
)
t2 %>%
  kbl()
## |     0 |     1 |    2 |   3 |
## |------:|------:|-----:|----:|
## |  1.61 |  4.35 |  0.0 | NaN |
## | 82.26 | 84.78 | 71.4 | NaN |
## | 14.52 |  8.70 | 28.6 | NaN |
## |  1.61 |  2.17 |  0.0 | NaN |
# Mosaic plot of BMI class against concussion grade.
my_tbl <- tally(
  `BMI Ordinal classification` ~ `Concussion Ordinal`,
  data = Conc
)
mosaicplot(my_tbl,
           color = TRUE)

library(ggplot2)
# Cross-tabulate concussion grade (rows) against neck arthritis pain grade
# (columns) and render it as a compact table.
t3 <- tally(`Concussion Ordinal` ~ `P! Arth Nk Ordinal`, data = Conc)
t3 %>%
  kbl() %>%
  # FALSE is spelled out: the shorthand F is an ordinary variable that can be
  # reassigned.
  kable_classic_2(full_width = FALSE)
## |   |  0 |  1 |  2 |  3 |
## |:--|---:|---:|---:|---:|
## | 0 | 55 |  4 |  3 |  0 |
## | 1 | 32 |  8 |  3 |  3 |
## | 2 |  6 |  0 |  1 |  0 |
## | 3 |  0 |  0 |  0 |  0 |
# Grouped bar plot: one cluster of bars per concussion grade, with a
# side-by-side bar for each neck arthritis pain grade.
ggplot(
  data = Conc,
  mapping = aes(
    x = `Concussion Ordinal`,
    fill = `P! Arth Nk Ordinal`
  )
) +
  geom_bar(position = "dodge")

# Binary
# Cross-tabulate the binary concussion indicator (rows) against the binary
# neck arthritis pain indicator (columns).
t4 <- tally(`Concussion Binary` ~ `P! Arth Nk Binary`, data = Conc)
t4 %>%
  kbl() %>%
  # FALSE is spelled out: the shorthand F is an ordinary variable that can be
  # reassigned.
  kable_classic_2(full_width = FALSE)
# Plot for the binary comparison: bars per concussion indicator, split by the
# binary neck arthritis pain indicator so the chart matches the
# Binary-by-Binary table above (the fill previously used the Ordinal column).
# NOTE(review): confirm that the Binary column is the intended fill.
# `P! Arth Nk Binary` is stored as a number, so wrap it in factor() to get
# discrete fill colours, and restore a plain legend title.
ggplot(Conc,
       aes(x = `Concussion Binary`,
           fill = factor(`P! Arth Nk Binary`))) +
  geom_bar(position = "dodge") +
  labs(fill = "P! Arth Nk Binary")
