library session
library(tidyverse)
## Warning: 패키지 'tidyverse'는 R 버전 4.3.2에서 작성되었습니다
## Warning: 패키지 'ggplot2'는 R 버전 4.3.2에서 작성되었습니다
## Warning: 패키지 'dplyr'는 R 버전 4.3.2에서 작성되었습니다
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(haven)
## Warning: 패키지 'haven'는 R 버전 4.3.2에서 작성되었습니다
library(ggplot2)
library(GGally)
## Warning: 패키지 'GGally'는 R 버전 4.3.2에서 작성되었습니다
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(epitools)
library(BSDA)
## 필요한 패키지를 로딩중입니다: lattice
##
## 다음의 패키지를 부착합니다: 'BSDA'
##
## The following object is masked from 'package:datasets':
##
## Orange
library(lme4)
## Warning: 패키지 'lme4'는 R 버전 4.3.2에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: Matrix
##
## 다음의 패키지를 부착합니다: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(daewr)
## Warning: 패키지 'daewr'는 R 버전 4.3.2에서 작성되었습니다
library(rsm)
## Warning: 패키지 'rsm'는 R 버전 4.3.2에서 작성되었습니다
library(openxlsx)
## Warning: 패키지 'openxlsx'는 R 버전 4.3.2에서 작성되었습니다
library(swirl)
## Warning: 패키지 'swirl'는 R 버전 4.3.2에서 작성되었습니다
##
## | Hi! Type swirl() when you are ready to begin.
library(dplyr)
3.0. Reading Data File & 3.1. Table Screenshot of Data
# Read the second worksheet of the raw workbook into `rb_stats` and preview
# the first rows.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact location exists.
rb_stats <- readxl::read_excel(
  "D:/OneDrive - 인하대학교/문서/Biostatistics/RB_stats_raw.xlsx",
  sheet = 2
)
head(rb_stats)
## # A tibble: 6 × 15
## `Player Name` `Height(ft-in)` `Height(cm)` `Weight(lb)` `Weight(kg)` Age
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Christian McCaff… 5-11 180. 210 95.3 25
## 2 Raheem Mostert 5-10 178. 205 93.0 29
## 3 Derrick Henry 6-3 190. 247 112. 27
## 4 Travis Etienne 5-10 178. 215 97.5 22
## 5 Josh Jacobs 5-10 178. 223 101. 23
## 6 Saquon Barkely 6-0 183. 232 105. 24
## # ℹ 9 more variables: Exp <dbl>, Pick <chr>, Games <dbl>, Att <dbl>, Yds <dbl>,
## # YBC <dbl>, YAC <dbl>, BTK <dbl>, Year <dbl>
# Print per-column summary statistics of the raw table.
rb_stats %>%
  summary()
## Player Name Height(ft-in) Height(cm) Weight(lb)
## Length:60 Length:60 Min. :170.2 Min. :200.0
## Class :character Class :character 1st Qu.:177.8 1st Qu.:210.0
## Mode :character Mode :character Median :179.1 Median :221.5
## Mean :180.0 Mean :219.8
## 3rd Qu.:182.9 3rd Qu.:226.2
## Max. :190.5 Max. :247.0
## Weight(kg) Age Exp Pick
## Min. : 90.72 Min. :22 Min. :1.00 Length:60
## 1st Qu.: 95.25 1st Qu.:25 1st Qu.:4.00 Class :character
## Median :100.47 Median :26 Median :5.00 Mode :character
## Mean : 99.70 Mean :26 Mean :5.15
## 3rd Qu.:102.63 3rd Qu.:27 3rd Qu.:6.25
## Max. :112.04 Max. :31 Max. :9.00
## Games Att Yds YBC
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.:12.00 1st Qu.:151.0 1st Qu.: 599.0 1st Qu.:379.5
## Median :14.00 Median :202.5 Median : 855.0 Median :478.0
## Mean :13.05 Mean :191.0 Mean : 831.3 Mean :464.7
## 3rd Qu.:16.25 3rd Qu.:234.0 3rd Qu.:1079.8 3rd Qu.:586.0
## Max. :17.00 Max. :349.0 Max. :1811.0 Max. :865.0
## YAC BTK Year
## Min. : 0.0 Min. : 0.00 Min. :2021
## 1st Qu.:259.5 1st Qu.: 7.00 1st Qu.:2021
## Median :377.0 Median :12.00 Median :2022
## Mean :366.6 Mean :12.72 Mean :2022
## 3rd Qu.:451.5 3rd Qu.:16.00 3rd Qu.:2023
## Max. :946.0 Max. :35.00 Max. :2023
3.2. More Calculation with stats
# Add derived per-player ratios to `rb_stats`:
#   Avg = Yds / Att
#   ABR = YAC / YBC
#   BTG = BTK / Games
#   MR  = Weight(kg) / (Height(cm) / 100)^2
#         (labelled "Muscle Rate" by the author; the expression is
#         weight in kg divided by squared height in metres)
# The summary of the raw table shows a minimum of 0 for Att, YBC and Games,
# so these divisions produce NaN/Inf for such rows; lm() drops the
# non-finite rows when fitting on the unfiltered table.
rb_stats <- rb_stats %>%
  mutate(
    Avg = Yds / Att,
    ABR = YAC / YBC,
    BTG = BTK / Games,
    MR = `Weight(kg)` / (`Height(cm)` / 100)^2
  )

# Keep only rows with at least 5 games. The column is referenced by bare
# name (data masking) rather than `rb_stats$Games`, so the condition always
# refers to the data frame that filter() is actually operating on.
rb_stats_filtered <- rb_stats %>%
  filter(Games >= 5)
head(rb_stats_filtered)
## # A tibble: 6 × 19
## `Player Name` `Height(ft-in)` `Height(cm)` `Weight(lb)` `Weight(kg)` Age
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Christian McCaff… 5-11 180. 210 95.3 25
## 2 Derrick Henry 6-3 190. 247 112. 27
## 3 Josh Jacobs 5-10 178. 223 101. 23
## 4 Saquon Barkely 6-0 183. 232 105. 24
## 5 David Montgomery 5-11 180. 224 102. 24
## 6 Joe Mixon 6-1 185. 220 99.8 25
## # ℹ 13 more variables: Exp <dbl>, Pick <chr>, Games <dbl>, Att <dbl>,
## # Yds <dbl>, YBC <dbl>, YAC <dbl>, BTK <dbl>, Year <dbl>, Avg <dbl>,
## # ABR <dbl>, BTG <dbl>, MR <dbl>
5.0. The Effect of Excluding Outliers.
# 5.0. Effect of excluding outliers.
# Regress Avg on height using the unfiltered table.
lm(Avg ~ `Height(cm)`, data = rb_stats) %>%
  summary()
##
## Call:
## lm(formula = Avg ~ `Height(cm)`, data = rb_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3046 -0.5535 -0.1093 0.3502 5.5136
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.01740 5.40872 2.037 0.0464 *
## `Height(cm)` -0.03673 0.03005 -1.222 0.2267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9703 on 56 degrees of freedom
## (결측으로 인하여 2개의 관측치가 삭제되었습니다.)
## Multiple R-squared: 0.02599, Adjusted R-squared: 0.008597
## F-statistic: 1.494 on 1 and 56 DF, p-value: 0.2267
# Regress Avg on height using the filtered table (Games >= 5).
lm(Avg ~ `Height(cm)`, data = rb_stats_filtered) %>%
  summary()
##
## Call:
## lm(formula = Avg ~ `Height(cm)`, data = rb_stats_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.18741 -0.37057 0.00899 0.34022 1.25324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.91284 3.26696 2.728 0.00858 **
## `Height(cm)` -0.02575 0.01815 -1.419 0.16162
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5846 on 54 degrees of freedom
## Multiple R-squared: 0.03595, Adjusted R-squared: 0.0181
## F-statistic: 2.014 on 1 and 54 DF, p-value: 0.1616
# Regress Avg on weight using the unfiltered table.
lm(Avg ~ `Weight(kg)`, data = rb_stats) %>%
  summary()
##
## Call:
## lm(formula = Avg ~ `Weight(kg)`, data = rb_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3934 -0.5103 -0.1132 0.3202 5.3388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.18726 2.24729 3.643 0.00059 ***
## `Weight(kg)` -0.03792 0.02251 -1.685 0.09765 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9591 on 56 degrees of freedom
## (결측으로 인하여 2개의 관측치가 삭제되었습니다.)
## Multiple R-squared: 0.04823, Adjusted R-squared: 0.03123
## F-statistic: 2.838 on 1 and 56 DF, p-value: 0.09765
# Regress Avg on weight using the filtered table (Games >= 5).
lm(Avg ~ `Weight(kg)`, data = rb_stats_filtered) %>%
  summary()
##
## Call:
## lm(formula = Avg ~ `Weight(kg)`, data = rb_stats_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22337 -0.42973 -0.01373 0.30021 1.31090
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.34588 1.38955 4.567 2.91e-05 ***
## `Weight(kg)` -0.02073 0.01391 -1.490 0.142
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5835 on 54 degrees of freedom
## Multiple R-squared: 0.03951, Adjusted R-squared: 0.02173
## F-statistic: 2.222 on 1 and 54 DF, p-value: 0.1419
4.1. Statistical Questions & 5. Analysis Result
# S.Q.1. Is being shorter than 180 cm an advantage for average yards?
# First, split the players into two groups at the 180 cm threshold
# (heights of exactly 180 cm or more fall in the "over 180cm" group).
rb_stats_filtered[["h_gr"]] <- ifelse(
  rb_stats_filtered[["Height(cm)"]] < 180,
  "under 180cm",
  "over 180cm"
)

# Model with the threshold written inline as a logical term, adjusting for
# weight.
lm(Avg ~ (`Height(cm)` < 180) + `Weight(kg)`, data = rb_stats_filtered) %>%
  summary()
##
## Call:
## lm(formula = Avg ~ (`Height(cm)` < 180) + `Weight(kg)`, data = rb_stats_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.26544 -0.40323 0.00254 0.31699 1.29893
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.87928 1.69979 4.047 0.00017 ***
## `Height(cm)` < 180TRUE -0.10223 0.18514 -0.552 0.58316
## `Weight(kg)` -0.02557 0.01651 -1.548 0.12750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5873 on 53 degrees of freedom
## Multiple R-squared: 0.04501, Adjusted R-squared: 0.00897
## F-statistic: 1.249 on 2 and 53 DF, p-value: 0.2951
# Same model expressed through the precomputed `h_gr` group column.
lm(Avg ~ h_gr + `Weight(kg)`, data = rb_stats_filtered) %>%
  summary()
##
## Call:
## lm(formula = Avg ~ h_gr + `Weight(kg)`, data = rb_stats_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.26544 -0.40323 0.00254 0.31699 1.29893
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.87928 1.69979 4.047 0.00017 ***
## h_grunder 180cm -0.10223 0.18514 -0.552 0.58316
## `Weight(kg)` -0.02557 0.01651 -1.548 0.12750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5873 on 53 degrees of freedom
## Multiple R-squared: 0.04501, Adjusted R-squared: 0.00897
## F-statistic: 1.249 on 2 and 53 DF, p-value: 0.2951
# The two fits give the same result.
# NOTE(review): the commented-out calls below refer to `AVG` and `MSM`;
# the columns created earlier in this script are named `Avg` and `MR`,
# so these calls would need updating before being re-enabled.
# S.Q.2. Is there a relationship between weight and average yards?
#lm(data = rb_stats_filtered, AVG ~ `Weight(kg)`) %>% anova()
#lm(data = rb_stats_filtered, AVG ~ `Weight(kg)`) %>% summary()
# S.Q.3. With both height and weight considered, is there a significant
# correlation with average yards?
#lm(data = rb_stats_filtered, AVG ~ `Height(cm)`+`Weight(kg)`+ Age + Exp + MSM) %>% anova()
#lm(data = rb_stats_filtered, AVG ~ `Height(cm)`+`Weight(kg)`+ Age + Exp + MSM) %>% summary()
# S.Q.4. Does the muscle ratio change the ratio of yards after contact to
# yards before contact?
#lm(data = rb_stats, AVG ~ `Height(cm)` + `Weight(kg)` + `MSM` + `Age` + Exp) %>% summary()
#lm(data = rb_stats_filtered, YAC ~ `Height(cm)` * `Weight(kg)`* `MSM` + `Age` + Exp ) %>% summary()
# S.Q.5. Do players with a higher muscle ratio break more tackles?
#lm(data = rb_stats, YAC ~ `Height(cm)` + `Weight(kg)` + `Age` + Exp) %>% summary()
#lm(data = rb_stats_filtered, YAC ~ `Height(cm)` + `Weight(kg)` + `Age` + Exp ) %>% summary()
#lm(data = rb_stats_filtered, BTG ~ `Height(cm)` + `Weight(kg)` + `Age` + Exp ) %>% summary()

# Scatter plot of Avg against weight, with points coloured by the `h_gr`
# height group.
ggplot(
  data = rb_stats_filtered,
  mapping = aes(x = `Weight(kg)`, y = Avg, colour = h_gr)
) +
  geom_point() +
  labs(
    y = "Average Yards",
    title = "Weight - Average Yards scatter plot, colored by height"
  )
#연도에 따른 평균 야드 비교(Unfiltered)
#ggplot(data = rb_stats, aes(as.factor(rb_stats$Year), rb_stats$AVG)) +
# geom_boxplot() +
# xlab("Year") +
# ylab("Average Yards") +
# ggtitle("Yards by Year(Unfiltered)")
#연도에 따른 평균 야드 비교(Filtered)
#ggplot(data = rb_stats_filtered, aes(as.factor(rb_stats_filtered$Year), rb_stats_filtered$AVG)) +
# geom_boxplot() +
# xlab("Year") +
# ylab("Average Yards") +
# ggtitle("Yards by Year(Filtered)")
#키에 따른 평균 야드
#ggplot(data = rb_stats_filtered, aes(x = `Height(cm)`, y = Avg)) +
# geom_point() +
# xlab("Height(cm)") +
# ylab("Average Yards") +
# ggtitle("Yards by Height(Filtered)") +
# geom_smooth()
#몸무게에 따른 평균 야드
#ggplot(data = rb_stats_filtered, aes(x = `Weight(kg)`, y = Avg)) +
# geom_point() +
# theme_classic(base_family = "serif", base_size = 15) +
# xlab("Weight(kg)") +
# ylab("Average Yards") +
# ggtitle("Yards by Weight(Filtered)") +
# geom_smooth(method = "auto")
#MR에 따른 평균 야드
#ggplot(data = rb_stats_filtered, aes(x = `MR`, y = Avg)) +
# geom_point() +
# theme_classic(base_family = "serif", base_size = 15) +
# xlab("MR") +
# ylab("Average Yards") +
# ggtitle("Yards by MR(Filtered)") +
# geom_smooth(method = "auto")
#픽 순위에 따른 평균 야드 비교
#ggplot(data = rb_stats_filtered, aes(x = Pick, y = AVG)) +
# geom_point() +
# theme_classic(base_family = "serif", base_size = 10) +
# scale_x_continuous() +
# xlab("Overall Pick") +
# ylab("Average Yards") +
# ggtitle("Yards by Pick(Filtered)") +
# geom_smooth()
#boxplot(data = rb_stats_filtered, AVG ~ Pick)
#boxplot(data = rb_stats_filtered, AVG ~ MSM)
#plot(data = rb_stats_filtered, ABR ~ MSM)
RSM(실패)
#rsm(data = rb_stats_filtered, AVG ~ `Height(cm)` + `Weight(kg)`) %>% summary()
#rsm(data = rb_stats_filtered, Avg ~ SO(`Height(cm)`, `Weight(kg)`)) %>% summary()
교수님 code
#rb_stats_with_average <- rb_stats %>% group_by(`Player Name`) %>%
# summarise( `Height(cm)` = mean(`Height(cm)`),
# avg_Att = mean(Att),
# avg_Yds = mean(Yds),
# avg_YBC = mean(YBC),
# avg_YAC = mean(YAC),
# avg_BTK = mean(BTK))
#rb_stats$avg_y <- rb_stats$Yds/rb_stats$Att
#5.1. Test 1
#lm(rb_stats$avg_y ~ rb_stats$`Height(cm)`) %>% summary
#lm(rb_stats$avg_y ~ rb_stats$`Height(cm)`)
#avg_y_fit <- lm(rb_stats$avg_y ~ rb_stats$`Height(cm)` + rb_stats$`Weight(kg)` + rb_stats$`Height(cm)`*rb_stats$`Weight(kg)`)
#plot(rb_stats$`Height(cm)`, rb_stats$avg_y, data = rb_stats) +
# abline(avg_y_fit, col = "red")
#5.2. Test 2
#5.3. Test 3
#5.4. Test 4
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# R Markdown template example: summarise the built-in `cars` dataset.
summary(datasets::cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.