library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.3.1.9000 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(corrplot)
## corrplot 0.88 loaded
# Load the 2019 team error table from the Excel workbook.
error_19 <- read_xlsx(
  "/Users/yongkookkim/Library/Mobile Documents/com~apple~CloudDocs/Baseball_stat_study/errors_2019.xlsx",
  col_names = TRUE # spelled out: `T` is an ordinary variable that can be reassigned
)
# Keep the error percentage at two decimals. Note that this overwrites the
# column, so every later statistic on it uses the rounded values.
error_19$error_percentage <- round(error_19$error_percentage, 2)
error_19
## # A tibble: 10 x 8
## team rank win win_percentage error_n chance ds error_percentage
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 doosan 1 88 0.615 50 1721 90.9 2.91
## 2 sk 2 88 0.615 57 1561 90.5 3.65
## 3 kiwoom 3 86 0.601 70 1676 89.6 4.18
## 4 lg 4 79 0.552 56 1707 91.2 3.28
## 5 nc 5 73 0.514 56 1702 89.7 3.29
## 6 kt 6 71 0.5 65 1781 90.2 3.65
## 7 kia 7 62 0.437 69 1643 90.0 4.2
## 8 samsung 8 60 0.42 77 1594 89.5 4.83
## 9 hanhwa 9 58 0.403 68 1656 87.9 4.11
## 10 lotte 10 48 0.39 76 1696 88.5 4.48
승률과 내야수비 에러율은 반비례한다. p-value = 0.02838 (0.05 이하이므로 통계적으로 유의하다고 볼 수 있다.)
cor: -0.6863938 = 상관계수가 음수(-0.6863938)이므로 반비례 관계(음의 상관관계)임을 알 수 있다.
# Correlation test between team win percentage and error percentage.
cor.test(
  x = error_19$win_percentage,
  y = error_19$error_percentage
)
##
## Pearson's product-moment correlation
##
## data: error_19$win_percentage and error_19$error_percentage
## t = -2.6696, df = 8, p-value = 0.02838
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.91889834 -0.09997298
## sample estimates:
## cor
## -0.6863938
히트맵 정렬하기
히트맵 시각화하기 (한눈에 잘 보이게 만들기)
# Coerce the table to a tibble, then inspect its column types.
error_19 <- error_19 %>% as_tibble()
str(error_19)
## tibble [10 × 8] (S3: tbl_df/tbl/data.frame)
## $ team : chr [1:10] "doosan" "sk" "kiwoom" "lg" ...
## $ rank : num [1:10] 1 2 3 4 5 6 7 8 9 10
## $ win : num [1:10] 88 88 86 79 73 71 62 60 58 48
## $ win_percentage : num [1:10] 0.615 0.615 0.601 0.552 0.514 0.5 0.437 0.42 0.403 0.39
## $ error_n : num [1:10] 50 57 70 56 56 65 69 77 68 76
## $ chance : num [1:10] 1721 1561 1676 1707 1702 ...
## $ ds : num [1:10] 90.9 90.5 89.6 91.2 89.7 ...
## $ error_percentage: num [1:10] 2.91 3.65 4.18 3.28 3.29 3.65 4.2 4.83 4.11 4.48
# Drop the team label so only the remaining columns enter the correlation
# matrix, then show the matrix rounded to two decimals.
error_19_1 <- select(error_19, -team)
error_cor <- cor(error_19_1)
round(error_cor, digits = 2)
## rank win win_percentage error_n chance ds
## rank 1.00 -0.99 -0.98 0.79 -0.02 -0.78
## win -0.99 1.00 0.99 -0.74 0.01 0.74
## win_percentage -0.98 0.99 1.00 -0.74 0.05 0.73
## error_n 0.79 -0.74 -0.74 1.00 -0.23 -0.69
## chance -0.02 0.01 0.05 -0.23 1.00 0.15
## ds -0.78 0.74 0.73 -0.69 0.15 1.00
## error_percentage 0.72 -0.68 -0.69 0.97 -0.46 -0.66
## error_percentage
## rank 0.72
## win -0.68
## win_percentage -0.69
## error_n 0.97
## chance -0.46
## ds -0.66
## error_percentage 1.00
# Draw the correlation matrix twice: first with circle glyphs (corrplot's
# default method, written out here), then with the coefficients as numbers.
corrplot(error_cor, method = "circle")
corrplot(error_cor, method = "number")
# Palette generator interpolating red -> white -> blue for the heatmap.
corrplot_col <- colorRampPalette(
  c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA")
)

# Colour-tile heatmap of the lower triangle, with variables reordered by
# hierarchical clustering, coefficients printed in black, and the diagonal
# hidden. The value returned by corrplot() is kept in `mycorrplot_theme`.
mycorrplot_theme <- corrplot(
  error_cor,
  method = "color",
  col = corrplot_col(200), # 200 interpolated colour steps
  type = "lower",
  order = "hclust",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45, # rotate the text labels by 45 degrees
  diag = FALSE # spelled out: `F` is an ordinary variable that can be reassigned
)