# Read the team table from the Excel workbook and display it.
data <- read_excel(path = "myData.xlsx")
data
## # A tibble: 236 × 20
## TEAMID TEAM PAKE PAKERANK PASE PASERANK GAMES W L WINPERCENT R64
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Abil… 0.7 45 0.7 52 3 1 2 0.333 2
## 2 2 Akron -0.9 179 -1.1 187 4 0 4 0 4
## 3 3 Alab… -2.1 211 -2.9 220 10 5 5 0.5 5
## 4 4 Alba… -0.4 147 -0.3 138 3 0 3 0 3
## 5 6 Amer… -0.5 160 -0.4 150 3 0 3 0 3
## 6 8 Ariz… -1.7 206 -2.5 216 28 17 11 0.607 11
## 7 9 Ariz… -2 209 -1.9 206 5 1 4 0.2 4
## 8 10 Arka… 4.3 11 3.5 16 18 11 7 0.611 7
## 9 11 Arka… 0 76 0 78 1 0 1 0 1
## 10 12 Aubu… 0.6 53 1.4 30 11 7 4 0.636 4
## # ℹ 226 more rows
## # ℹ 9 more variables: R32 <dbl>, S16 <dbl>, E8 <dbl>, F4 <dbl>, F2 <dbl>,
## # CHAMP <dbl>, `2` <dbl>, F4PERCENT <dbl>, CHAMPPERCENT <dbl>
# Draw two independent random samples of 10 teams. Both keep the join keys
# (TEAMID, PASERANK); each keeps a different pair of measure columns.
# slice_sample() is used because dplyr has superseded sample_n().
# NOTE(review): the two draws are independent and no seed is set in this part
# of the script, so the samples change on every run and may share no team at
# all, in which case the joins below find no matching keys.
data_small <- data %>%
  select(TEAMID, PASERANK, WINPERCENT, F4PERCENT) %>%
  slice_sample(n = 10)

data_smaller <- data %>%
  select(TEAMID, PASERANK, W, L) %>%
  slice_sample(n = 10)

data_small
## # A tibble: 10 × 4
## TEAMID PASERANK WINPERCENT F4PERCENT
## <dbl> <dbl> <dbl> <dbl>
## 1 230 210 0.286 0.192
## 2 72 78 0 0
## 3 218 164 0.417 0.209
## 4 139 103 0 0
## 5 233 59 0.667 0.12
## 6 14 138 0.655 0.808
## 7 168 78 0 0
## 8 187 59 0.5 0.035
## 9 125 54 0.25 0.001
## 10 23 78 0.333 0.019
# Display the second sample (columns TEAMID, PASERANK, W, L).
data_smaller
## # A tibble: 10 × 4
## TEAMID PASERANK W L
## <dbl> <dbl> <dbl> <dbl>
## 1 193 29 4 2
## 2 215 159 0 2
## 3 11 78 0 1
## 4 32 224 6 9
## 5 157 8 15 8
## 6 56 18 4 1
## 7 30 201 2 4
## 8 42 34 2 3
## 9 43 41 9 8
## 10 73 54 2 4
Describe the two datasets:
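Data1 (data_small): a random sample of 10 teams with the columns TEAMID, PASERANK, WINPERCENT, and F4PERCENT.
Data 2 (data_smaller): a separate random sample of 10 teams with the columns TEAMID, PASERANK, W, and L.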
Describe the resulting data:
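The result has no rows: no TEAMID/PASERANK pair appears in both samples, so the inner join keeps nothing. It still has six columns (the four from data_small plus W and L).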
# Inner join: keep only rows whose TEAMID/PASERANK pair occurs in both samples.
inner_join(data_small, data_smaller, by = c("TEAMID", "PASERANK"))
## # A tibble: 0 × 6
## # ℹ 6 variables: TEAMID <dbl>, PASERANK <dbl>, WINPERCENT <dbl>,
## # F4PERCENT <dbl>, W <dbl>, L <dbl>
Describe the resulting data:
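It shows the win percentage and Final Four percentage for all 10 teams in data_small; W and L are added as columns but are NA in every row because no team matched in data_smaller.
How is it different from the original two datasets? It has the same rows as data_small plus the W and L columns, which contain no actual values.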
# Left join: keep every row of data_small; W and L are filled where a
# TEAMID/PASERANK match exists in data_smaller and are NA otherwise.
left_join(data_small, data_smaller, by = c("TEAMID", "PASERANK"))
## # A tibble: 10 × 6
## TEAMID PASERANK WINPERCENT F4PERCENT W L
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 230 210 0.286 0.192 NA NA
## 2 72 78 0 0 NA NA
## 3 218 164 0.417 0.209 NA NA
## 4 139 103 0 0 NA NA
## 5 233 59 0.667 0.12 NA NA
## 6 14 138 0.655 0.808 NA NA
## 7 168 78 0 0 NA NA
## 8 187 59 0.5 0.035 NA NA
## 9 125 54 0.25 0.001 NA NA
## 10 23 78 0.333 0.019 NA NA
Describe the resulting data:
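It shows wins and losses for all 10 teams in data_smaller, along with their
PASERANK; WINPERCENT and F4PERCENT are added as columns but are NA in every row because no team matched in data_small.
How is it different from the original two datasets? It has the same rows as data_smaller plus the WINPERCENT and F4PERCENT columns, which contain no actual values.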
# Right join: keep every row of data_smaller; WINPERCENT and F4PERCENT are
# filled where a TEAMID/PASERANK match exists in data_small and are NA otherwise.
right_join(data_small, data_smaller, by = c("TEAMID", "PASERANK"))
## # A tibble: 10 × 6
## TEAMID PASERANK WINPERCENT F4PERCENT W L
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 193 29 NA NA 4 2
## 2 215 159 NA NA 0 2
## 3 11 78 NA NA 0 1
## 4 32 224 NA NA 6 9
## 5 157 8 NA NA 15 8
## 6 56 18 NA NA 4 1
## 7 30 201 NA NA 2 4
## 8 42 34 NA NA 2 3
## 9 43 41 NA NA 9 8
## 10 73 54 NA NA 2 4
Describe the resulting data:
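It combines the results of the left and right joins into one dataset of 20 rows.
How is it different from the original two datasets? It includes every row from both datasets, with NA in the columns that come from the other dataset.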
# Full join: keep every row from both samples, with NA in the columns of
# whichever side has no matching TEAMID/PASERANK pair.
full_join(data_small, data_smaller, by = c("TEAMID", "PASERANK"))
## # A tibble: 20 × 6
## TEAMID PASERANK WINPERCENT F4PERCENT W L
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 230 210 0.286 0.192 NA NA
## 2 72 78 0 0 NA NA
## 3 218 164 0.417 0.209 NA NA
## 4 139 103 0 0 NA NA
## 5 233 59 0.667 0.12 NA NA
## 6 14 138 0.655 0.808 NA NA
## 7 168 78 0 0 NA NA
## 8 187 59 0.5 0.035 NA NA
## 9 125 54 0.25 0.001 NA NA
## 10 23 78 0.333 0.019 NA NA
## 11 193 29 NA NA 4 2
## 12 215 159 NA NA 0 2
## 13 11 78 NA NA 0 1
## 14 32 224 NA NA 6 9
## 15 157 8 NA NA 15 8
## 16 56 18 NA NA 4 1
## 17 30 201 NA NA 2 4
## 18 42 34 NA NA 2 3
## 19 43 41 NA NA 9 8
## 20 73 54 NA NA 2 4
Describe the resulting data:
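It has no rows: semi_join keeps only the rows of data_small that have a match in data_smaller, and there are none.
How is it different from the original two datasets? It has only the four columns of data_small and no rows.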
# Semi join: keep the rows of data_small that have a TEAMID/PASERANK match in
# data_smaller, without adding any columns from data_smaller.
semi_join(data_small, data_smaller, by = c("TEAMID", "PASERANK"))
## # A tibble: 0 × 4
## # ℹ 4 variables: TEAMID <dbl>, PASERANK <dbl>, WINPERCENT <dbl>,
## # F4PERCENT <dbl>
Describe the resulting data:
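It shows every row of the first dataset, because anti_join keeps the rows of data_small that have no match in data_smaller.
How is it different from the original two datasets? It is identical to data_small; nothing from data_smaller is added.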
# Anti join: keep the rows of data_small that have no TEAMID/PASERANK match in
# data_smaller, without adding any columns from data_smaller.
anti_join(data_small, data_smaller, by = c("TEAMID", "PASERANK"))
## # A tibble: 10 × 4
## TEAMID PASERANK WINPERCENT F4PERCENT
## <dbl> <dbl> <dbl> <dbl>
## 1 230 210 0.286 0.192
## 2 72 78 0 0
## 3 218 164 0.417 0.209
## 4 139 103 0 0
## 5 233 59 0.667 0.12
## 6 14 138 0.655 0.808
## 7 168 78 0 0
## 8 187 59 0.5 0.035
## 9 125 54 0.25 0.001
## 10 23 78 0.333 0.019