Exam 1

library(GLMsData)
data(AIS)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ISLR2)
library(gt)
library(ggplot2)

Problem 1

# Compact overview of AIS: one line per column with its type and first values.
AIS %>% glimpse()
Rows: 202
Columns: 13
$ Sex   <fct> F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F…
$ Sport <fct> BBall, BBall, BBall, BBall, BBall, BBall, BBall, BBall, BBall, B…
$ LBM   <dbl> 63.32, 58.55, 55.36, 57.18, 53.20, 53.77, 60.17, 48.33, 54.57, 5…
$ Ht    <dbl> 195.9, 189.7, 177.8, 185.0, 184.6, 174.0, 186.2, 173.8, 171.4, 1…
$ Wt    <dbl> 78.9, 74.4, 69.1, 74.9, 64.6, 63.7, 75.2, 62.3, 66.5, 62.9, 96.3…
$ BMI   <dbl> 20.56, 20.67, 21.86, 21.88, 18.96, 21.04, 21.69, 20.62, 22.64, 1…
$ SSF   <dbl> 109.1, 102.8, 104.6, 126.4, 80.3, 75.2, 87.2, 97.9, 75.1, 65.1, …
$ RBC   <dbl> 3.96, 4.41, 4.14, 4.11, 4.45, 4.10, 4.31, 4.42, 4.30, 4.51, 4.71…
$ WBC   <dbl> 7.5, 8.3, 5.0, 5.3, 6.8, 4.4, 5.3, 5.7, 8.9, 4.4, 5.3, 7.3, 7.8,…
$ HCT   <dbl> 37.5, 38.2, 36.4, 37.3, 41.5, 37.4, 39.6, 39.9, 41.1, 41.6, 41.4…
$ HGB   <dbl> 12.3, 12.7, 11.6, 12.6, 14.0, 12.5, 12.8, 13.2, 13.5, 12.7, 14.0…
$ Ferr  <int> 60, 68, 21, 69, 29, 42, 73, 44, 41, 44, 38, 26, 30, 48, 30, 29, …
$ PBF   <dbl> 19.75, 21.30, 19.88, 23.66, 17.64, 15.58, 19.99, 22.43, 17.95, 1…
# Open AIS in the data viewer for a manual look at the full table.
# NOTE(review): presumably only has an effect in an interactive session --
# confirm it is skipped when the document is knitted.
view(AIS)

Telford, R. D. and Cunningham, R. B. collected the data to see whether there were any hematological differences between athletes from different sports and of varying weights, heights, and sexes. Some things I noticed about the data table are that there are no character variables, there are no females in water polo or in sprints, and there are no males in gym.

Problem 2

# Build the reduced data set `ais_sm` one verb at a time.
# 1. Keep only the athletes from the four sports of interest.
ais_sm <- filter(AIS, Sport %in% c("BBall", "Rowing", "Field", "TSprnt"))
# 2. Keep Sport and Sex plus every column whose name contains "B"
#    (LBM, BMI, RBC, WBC, HGB and PBF in the glimpse output).
ais_sm <- select(ais_sm, Sport, Sex, contains("B"))
# 3. Order the rows from highest to lowest PBF.
ais_sm <- arrange(ais_sm, desc(PBF))

# Check the result: column names, types and leading values.
glimpse(ais_sm)
Rows: 96
Columns: 8
$ Sport <fct> BBall, Rowing, Field, Rowing, Rowing, Rowing, BBall, BBall, Fiel…
$ Sex   <fct> F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, M, F…
$ LBM   <dbl> 68.53, 54.78, 63.03, 56.48, 60.05, 59.59, 57.18, 48.32, 72.98, 4…
$ BMI   <dbl> 25.75, 22.27, 28.13, 24.21, 24.24, 24.40, 21.88, 22.03, 31.93, 2…
$ RBC   <dbl> 4.71, 3.91, 4.51, 4.49, 4.16, 4.45, 4.11, 4.35, 4.75, 4.42, 4.41…
$ WBC   <dbl> 5.3, 7.3, 9.0, 7.6, 5.8, 6.6, 5.3, 7.8, 7.5, 5.7, 5.9, 6.9, 8.1,…
$ HGB   <dbl> 14.0, 12.9, 14.3, 14.4, 13.3, 14.1, 12.6, 14.1, 15.2, 13.2, 13.5…
$ PBF   <dbl> 28.83, 25.16, 24.88, 24.69, 23.70, 23.70, 23.66, 23.30, 23.01, 2…

Problem 3

# Render the female athletes of `ais_sm` as a gt table,
# ordered from highest to lowest PBF.
gt(
  ais_sm %>%
    filter(Sex == "F") %>%
    arrange(desc(PBF))
)
Sport Sex LBM BMI RBC WBC HGB PBF
BBall F 68.53 25.75 4.71 5.3 14.0 28.83
Rowing F 54.78 22.27 3.91 7.3 12.9 25.16
Field F 63.03 28.13 4.51 9.0 14.3 24.88
Rowing F 56.48 24.21 4.49 7.6 14.4 24.69
Rowing F 60.05 24.24 4.16 5.8 13.3 23.70
Rowing F 59.59 24.40 4.45 6.6 14.1 23.70
BBall F 57.18 21.88 4.11 5.3 12.6 23.66
BBall F 48.32 22.03 4.35 7.8 14.1 23.30
Field F 72.98 31.93 4.75 7.5 15.2 23.01
BBall F 48.33 20.62 4.42 5.7 13.2 22.43
Rowing F 61.70 23.97 4.41 5.9 13.5 22.39
Rowing F 56.68 23.19 4.90 6.9 14.5 22.25
Rowing F 62.96 23.47 4.37 8.1 14.3 21.79
Rowing F 52.78 20.81 4.57 6.6 14.5 21.47
BBall F 58.55 20.67 4.41 8.3 12.7 21.30
Field F 68.86 28.57 4.58 5.8 14.7 21.30
Rowing F 62.46 22.62 4.87 7.3 14.8 20.43
Rowing F 52.72 20.17 4.87 6.4 15.0 20.12
Field F 63.04 26.95 4.81 6.8 15.3 20.10
BBall F 60.17 21.69 4.31 5.3 12.8 19.99
BBall F 55.36 21.86 4.14 5.0 11.6 19.88
Rowing F 56.52 21.86 4.36 5.8 13.3 19.83
BBall F 63.32 20.56 3.96 7.5 12.3 19.75
Field F 59.89 25.27 5.33 9.3 15.0 19.51
Rowing F 56.05 22.96 4.46 9.5 14.5 19.35
Field F 66.85 26.85 4.77 7.1 14.6 19.26
Rowing F 53.65 19.76 5.02 6.4 15.2 19.20
Rowing F 57.92 22.63 4.63 6.0 14.7 18.77
BBall F 61.85 21.20 4.62 7.3 14.7 18.08
Rowing F 56.31 21.27 4.51 8.3 14.7 18.04
BBall F 54.57 22.64 4.30 8.9 13.5 17.95
Rowing F 65.45 23.36 4.26 5.8 14.1 17.89
Rowing F 66.24 25.44 4.26 6.2 13.9 17.71
BBall F 53.20 18.96 4.45 6.8 14.0 17.64
Rowing F 61.29 23.06 4.44 10.1 14.0 17.51
Rowing F 41.54 20.46 4.21 7.5 13.2 16.58
Rowing F 63.05 24.54 3.95 3.3 12.5 16.38
Rowing F 62.39 23.17 4.46 5.7 13.0 16.25
BBall F 53.77 21.04 4.10 4.4 12.5 15.58
BBall F 53.42 19.44 4.51 4.4 12.7 15.07
Rowing F 64.62 22.67 4.46 5.6 14.3 12.20
Field F 51.17 20.12 4.48 9.5 13.3 11.77
TSprnt F 53.11 22.37 4.82 7.6 14.4 11.64
TSprnt F 55.06 19.54 4.76 7.6 13.4 11.05
TSprnt F 53.20 20.15 4.97 7.8 14.2 10.74
TSprnt F 51.48 20.30 5.16 8.2 14.7 10.15

Problem 4

# Scatter plot of WBC (y) against RBC (x) for all athletes.
# Colour is mapped to Sex, so points and smoothers are drawn per sex.
ggplot(data = AIS, mapping = aes(RBC, WBC, colour = Sex)) +
  geom_point() +
  # Default smoother; ggplot2 reports the method and formula it picked.
  geom_smooth()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Problem 5

The outlier in this data set is the male with a high white blood cell count compared to the other athletes with the same red blood cell count.

Problem 6

# Box plots of HGB by Sex for the sports kept in `ais_sm`, one panel per
# sport, with the individual observations jittered on top.
ggplot(ais_sm, aes(x = Sex, y = HGB, fill = Sex)) +
  # Hide the box plot's own outlier markers: geom_jitter() below already
  # draws every observation, so outliers would otherwise appear twice.
  geom_boxplot(outlier.shape = NA) +
  geom_jitter() +
  facet_wrap(vars(Sport)) +
  scale_fill_brewer(palette = "Dark2") +
  labs(
    title = "Bloodsport: Hemoglobin concentrations of Australian athletes",
    y = "Hemoglobin concentration",
    x = "Sex",
    caption = "Source: Telford and Cunningham (1991)"
  ) +
  # Apply the complete theme first. theme_minimal() replaces all theme
  # settings, so adding it after theme(legend.position = "none") brought
  # the legend back.
  theme_minimal() +
  # The x axis already shows Sex, so the fill legend is redundant.
  theme(legend.position = "none")

Problem 7

# Mean height (Ht) for every Sex/Sport combination, drawn as one filled
# square per sport, with a separate row of panels for each sex.
AIS %>%
  group_by(Sex, Sport) %>%
  # Return an ungrouped summary. Without `.groups`, the result stays grouped
  # by Sex and dplyr prints a "grouped output" message.
  summarise(mean_ht = mean(Ht), .groups = "drop") %>%
  ggplot(aes(x = mean_ht, y = Sport, fill = Sex)) +
  # The facets already identify the sex, so the fill legend is suppressed.
  geom_point(shape = 22, show.legend = FALSE) +
  # Name the argument explicitly; the original had a stray trailing comma
  # that passed an empty second argument.
  facet_grid(rows = vars(Sex)) +
  theme_minimal() +
  labs(
    x = "Average height",
    y = "Sport",
    caption = "Source: Telford and Cunningham (1991)"
  )
`summarise()` has grouped output by 'Sex'. You can override using the `.groups`
argument.

Keeping the colors can be good because they contrast with one another.