## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## Warning: package 'openintro' was built under R version 4.3.3
## Warning: package 'airports' was built under R version 4.3.3
## Warning: package 'cherryblossom' was built under R version 4.3.3
## Warning: package 'usdata' was built under R version 4.3.3
## Warning: package 'infer' was built under R version 4.3.3
## Warning: package 'skimr' was built under R version 4.3.3
Exercise 1
## Rows: 13,583
## Columns: 13
## $ age <int> 14, 14, 15, 15, 15, 15, 15, 14, 15, 15, 15, 1…
## $ gender <chr> "female", "female", "female", "female", "fema…
## $ grade <chr> "9", "9", "9", "9", "9", "9", "9", "9", "9", …
## $ hispanic <chr> "not", "not", "hispanic", "not", "not", "not"…
## $ race <chr> "Black or African American", "Black or Africa…
## $ height <dbl> NA, NA, 1.73, 1.60, 1.50, 1.57, 1.65, 1.88, 1…
## $ weight <dbl> NA, NA, 84.37, 55.79, 46.72, 67.13, 131.54, 7…
## $ helmet_12m <chr> "never", "never", "never", "never", "did not …
## $ text_while_driving_30d <chr> "0", NA, "30", "0", "did not drive", "did not…
## $ physically_active_7d <int> 4, 2, 7, 0, 2, 1, 4, 4, 5, 0, 0, 0, 4, 7, 7, …
## $ hours_tv_per_school_day <chr> "5+", "5+", "5+", "2", "3", "5+", "5+", "5+",…
## $ strength_training_7d <int> 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 3, 0, 0, 7, 7, …
## $ school_night_hours_sleep <chr> "8", "6", "<5", "6", "9", "8", "9", "6", "<5"…
#Each case in this dataset represents a single student, the entire set being comprised of 13,583 cases.
Exercise 2
Data summary
| Name |
Piped data |
| Number of rows |
13583 |
| Number of columns |
13 |
| _______________________ |
|
| Column type frequency: |
|
| character |
8 |
| numeric |
5 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| gender |
12 |
1.00 |
4 |
6 |
0 |
2 |
0 |
| grade |
79 |
0.99 |
1 |
5 |
0 |
5 |
0 |
| hispanic |
231 |
0.98 |
3 |
8 |
0 |
2 |
0 |
| race |
2805 |
0.79 |
5 |
41 |
0 |
5 |
0 |
| helmet_12m |
311 |
0.98 |
5 |
12 |
0 |
6 |
0 |
| text_while_driving_30d |
918 |
0.93 |
1 |
13 |
0 |
8 |
0 |
| hours_tv_per_school_day |
338 |
0.98 |
1 |
12 |
0 |
7 |
0 |
| school_night_hours_sleep |
1248 |
0.91 |
1 |
3 |
0 |
7 |
0 |
Variable type: numeric
| age |
77 |
0.99 |
16.16 |
1.26 |
12.00 |
15.00 |
16.00 |
17.00 |
18.00 |
▁▂▅▅▇ |
| height |
1004 |
0.93 |
1.69 |
0.10 |
1.27 |
1.60 |
1.68 |
1.78 |
2.11 |
▁▅▇▃▁ |
| weight |
1004 |
0.93 |
67.91 |
16.90 |
29.94 |
56.25 |
64.41 |
76.20 |
180.99 |
▆▇▂▁▁ |
| physically_active_7d |
273 |
0.98 |
3.90 |
2.56 |
0.00 |
2.00 |
4.00 |
7.00 |
7.00 |
▆▂▅▃▇ |
| strength_training_7d |
1176 |
0.91 |
2.95 |
2.58 |
0.00 |
0.00 |
3.00 |
5.00 |
7.00 |
▇▂▅▂▅ |
#We are missing weights from 1,219 observations.
yrbss1 <- yrbss %>%
mutate(physical_3plus = if_else(physically_active_7d > 2, "yes", "no"))|>
filter(!is.na(weight) & !is.na(physical_3plus))
Exercise 3
ggplot(data = yrbss1, aes (physical_3plus, weight)) + geom_violin(position = "dodge")

#Yes, there is a relationship between the variables "physical_3plus" and "weight". I expected there to be a greater difference between the plots of the factors since one's physical activity greatly impacts their weight, but there are countless other factors that do so too, so the results are not too shocking.
yrbss1 %>%
group_by(physical_3plus) %>%
summarise(mean_weight = mean(weight, na.rm = TRUE))
## # A tibble: 2 × 2
## physical_3plus mean_weight
## <chr> <dbl>
## 1 no 66.7
## 2 yes 68.4
Exercise 4
yrbss1 %>%
group_by(physical_3plus) %>%
summarize(n = n())
## # A tibble: 2 × 2
## physical_3plus n
## <chr> <int>
## 1 no 4022
## 2 yes 8342
yrbss1 %>%
group_by(weight) %>%
summarize(n = n())
## # A tibble: 239 × 2
## weight n
## <dbl> <int>
## 1 29.9 1
## 2 31.8 1
## 3 33.1 1
## 4 34.0 3
## 5 34.9 1
## 6 35.4 3
## 7 35.8 1
## 8 36.3 7
## 9 36.7 1
## 10 37.2 1
## # ℹ 229 more rows
#The conditions necessary for inference are satisfied: there are more than 10 success and failures for "physical_3plus" and more than 30 numerical entries for "weight".
Exercise 5
#Ho: There is no difference in the average weights of adolescents who exercise at least 3 times a week and those who do not. Ho : mu1 = mu2
# Ha: There is a difference in the average weights of adolescents who exercise at least 3 times a week and those who do not. Ha : mu1 =/ mu2
obs_diff <- yrbss1 %>%
specify(weight ~ physical_3plus) %>%
calculate(stat = "diff in means", order = c("yes", "no"))
null_dist <- yrbss1 %>%
specify(weight ~ physical_3plus) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>%
calculate(stat = "diff in means", order = c("yes", "no"))
ggplot(data = null_dist, aes(x = stat)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Exercise 6
ggplot(data = null_dist, aes(x = stat)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

geom_vline(xintercept = 1.774584, color="red")
## mapping: xintercept = ~xintercept
## geom_vline: na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
#The line does not fall on the distribution.
Exercise 7
#None of the "null_dist" permutations have a difference equal to or larger than the "obs_diff" value.
null_dist %>%
get_p_value(obs_stat = obs_diff, direction = "two_sided")
## Warning: Please be cautious in reporting a p-value of 0. This result is an approximation
## based on the number of `reps` chosen in the `generate()` step.
## ℹ See `get_p_value()` (`?infer::get_p_value()`) for more information.
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
Exercise 8
# I got a warning message to be cautious about reporting the p-value that the system procured as it is "0". I believe I got this warning because such a p-value is interpreted as there being absolutely no way that the null hypothesis can fail to be rejected, and such broad statements hardly hold true.
Exercise 9
percentile_ci <- null_dist |>
get_confidence_interval(level = 0.95)
percentile_ci
## # A tibble: 1 × 2
## lower_ci upper_ci
## <dbl> <dbl>
## 1 -0.611 0.625
#Given the inclusion of 0 in the confidence interval, we are 95% sure that there is no statistically significant difference between the average weights of adolescents who exercise at least 3 times a week and those who do not.
LS0tDQp0aXRsZTogIkxhYiAxOiBJbnRybyB0byBSIg0KYXV0aG9yOiAiQXV0aG9yIE5hbWUiDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6IG9wZW5pbnRybzo6bGFiX3JlcG9ydA0KLS0tDQoNCmBgYHtyIGxvYWQtcGFja2FnZXMsIG1lc3NhZ2U9RkFMU0V9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkob3BlbmludHJvKQ0KbGlicmFyeShpbmZlcikNCmxpYnJhcnkoc2tpbXIpDQpgYGANCg0KYGBge3J9DQpkYXRhKCJ5cmJzcyIpDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgMQ0KDQpgYGB7cn0NCmdsaW1wc2UoeXJic3MpDQojRWFjaCBjYXNlIGluIHRoaXMgZGF0YXNldCByZXByZXNlbnRzIGEgc2luZ2xlIHN0dWRlbnQsIHRoZSBlbnRpcmUgc2V0IGJlaW5nIGNvbXByaXNlZCBvZiAxMyw1ODMgY2FzZXMuDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgMg0KDQpgYGB7cn0NCnlyYnNzICU+JSANCiAgc2tpbSgpDQojV2UgYXJlIG1pc3Npbmcgd2VpZ2h0cyBmcm9tIDEsMjE5IG9ic2VydmF0aW9ucy4NCmBgYA0KDQoNCmBgYHtyIHRyZW5kLWdpcmxzfQ0KeXJic3MxIDwtIHlyYnNzICU+JSANCiAgbXV0YXRlKHBoeXNpY2FsXzNwbHVzID0gaWZfZWxzZShwaHlzaWNhbGx5X2FjdGl2ZV83ZCA+IDIsICJ5ZXMiLCAibm8iKSl8Pg0KICAgIGZpbHRlcighaXMubmEod2VpZ2h0KSAmICFpcy5uYShwaHlzaWNhbF8zcGx1cykpDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgMw0KDQpgYGB7cn0NCmdncGxvdChkYXRhID0geXJic3MxLCBhZXMgKHBoeXNpY2FsXzNwbHVzLCB3ZWlnaHQpKSArIGdlb21fdmlvbGluKHBvc2l0aW9uID0gImRvZGdlIikNCmBgYA0KDQpgYGB7cn0NCiNZZXMsIHRoZXJlIGlzIGEgcmVsYXRpb25zaGlwIGJldHdlZW4gdGhlIHZhcmlhYmxlcyAicGh5c2ljYWxfM3BsdXMiIGFuZCAid2VpZ2h0Ii4gSSBleHBlY3RlZCB0aGVyZSB0byBiZSBhIGdyZWF0ZXIgZGlmZmVyZW5jZSBiZXR3ZWVuIHRoZSBwbG90cyBvZiB0aGUgZmFjdG9ycyBzaW5jZSBvbmUncyBwaHlzaWNhbCBhY3Rpdml0eSBncmVhdGx5IGltcGFjdHMgdGhlaXIgd2VpZ2h0LCBidXQgdGhlcmUgYXJlIGNvdW50bGVzcyBvdGhlciBmYWN0b3JzIHRoYXQgZG8gc28gdG9vLCBzbyB0aGUgcmVzdWx0cyBhcmUgbm90IHRvbyBzaG9ja2luZy4NCmBgYA0KDQpgYGB7cn0NCnlyYnNzMSAlPiUNCiAgZ3JvdXBfYnkocGh5c2ljYWxfM3BsdXMpICU+JQ0KICBzdW1tYXJpc2UobWVhbl93ZWlnaHQgPSBtZWFuKHdlaWdodCwgbmEucm0gPSBUUlVFKSkNCmBgYA0KDQoNCg0KIyMjIEV4ZXJjaXNlIDQNCg0KDQpgYGB7cn0NCnlyYnNzMSAlPiUNCiAgZ3JvdXBfYnkocGh5c2ljYWxfM3BsdXMpICU+JQ0KICBzdW1tYXJpemUobiA9IG4oKSkNCmBgYA0KDQpgYGB7cn0NCnlyYnNzMSAlPiUNCiAgZ3JvdXBfYnkod2VpZ2h0KSAlPiUNCiAgc3VtbWFyaXplKG4gPSBuKCkpDQpgYGANCg0KYGBge3J9DQojVGhlIGNvbmRpdGlvbnMgbmVjZXNzYXJ5IGZvciBpbmZlcmVuY2UgYXJlIHNhdGlzZmllZDogdGhlcmUgYXJlIG1vcmUgdGhhbiAxMCBzdWNjZXNzIGFuZCBmYWlsdXJlcyBmb3IgInBoeXNpY2FsXzNwbHVzIiBhbmQgbW9yZSB0aGFuIDMwIG51bWVyaWNhbCBlbnRyaWVzIGZvciAid2VpZ2h0Ii4NCmBgYA0KDQoNCiMjIyBFeGVyY2lzZSA1DQoNCg0KYGBge3J9DQojSG86IFRoZXJlIGlzIG5vIGRpZmZlcmVuY2UgaW4gdGhlIGF2ZXJhZ2Ugd2VpZ2h0cyBvZiBhZG9sZXNjZW50cyB3aG8gZXhlcmNpc2UgYXQgbGVhc3QgMyB0aW1lcyBhIHdlZWsgYW5kIHRob3NlIHdobyBkbyBub3QuIEhvIDogbXUxID0gbXUyDQojIEhhOiBUaGVyZSBpcyBhIGRpZmZlcmVuY2UgaW4gdGhlIGF2ZXJhZ2Ugd2VpZ2h0cyBvZiBhZG9sZXNjZW50cyB3aG8gZXhlcmNpc2UgYXQgbGVhc3QgMyB0aW1lcyBhIHdlZWsgYW5kIHRob3NlIHdobyBkbyBub3QuIEhhIDogbXUxID0vIG11Mg0KYGBgDQoNCmBgYHtyfQ0Kb2JzX2RpZmYgPC0geXJic3MxICU+JQ0KICBzcGVjaWZ5KHdlaWdodCB+IHBoeXNpY2FsXzNwbHVzKSAlPiUNCiAgY2FsY3VsYXRlKHN0YXQgPSAiZGlmZiBpbiBtZWFucyIsIG9yZGVyID0gYygieWVzIiwgIm5vIikpDQpgYGANCg0KYGBge3J9DQpudWxsX2Rpc3QgPC0geXJic3MxICU+JQ0KICBzcGVjaWZ5KHdlaWdodCB+IHBoeXNpY2FsXzNwbHVzKSAlPiUNCiAgaHlwb3RoZXNpemUobnVsbCA9ICJpbmRlcGVuZGVuY2UiKSAlPiUNCiAgZ2VuZXJhdGUocmVwcyA9IDEwMDAsIHR5cGUgPSAicGVybXV0ZSIpICU+JQ0KICBjYWxjdWxhdGUoc3RhdCA9ICJkaWZmIGluIG1lYW5zIiwgb3JkZXIgPSBjKCJ5ZXMiLCAibm8iKSkNCmBgYA0KDQpgYGB7cn0NCmdncGxvdChkYXRhID0gbnVsbF9kaXN0LCBhZXMoeCA9IHN0YXQpKSArDQogIGdlb21faGlzdG9ncmFtKCkNCmBgYA0KDQojIyMgRXhlcmNpc2UgNg0KDQpgYGB7cn0NCmdncGxvdChkYXRhID0gbnVsbF9kaXN0LCBhZXMoeCA9IHN0YXQpKSArDQogICBnZW9tX2hpc3RvZ3JhbSgpDQpnZW9tX3ZsaW5lKHhpbnRlcmNlcHQgPSAxLjc3NDU4NCwgY29sb3I9InJlZCIpDQojVGhlIGxpbmUgZG9lcyBub3QgZmFsbCBvbiB0aGUgZGlzdHJpYnV0aW9uLg0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDcNCg0KDQpgYGB7cn0NCiNOb25lIG9mIHRoZSAibnVsbF9kaXN0IiBwZXJtdXRhdGlvbnMgaGF2ZSBhIGRpZmZlcmVuY2UgZXF1YWwgdG8gb3IgbGFyZ2VyIHRoYW4gdGhlICJvYnNfZGlmZiIgdmFsdWUuDQpgYGANCg0KYGBge3J9DQpudWxsX2Rpc3QgJT4lDQogIGdldF9wX3ZhbHVlKG9ic19zdGF0ID0gb2JzX2RpZmYsIGRpcmVjdGlvbiA9ICJ0d29fc2lkZWQiKQ0KYGBgDQoNCg0KIyMjIEV4ZXJjaXNlIDgNCg0KYGBge3J9DQojIEkgZ290IGEgd2FybmluZyBtZXNzYWdlIHRvIGJlIGNhdXRpb3VzIGFib3V0IHJlcG9ydGluZyB0aGUgcC12YWx1ZSB0aGF0IHRoZSBzeXN0ZW0gcHJvY3VyZWQgYXMgaXQgaXMgIjAiLiBJIGJlbGlldmUgSSBnb3QgdGhpcyB3YXJuaW5nIGJlY2F1c2Ugc3VjaCBhIHAtdmFsdWUgaXMgaW50ZXJwcmV0ZWQgYXMgdGhlcmUgYmVpbmcgYWJzb2x1dGVseSBubyB3YXkgdGhhdCB0aGUgbnVsbCBoeXBvdGhlc2lzIGNhbiBmYWlsIHRvIGJlIHJlamVjdGVkLCBhbmQgc3VjaCBicm9hZCBzdGF0ZW1lbnRzIGhhcmRseSBob2xkIHRydWUuDQpgYGANCg0KDQojIyMgRXhlcmNpc2UgOQ0KDQpgYGB7cn0NCnBlcmNlbnRpbGVfY2kgPC0gbnVsbF9kaXN0IHw+DQogIGdldF9jb25maWRlbmNlX2ludGVydmFsKGxldmVsID0gMC45NSkNCnBlcmNlbnRpbGVfY2kNCmBgYA0KDQpgYGB7cn0NCiNHaXZlbiB0aGUgaW5jbHVzaW9uIG9mIDAgaW4gdGhlIGNvbmZpZGVuY2UgaW50ZXJ2YWwsIHdlIGFyZSA5NSUgc3VyZSB0aGF0IHRoZXJlIGlzIG5vIHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQgZGlmZmVyZW5jZSBiZXR3ZWVuIHRoZSBhdmVyYWdlIHdlaWdodHMgb2YgYWRvbGVzY2VudHMgd2hvIGV4ZXJjaXNlIGF0IGxlYXN0IDMgdGltZXMgYSB3ZWVrIGFuZCB0aG9zZSB3aG8gZG8gbm90Lg0KYGBgDQoNCg==