library(tidyverse)
library(openintro)
library(infer)
set.seed(74226)

Exercise 1

  • 4792 have reported 0 days
  • 925 have reported 1-2 days
  • 4646 do not drive
  • 827 have reported 30 days or texting and driving everyday in the past 30 days
yrbss %>%
  count(text_while_driving_30d, sort=TRUE)
## # A tibble: 9 x 2
##   text_while_driving_30d     n
##   <chr>                  <int>
## 1 0                       4792
## 2 did not drive           4646
## 3 1-2                      925
## 4 <NA>                     918
## 5 30                       827
## 6 3-5                      493
## 7 10-19                    373
## 8 6-9                      311
## 9 20-29                    298

Exercise 2

The proportion of students who have texted everyday in the past 30 days & have not worn a helmet out of all students who have not worn a helmet is 7.12%

danger <- yrbss %>%
  filter(helmet_12m=="never") %>%
  filter(!is.na(text_while_driving_30d)) %>%
  mutate(text_ind_everyday = ifelse(text_while_driving_30d == "30", "yes", "no"))

danger %>%
  count(text_ind_everyday)
## # A tibble: 2 x 2
##   text_ind_everyday     n
##   <chr>             <int>
## 1 no                 6040
## 2 yes                 463

Exercise 3

The 95% Confidence Interval for the proportion of students who text and drive everyday of all students who do not wear a helmet is between 6.5% and 7.7%.

The margin of error in this case is .006 or .06% We could have calculated this as 1.96 x sqrt((.0712*(1-.0712)/6503))

danger %>%
 specify(response = text_ind_everyday, success = "yes") %>%
 generate(reps = 1000, type = "bootstrap") %>%
 calculate(stat = "prop") %>%
 get_ci(level = 0.95)
## # A tibble: 1 x 2
##   lower_ci upper_ci
##      <dbl>    <dbl>
## 1   0.0646   0.0772

Exercise 4

glimpse(yrbss)
## Rows: 13,583
## Columns: 13
## $ age                      <int> 14, 14, 15, 15, 15, 15, 15, 14, 15, 15, 15...
## $ gender                   <chr> "female", "female", "female", "female", "f...
## $ grade                    <chr> "9", "9", "9", "9", "9", "9", "9", "9", "9...
## $ hispanic                 <chr> "not", "not", "hispanic", "not", "not", "n...
## $ race                     <chr> "Black or African American", "Black or Afr...
## $ height                   <dbl> NA, NA, 1.73, 1.60, 1.50, 1.57, 1.65, 1.88...
## $ weight                   <dbl> NA, NA, 84.37, 55.79, 46.72, 67.13, 131.54...
## $ helmet_12m               <chr> "never", "never", "never", "never", "did n...
## $ text_while_driving_30d   <chr> "0", NA, "30", "0", "did not drive", "did ...
## $ physically_active_7d     <int> 4, 2, 7, 0, 2, 1, 4, 4, 5, 0, 0, 0, 4, 7, ...
## $ hours_tv_per_school_day  <chr> "5+", "5+", "5+", "2", "3", "5+", "5+", "5...
## $ strength_training_7d     <int> 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 3, 0, 0, 7, ...
## $ school_night_hours_sleep <chr> "8", "6", "<5", "6", "9", "8", "9", "6", "...
yrbss %>%
  count(hours_tv_per_school_day, sort=TRUE)
## # A tibble: 8 x 2
##   hours_tv_per_school_day     n
##   <chr>                   <int>
## 1 2                        2705
## 2 <1                       2168
## 3 3                        2139
## 4 do not watch             1840
## 5 1                        1750
## 6 5+                       1595
## 7 4                        1048
## 8 <NA>                      338
yrbss %>%
  count(school_night_hours_sleep, sort=TRUE)
## # A tibble: 8 x 2
##   school_night_hours_sleep     n
##   <chr>                    <int>
## 1 7                         3461
## 2 8                         2692
## 3 6                         2658
## 4 5                         1480
## 5 <NA>                      1248
## 6 <5                         965
## 7 9                          763
## 8 10+                        316

TV time

Proportion of Interest: students who reported watching less than 1 hr of tv per school day: 16.37%

95% Confidence Interval generated: [.1575 - .1698]
We are 95% confident that the proportion of students who watch less than an hr of tv per schoolday is between 15.75% and 16.98%.

Margin of Error: .00615 or .0615%

tv_time<- yrbss %>%
  filter(!is.na(hours_tv_per_school_day)) %>%
  mutate(tv_ind_everyday = ifelse(hours_tv_per_school_day == "<1", "yes", "no"))

tv_time %>%
  count(tv_ind_everyday)
## # A tibble: 2 x 2
##   tv_ind_everyday     n
##   <chr>           <int>
## 1 no              11077
## 2 yes              2168
tv_time %>%
 specify(response = tv_ind_everyday, success = "yes") %>%
 generate(reps = 1000, type = "bootstrap") %>%
 calculate(stat = "prop") %>%
 get_ci(level = 0.95)
## # A tibble: 1 x 2
##   lower_ci upper_ci
##      <dbl>    <dbl>
## 1    0.157    0.170

Sleep time

Proportion of Interest: students who reported less than 5hrs of sleep on school nights: 7.82%

95% Confidence Interval generated: [.07377 - .0831]
We are 95% confident that the proportion of students who watch less than an hr of tv per schoolday is between 7.38% and 8.31%.

Margin of Error: .00465 or .0465%

sleep_time<- yrbss %>%
  filter(!is.na(school_night_hours_sleep)) %>%
  mutate(sleep_ind_everyday = ifelse(school_night_hours_sleep == "<5", "yes", "no"))

sleep_time %>%
  count(sleep_ind_everyday)
## # A tibble: 2 x 2
##   sleep_ind_everyday     n
##   <chr>              <int>
## 1 no                 11370
## 2 yes                  965
sleep_time %>%
 specify(response = sleep_ind_everyday, success = "yes") %>%
 generate(reps = 1000, type = "bootstrap") %>%
 calculate(stat = "prop") %>%
 get_ci(level = 0.95)
## # A tibble: 1 x 2
##   lower_ci upper_ci
##      <dbl>    <dbl>
## 1   0.0738   0.0831

Exercise 5

The margin of error increases as the population proportion increases. Margin of error is greatest at the population of 50%. The greatest that the numerator could ever be when calculating standard error by proportion is .5 x .5.

n <- 1000
p <- seq(from = 0, to = 1, by = 0.01)
me <- 2 * sqrt(p * (1 - p)/n)
dd <- data.frame(p = p, me = me)
ggplot(data = dd, aes(x = p, y = me)) + 
  geom_line() +
  labs(x = "Population Proportion", y = "Margin of Error")

Exercise 6

The distribution of sampling proportions with sampling size of 300 is mostly bell-curved and symmetrical - sampling proportions are clustering around center with symmetrical tapering on either side. Center is at .10 and standard deviation of the sample proportions is .017. Spread conforms fairly uniformly to the normal bell curve shape but with slightly larger tails where greater proportion of values are showing than would be expected in a perfectly normalized distribution.

Exercise 7

As proportion increases, spread gets wider and sampling proportions disperse out. There is less and less clustering around center of range and less overall conformity to a normal distribution.

Excercise 8

As sample size increases, there gradually can be seen more data clustering around center and more tapering from center. Symmetry increases with sample size. This falls in line with the general principal of CLT and assumption of enough sampling proportions/means reasonably approximating normal distribution shape.

LS0tDQp0aXRsZTogIkxhYiA2Ig0KYXV0aG9yOiAiQ2Fzc2llIEJveWxhbiINCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCINCm91dHB1dDogb3BlbmludHJvOjpsYWJfcmVwb3J0DQotLS0NCg0KYGBge3IgbG9hZC1wYWNrYWdlcywgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShvcGVuaW50cm8pDQpsaWJyYXJ5KGluZmVyKQ0Kc2V0LnNlZWQoNzQyMjYpDQpgYGANCg0KIyBFeGVyY2lzZSAxDQoNCiAgDQoqIDQ3OTIgaGF2ZSByZXBvcnRlZCAwIGRheXMNCiogOTI1IGhhdmUgcmVwb3J0ZWQgMS0yIGRheXMNCiogNDY0NiBkbyBub3QgZHJpdmUNCiogODI3IGhhdmUgcmVwb3J0ZWQgMzAgZGF5cyBvciB0ZXh0aW5nIGFuZCBkcml2aW5nIGV2ZXJ5ZGF5IGluIHRoZSBwYXN0IDMwIGRheXMNCg0KYGBge3J9DQp5cmJzcyAlPiUNCiAgY291bnQodGV4dF93aGlsZV9kcml2aW5nXzMwZCwgc29ydD1UUlVFKQ0KYGBgDQoNCiMgRXhlcmNpc2UgMg0KVGhlIHByb3BvcnRpb24gb2Ygc3R1ZGVudHMgd2hvIGhhdmUgdGV4dGVkIGV2ZXJ5ZGF5IGluIHRoZSBwYXN0IDMwIGRheXMgJiBoYXZlIG5vdCB3b3JuIGEgaGVsbWV0IG91dCBvZiBhbGwgc3R1ZGVudHMgd2hvIGhhdmUgbm90IHdvcm4gYSBoZWxtZXQgaXMgNy4xMiUNCmBgYHtyfQ0KZGFuZ2VyIDwtIHlyYnNzICU+JQ0KICBmaWx0ZXIoaGVsbWV0XzEybT09Im5ldmVyIikgJT4lDQogIGZpbHRlcighaXMubmEodGV4dF93aGlsZV9kcml2aW5nXzMwZCkpICU+JQ0KICBtdXRhdGUodGV4dF9pbmRfZXZlcnlkYXkgPSBpZmVsc2UodGV4dF93aGlsZV9kcml2aW5nXzMwZCA9PSAiMzAiLCAieWVzIiwgIm5vIikpDQoNCmRhbmdlciAlPiUNCiAgY291bnQodGV4dF9pbmRfZXZlcnlkYXkpDQogIA0KYGBgDQojIEV4ZXJjaXNlIDMNClRoZSA5NSUgQ29uZmlkZW5jZSBJbnRlcnZhbCBmb3IgdGhlIHByb3BvcnRpb24gb2Ygc3R1ZGVudHMgd2hvIHRleHQgYW5kIGRyaXZlIGV2ZXJ5ZGF5IG9mICphbGwqIHN0dWRlbnRzIHdobyBkbyBub3Qgd2VhciBhIGhlbG1ldCBpcyBiZXR3ZWVuIDYuNSUgYW5kIDcuNyUuDQoNClRoZSBtYXJnaW4gb2YgZXJyb3IgaW4gdGhpcyBjYXNlIGlzIC4wMDYgb3IgLjA2JQ0KV2UgY291bGQgaGF2ZSBjYWxjdWxhdGVkIHRoaXMgYXMgMS45NiB4IHNxcnQoKC4wNzEyKigxLS4wNzEyKS82NTAzKSkNCmBgYHtyfQ0KZGFuZ2VyICU+JQ0KIHNwZWNpZnkocmVzcG9uc2UgPSB0ZXh0X2luZF9ldmVyeWRheSwgc3VjY2VzcyA9ICJ5ZXMiKSAlPiUNCiBnZW5lcmF0ZShyZXBzID0gMTAwMCwgdHlwZSA9ICJib290c3RyYXAiKSAlPiUNCiBjYWxjdWxhdGUoc3RhdCA9ICJwcm9wIikgJT4lDQogZ2V0X2NpKGxldmVsID0gMC45NSkNCmBgYA0KIyBFeGVyY2lzZSA0DQpgYGB7cn0NCmdsaW1wc2UoeXJic3MpDQoNCnlyYnNzICU+JQ0KICBjb3VudChob3Vyc190dl9wZXJfc2Nob29sX2RheSwgc29ydD1UUlVFKQ0KDQp5cmJzcyAlPiUNCiAgY291bnQoc2Nob29sX25pZ2h0X2hvdXJzX3NsZWVwLCBzb3J0PVRSVUUpDQoNCmBgYA0KDQojIyBUViB0aW1lDQoqKlByb3BvcnRpb24gb2YgSW50ZXJlc3Q6KioNCnN0dWRlbnRzIHdobyByZXBvcnRlZCB3YXRjaGluZyBsZXNzIHRoYW4gMSBociBvZiB0diBwZXIgc2Nob29sIGRheTogIDE2LjM3JSAgDQogIA0KKio5NSUgQ29uZmlkZW5jZSBJbnRlcnZhbCBnZW5lcmF0ZWQ6KiogWy4xNTc1IC0gLjE2OThdICAgIA0KV2UgYXJlIDk1JSBjb25maWRlbnQgdGhhdCB0aGUgcHJvcG9ydGlvbiBvZiBzdHVkZW50cyB3aG8gd2F0Y2ggbGVzcyB0aGFuIGFuIGhyIG9mIHR2IHBlciBzY2hvb2xkYXkgaXMgYmV0d2VlbiAxNS43NSUgYW5kIDE2Ljk4JS4gIA0KICANCioqTWFyZ2luIG9mIEVycm9yOioqIC4wMDYxNSBvciAuMDYxNSUNCmBgYHtyfQ0KdHZfdGltZTwtIHlyYnNzICU+JQ0KICBmaWx0ZXIoIWlzLm5hKGhvdXJzX3R2X3Blcl9zY2hvb2xfZGF5KSkgJT4lDQogIG11dGF0ZSh0dl9pbmRfZXZlcnlkYXkgPSBpZmVsc2UoaG91cnNfdHZfcGVyX3NjaG9vbF9kYXkgPT0gIjwxIiwgInllcyIsICJubyIpKQ0KDQp0dl90aW1lICU+JQ0KICBjb3VudCh0dl9pbmRfZXZlcnlkYXkpDQoNCnR2X3RpbWUgJT4lDQogc3BlY2lmeShyZXNwb25zZSA9IHR2X2luZF9ldmVyeWRheSwgc3VjY2VzcyA9ICJ5ZXMiKSAlPiUNCiBnZW5lcmF0ZShyZXBzID0gMTAwMCwgdHlwZSA9ICJib290c3RyYXAiKSAlPiUNCiBjYWxjdWxhdGUoc3RhdCA9ICJwcm9wIikgJT4lDQogZ2V0X2NpKGxldmVsID0gMC45NSkNCmBgYA0KIyMgU2xlZXAgdGltZQ0KKipQcm9wb3J0aW9uIG9mIEludGVyZXN0OioqDQpzdHVkZW50cyB3aG8gcmVwb3J0ZWQgbGVzcyB0aGFuIDVocnMgb2Ygc2xlZXAgb24gc2Nob29sIG5pZ2h0czogNy44MiUgIA0KDQoqKjk1JSBDb25maWRlbmNlIEludGVydmFsIGdlbmVyYXRlZDoqKiBbLjA3Mzc3IC0gLjA4MzFdICANCldlIGFyZSA5NSUgY29uZmlkZW50IHRoYXQgdGhlIHByb3BvcnRpb24gb2Ygc3R1ZGVudHMgd2hvIHdhdGNoIGxlc3MgdGhhbiBhbiBociBvZiB0diBwZXIgc2Nob29sZGF5IGlzIGJldHdlZW4gNy4zOCUgYW5kIDguMzElLiAgDQoNCioqTWFyZ2luIG9mIEVycm9yOioqIC4wMDQ2NSBvciAuMDQ2NSUNCmBgYHtyfQ0Kc2xlZXBfdGltZTwtIHlyYnNzICU+JQ0KICBmaWx0ZXIoIWlzLm5hKHNjaG9vbF9uaWdodF9ob3Vyc19zbGVlcCkpICU+JQ0KICBtdXRhdGUoc2xlZXBfaW5kX2V2ZXJ5ZGF5ID0gaWZlbHNlKHNjaG9vbF9uaWdodF9ob3Vyc19zbGVlcCA9PSAiPDUiLCAieWVzIiwgIm5vIikpDQoNCnNsZWVwX3RpbWUgJT4lDQogIGNvdW50KHNsZWVwX2luZF9ldmVyeWRheSkNCg0Kc2xlZXBfdGltZSAlPiUNCiBzcGVjaWZ5KHJlc3BvbnNlID0gc2xlZXBfaW5kX2V2ZXJ5ZGF5LCBzdWNjZXNzID0gInllcyIpICU+JQ0KIGdlbmVyYXRlKHJlcHMgPSAxMDAwLCB0eXBlID0gImJvb3RzdHJhcCIpICU+JQ0KIGNhbGN1bGF0ZShzdGF0ID0gInByb3AiKSAlPiUNCiBnZXRfY2kobGV2ZWwgPSAwLjk1KQ0KYGBgDQojIEV4ZXJjaXNlIDUNCg0KVGhlIG1hcmdpbiBvZiBlcnJvciBpbmNyZWFzZXMgYXMgdGhlIHBvcHVsYXRpb24gcHJvcG9ydGlvbiBpbmNyZWFzZXMuIE1hcmdpbiBvZiBlcnJvciBpcyBncmVhdGVzdCBhdCB0aGUgcG9wdWxhdGlvbiBvZiA1MCUuIFRoZSBncmVhdGVzdCB0aGF0IHRoZSBudW1lcmF0b3IgY291bGQgZXZlciBiZSB3aGVuIGNhbGN1bGF0aW5nIHN0YW5kYXJkIGVycm9yIGJ5IHByb3BvcnRpb24gaXMgLjUgeCAuNS4NCg0KYGBge3J9DQpuIDwtIDEwMDANCnAgPC0gc2VxKGZyb20gPSAwLCB0byA9IDEsIGJ5ID0gMC4wMSkNCm1lIDwtIDIgKiBzcXJ0KHAgKiAoMSAtIHApL24pDQpgYGANCg0KYGBge3J9DQpkZCA8LSBkYXRhLmZyYW1lKHAgPSBwLCBtZSA9IG1lKQ0KZ2dwbG90KGRhdGEgPSBkZCwgYWVzKHggPSBwLCB5ID0gbWUpKSArIA0KICBnZW9tX2xpbmUoKSArDQogIGxhYnMoeCA9ICJQb3B1bGF0aW9uIFByb3BvcnRpb24iLCB5ID0gIk1hcmdpbiBvZiBFcnJvciIpDQpgYGANCg0KIyBFeGVyY2lzZSA2DQoNClRoZSBkaXN0cmlidXRpb24gb2Ygc2FtcGxpbmcgcHJvcG9ydGlvbnMgd2l0aCBzYW1wbGluZyBzaXplIG9mIDMwMCBpcyBtb3N0bHkgYmVsbC1jdXJ2ZWQgYW5kIHN5bW1ldHJpY2FsIC0gc2FtcGxpbmcgcHJvcG9ydGlvbnMgYXJlIGNsdXN0ZXJpbmcgYXJvdW5kIGNlbnRlciB3aXRoIHN5bW1ldHJpY2FsIHRhcGVyaW5nIG9uIGVpdGhlciBzaWRlLiAgQ2VudGVyIGlzIGF0IC4xMCBhbmQgc3RhbmRhcmQgZGV2aWF0aW9uIG9mIHRoZSBzYW1wbGUgcHJvcG9ydGlvbnMgaXMgLjAxNy4gIFNwcmVhZCBjb25mb3JtcyBmYWlybHkgdW5pZm9ybWx5IHRvIHRoZSBub3JtYWwgYmVsbCBjdXJ2ZSBzaGFwZSBidXQgd2l0aCBzbGlnaHRseSBsYXJnZXIgdGFpbHMgd2hlcmUgZ3JlYXRlciBwcm9wb3J0aW9uIG9mIHZhbHVlcyBhcmUgc2hvd2luZyB0aGFuIHdvdWxkIGJlIGV4cGVjdGVkIGluIGEgcGVyZmVjdGx5IG5vcm1hbGl6ZWQgZGlzdHJpYnV0aW9uLg0KDQoNCiMgRXhlcmNpc2UgNw0KQXMgcHJvcG9ydGlvbiBpbmNyZWFzZXMsIHNwcmVhZCBnZXRzIHdpZGVyIGFuZCBzYW1wbGluZyBwcm9wb3J0aW9ucyBkaXNwZXJzZSBvdXQuICBUaGVyZSBpcyBsZXNzIGFuZCBsZXNzIGNsdXN0ZXJpbmcgYXJvdW5kIGNlbnRlciBvZiByYW5nZSBhbmQgbGVzcyBvdmVyYWxsIGNvbmZvcm1pdHkgdG8gYSBub3JtYWwgZGlzdHJpYnV0aW9uLg0KDQoNCiMgRXhjZXJjaXNlIDgNCg0KQXMgc2FtcGxlIHNpemUgaW5jcmVhc2VzLCB0aGVyZSBncmFkdWFsbHkgY2FuIGJlIHNlZW4gbW9yZSBkYXRhIGNsdXN0ZXJpbmcgYXJvdW5kIGNlbnRlciBhbmQgbW9yZSB0YXBlcmluZyBmcm9tIGNlbnRlci4gIFN5bW1ldHJ5IGluY3JlYXNlcyB3aXRoIHNhbXBsZSBzaXplLiAgVGhpcyBmYWxscyBpbiBsaW5lIHdpdGggdGhlIGdlbmVyYWwgcHJpbmNpcGFsIG9mIENMVCBhbmQgYXNzdW1wdGlvbiBvZiBlbm91Z2ggc2FtcGxpbmcgcHJvcG9ydGlvbnMvbWVhbnMgcmVhc29uYWJseSBhcHByb3hpbWF0aW5nIG5vcm1hbCBkaXN0cmlidXRpb24gc2hhcGUuDQoNCg0KDQouLi4NCg0K