library(openintro)
## Warning: package 'openintro' was built under R version 4.2.3
## Loading required package: airports
## Warning: package 'airports' was built under R version 4.2.3
## Loading required package: cherryblossom
## Warning: package 'cherryblossom' was built under R version 4.2.3
## Loading required package: usdata
## Warning: package 'usdata' was built under R version 4.2.3
# Display the yrbss dataset made available by library(openintro); as a
# top-level expression it is auto-printed, showing the first rows of the tibble.
yrbss
## # A tibble: 13,583 × 13
##      age gender grade hispanic race        height weight helme…¹ text_…² physi…³
##    <int> <chr>  <chr> <chr>    <chr>        <dbl>  <dbl> <chr>   <chr>     <int>
##  1    14 female 9     not      Black or A…  NA      NA   never   0             4
##  2    14 female 9     not      Black or A…  NA      NA   never   <NA>          2
##  3    15 female 9     hispanic Native Haw…   1.73   84.4 never   30            7
##  4    15 female 9     not      Black or A…   1.6    55.8 never   0             0
##  5    15 female 9     not      Black or A…   1.5    46.7 did no… did no…       2
##  6    15 female 9     not      Black or A…   1.57   67.1 did no… did no…       1
##  7    15 female 9     not      Black or A…   1.65  132.  did no… <NA>          4
##  8    14 male   9     not      Black or A…   1.88   71.2 never   <NA>          4
##  9    15 male   9     not      Black or A…   1.75   63.5 never   <NA>          5
## 10    15 male   10    not      Black or A…   1.37   97.1 did no… <NA>          0
## # … with 13,573 more rows, 3 more variables: hours_tv_per_school_day <chr>,
## #   strength_training_7d <int>, school_night_hours_sleep <chr>, and abbreviated
## #   variable names ¹​helmet_12m, ²​text_while_driving_30d, ³​physically_active_7d
# Store the height column as a double-precision numeric vector.
# NOTE(review): the printout above already reports height as <dbl>, so this
# coercion is expected to leave the values unchanged — confirm it is needed.
yrbss[["height"]] <- as.numeric(yrbss[["height"]])
# Re-display the tibble to inspect the column after the coercion.
yrbss
## # A tibble: 13,583 × 13
##      age gender grade hispanic race        height weight helme…¹ text_…² physi…³
##    <int> <chr>  <chr> <chr>    <chr>        <dbl>  <dbl> <chr>   <chr>     <int>
##  1    14 female 9     not      Black or A…  NA      NA   never   0             4
##  2    14 female 9     not      Black or A…  NA      NA   never   <NA>          2
##  3    15 female 9     hispanic Native Haw…   1.73   84.4 never   30            7
##  4    15 female 9     not      Black or A…   1.6    55.8 never   0             0
##  5    15 female 9     not      Black or A…   1.5    46.7 did no… did no…       2
##  6    15 female 9     not      Black or A…   1.57   67.1 did no… did no…       1
##  7    15 female 9     not      Black or A…   1.65  132.  did no… <NA>          4
##  8    14 male   9     not      Black or A…   1.88   71.2 never   <NA>          4
##  9    15 male   9     not      Black or A…   1.75   63.5 never   <NA>          5
## 10    15 male   10    not      Black or A…   1.37   97.1 did no… <NA>          0
## # … with 13,573 more rows, 3 more variables: hours_tv_per_school_day <chr>,
## #   strength_training_7d <int>, school_night_hours_sleep <chr>, and abbreviated
## #   variable names ¹​helmet_12m, ²​text_while_driving_30d, ³​physically_active_7d
# Print a compact summary of yrbss: one line per column with its type and
# the first few values.
str(yrbss)
## tibble [13,583 × 13] (S3: tbl_df/tbl/data.frame)
##  $ age                     : int [1:13583] 14 14 15 15 15 15 15 14 15 15 ...
##  $ gender                  : chr [1:13583] "female" "female" "female" "female" ...
##  $ grade                   : chr [1:13583] "9" "9" "9" "9" ...
##  $ hispanic                : chr [1:13583] "not" "not" "hispanic" "not" ...
##  $ race                    : chr [1:13583] "Black or African American" "Black or African American" "Native Hawaiian or Other Pacific Islander" "Black or African American" ...
##  $ height                  : num [1:13583] NA NA 1.73 1.6 1.5 1.57 1.65 1.88 1.75 1.37 ...
##  $ weight                  : num [1:13583] NA NA 84.4 55.8 46.7 ...
##  $ helmet_12m              : chr [1:13583] "never" "never" "never" "never" ...
##  $ text_while_driving_30d  : chr [1:13583] "0" NA "30" "0" ...
##  $ physically_active_7d    : int [1:13583] 4 2 7 0 2 1 4 4 5 0 ...
##  $ hours_tv_per_school_day : chr [1:13583] "5+" "5+" "5+" "2" ...
##  $ strength_training_7d    : int [1:13583] 0 0 0 0 1 0 2 0 3 0 ...
##  $ school_night_hours_sleep: chr [1:13583] "8" "6" "<5" "6" ...
# Keep only the rows of yrbss that have no missing value in any column.
# NOTE(review): na.omit() is applied to the whole tibble, so a row is dropped
# when ANY of its columns is NA, not only height or race — confirm that this
# listwise deletion is intended for the group means below.
yrbss1 <- na.omit(yrbss)

# Mean height within each race category. The result is a data frame whose
# grouping column is named `Group.1` and whose mean column is named `x`.
aggregate(
  x = yrbss1[["height"]],
  by = list(yrbss1[["race"]]),
  FUN = mean
)
##                                     Group.1        x
## 1          American Indian or Alaska Native 1.666298
## 2                                     Asian 1.660901
## 3                 Black or African American 1.691227
## 4 Native Hawaiian or Other Pacific Islander 1.664167
## 5                                     White 1.704979
# Simulate the sampling distribution of the mean ----
# Draw `n` independent samples, each of 100 values from a normal distribution
# with mean 3.90 and sd 2.6, and record the mean of every sample.
# NOTE(review): 3.90 and 2.6 look like a mean/sd estimated from a yrbss
# variable — confirm where these constants come from.
set.seed(123) # fix the RNG state so the simulation is reproducible
n <- 1000 # number of simulated samples (NOT the per-sample size of 100)

# vapply() replaces the index loop: it draws from the RNG in the same order,
# so the values are unchanged, while guaranteeing a numeric result and
# behaving correctly for n == 0 (where `1:n` would iterate over c(1, 0)).
yrbss_samp <- vapply(
  seq_len(n),
  function(i) mean(rnorm(100, mean = 3.90, sd = 2.6)),
  numeric(1)
)

# Plot the histogram of the simulated sample means. The "n = 100" in the
# title refers to the size of each sample, not to the variable `n` above.
hist(
  yrbss_samp,
  breaks = 30,
  main = "Histogram of Sample Means (n = 100)",
  xlab = "(Sample Mean)",
  col = "pink"
)