# Show the current working directory; the relative path "nc.csv" used below
# resolves against it.
getwd()
## [1] "C:/Users/Jerome/Documents/0000_Work_Files/0000_Coursera/Statistics_with_R_Specialization/Course_2_Inferential_Stats/Week3"
library(statsr)
## Warning: package 'statsr' was built under R version 4.0.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Load the `nc` data set (presumably shipped with statsr, attached above --
# confirm), then round-trip it through "nc.csv" in the working directory.
# The rest of the script works on the re-imported copy, whose text columns
# come back as character vectors (see the str() output below).
# Side effect: creates/overwrites "nc.csv" in the working directory.
data(nc)
write.csv(nc, file = "nc.csv", row.names = FALSE)
nc <- read.csv("nc.csv", header = TRUE)

# Inspect column names and types of the re-imported data.
str(nc)
## 'data.frame':    1000 obs. of  13 variables:
##  $ fage          : int  NA NA 19 21 NA NA 18 17 NA 20 ...
##  $ mage          : int  13 14 15 15 15 15 15 15 16 16 ...
##  $ mature        : chr  "younger mom" "younger mom" "younger mom" "younger mom" ...
##  $ weeks         : int  39 42 37 41 39 38 37 35 38 37 ...
##  $ premie        : chr  "full term" "full term" "full term" "full term" ...
##  $ visits        : int  10 15 11 6 9 19 12 5 9 13 ...
##  $ marital       : chr  "married" "married" "married" "married" ...
##  $ gained        : int  38 20 38 34 27 22 76 15 NA 52 ...
##  $ weight        : num  7.63 7.88 6.63 8 6.38 5.38 8.44 4.69 8.81 6.94 ...
##  $ lowbirthweight: chr  "not low" "not low" "not low" "not low" ...
##  $ gender        : chr  "male" "male" "female" "male" ...
##  $ habit         : chr  "nonsmoker" "nonsmoker" "nonsmoker" "nonsmoker" ...
##  $ whitemom      : chr  "not white" "not white" "white" "white" ...
# Distribution of the `gained` column, including its count of missing values.
summary(nc[["gained"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   20.00   30.00   30.33   38.00   85.00      27
# Boxplots of birth weight for each smoking-habit group, with boxes filled by
# group. Rows with a missing `habit` are not filtered out before plotting.
p3 <- nc %>%
  ggplot(aes(x = habit, y = weight, fill = habit)) +
  geom_boxplot() +
  labs(
    title = "Birth Weight by Smoke Status",
    x = "Smoke Status",
    y = "Birth Weight"
  ) +
  scale_fill_discrete(name = "habit")

# Auto-print at top level to render the plot.
p3

# Mean birth weight within each `habit` group; rows with a missing habit form
# their own group in the result.
nc %>%
  group_by(habit) %>%
  summarise(mean_weight = mean(weight))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
##   habit     mean_weight
##   <chr>           <dbl>
## 1 nonsmoker        7.14
## 2 smoker           6.83
## 3 <NA>             3.63
# Overall distribution of birth weight across all rows.
summary(nc[["weight"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   6.380   7.310   7.101   8.060  11.750
#install.packages("tables")
library("tables")
## Warning: package 'tables' was built under R version 4.0.5
# Summary statistics of birth weight computed separately for each habit group.
tapply(nc[["weight"]], nc[["habit"]], summary)
## $nonsmoker
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   6.440   7.310   7.144   8.060  11.750 
## 
## $smoker
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.690   6.077   7.060   6.829   7.735   9.190
# Two-sided theoretical hypothesis test of whether mean birth weight differs
# between the two habit groups (null difference of 0).
inference(
  y = weight,
  x = habit,
  data = nc,
  statistic = "mean",
  type = "ht",
  null = 0,
  alternative = "twosided",
  method = "theoretical"
)
## Response variable: numerical
## Explanatory variable: categorical (2 levels) 
## n_nonsmoker = 873, y_bar_nonsmoker = 7.1443, s_nonsmoker = 1.5187
## n_smoker = 126, y_bar_smoker = 6.8287, s_smoker = 1.3862
## H0: mu_nonsmoker =  mu_smoker
## HA: mu_nonsmoker != mu_smoker
## t = 2.359, df = 125
## p_value = 0.0199

# Theoretical confidence interval for the difference in mean birth weight
# between habit groups, at the function's default confidence level.
inference(
  y = weight,
  x = habit,
  data = nc,
  statistic = "mean",
  type = "ci",
  method = "theoretical"
)
## Response variable: numerical, Explanatory variable: categorical (2 levels)
## n_nonsmoker = 873, y_bar_nonsmoker = 7.1443, s_nonsmoker = 1.5187
## n_smoker = 126, y_bar_smoker = 6.8287, s_smoker = 1.3862
## 95% CI (nonsmoker - smoker): (0.0508 , 0.5803)

# Same difference-in-means interval as above, widened to 99% confidence.
inference(
  y = weight,
  x = habit,
  data = nc,
  statistic = "mean",
  type = "ci",
  conf_level = 0.99,
  method = "theoretical"
)
## Response variable: numerical, Explanatory variable: categorical (2 levels)
## n_nonsmoker = 873, y_bar_nonsmoker = 7.1443, s_nonsmoker = 1.5187
## n_smoker = 126, y_bar_smoker = 6.8287, s_smoker = 1.3862
## 99% CI (nonsmoker - smoker): (-0.0343 , 0.6654)

# 99% theoretical confidence interval for the mean of `weeks`.
inference(
  y = weeks,
  data = nc,
  statistic = "mean",
  type = "ci",
  conf_level = 0.99,
  method = "theoretical"
)
## Single numerical variable
## n = 998, y-bar = 38.3347, s = 2.9316
## 99% CI: (38.0952 , 38.5742)

# 90% theoretical confidence interval for the mean of `weeks`.
inference(
  y = weeks,
  data = nc,
  statistic = "mean",
  type = "ci",
  conf_level = 0.90,
  method = "theoretical"
)
## Single numerical variable
## n = 998, y-bar = 38.3347, s = 2.9316
## 90% CI: (38.1819 , 38.4874)

# Cross-tabulate the `mature` label against `mage` to see which `mage` values
# fall under each label.
table(nc[["mature"]], nc[["mage"]])
##              
##               13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
##   mature mom   0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   younger mom  1  1  6 10 19 38 35 66 51 60 51 53 54 51 47 53 52 39 52 38 45 45
##              
##               35 36 37 38 39 40 41 42 45 46 50
##   mature mom  35 31 26 12  7  9  8  2  1  1  1
##   younger mom  0  0  0  0  0  0  0  0  0  0  0