# Fetch the workspace file that provides the `nc` data frame, then print a
# per-column summary of it.
# The download is skipped when a local copy already exists, so re-running the
# script does not hit the network again. `mode = "wb"` makes the binary
# transfer explicit (text-mode transfers corrupt binary files on Windows).
data_file <- "nc.RData"
if (!file.exists(data_file)) {
  download.file(
    "http://www.openintro.org/stat/data/nc.RData",
    destfile = data_file,
    mode = "wb"
  )
}
load(data_file)
summary(nc)
## fage mage mature weeks premie
## Min. :14.00 Min. :13 mature mom :133 Min. :20.00 full term:846
## 1st Qu.:25.00 1st Qu.:22 younger mom:867 1st Qu.:37.00 premie :152
## Median :30.00 Median :27 Median :39.00 NA's : 2
## Mean :30.26 Mean :27 Mean :38.33
## 3rd Qu.:35.00 3rd Qu.:32 3rd Qu.:40.00
## Max. :55.00 Max. :50 Max. :45.00
## NA's :171 NA's :2
## visits marital gained weight
## Min. : 0.0 married :386 Min. : 0.00 Min. : 1.000
## 1st Qu.:10.0 not married:613 1st Qu.:20.00 1st Qu.: 6.380
## Median :12.0 NA's : 1 Median :30.00 Median : 7.310
## Mean :12.1 Mean :30.33 Mean : 7.101
## 3rd Qu.:15.0 3rd Qu.:38.00 3rd Qu.: 8.060
## Max. :30.0 Max. :85.00 Max. :11.750
## NA's :9 NA's :27
## lowbirthweight gender habit whitemom
## low :111 female:503 nonsmoker:873 not white:284
## not low:889 male :497 smoker :126 white :714
## NA's : 1 NA's : 2
##
##
##
##
# Preview the first six rows of the data frame.
head(nc, n = 6L)
## fage mage mature weeks premie visits marital gained weight
## 1 NA 13 younger mom 39 full term 10 married 38 7.63
## 2 NA 14 younger mom 42 full term 15 married 20 7.88
## 3 19 15 younger mom 37 full term 11 married 38 6.63
## 4 21 15 younger mom 41 full term 6 married 34 8.00
## 5 NA 15 younger mom 39 full term 9 married 27 6.38
## 6 NA 15 younger mom 38 full term 19 married 22 5.38
## lowbirthweight gender habit whitemom
## 1 not low male nonsmoker not white
## 2 not low male nonsmoker not white
## 3 not low female nonsmoker white
## 4 not low male nonsmoker white
## 5 not low female nonsmoker not white
## 6 low male nonsmoker not white
# Number of observations (rows) in the data frame.
dim(nc)[1L]
## [1] 1000
# Draw one histogram per variable, in the same order as before. The title and
# x-axis label are spelled out so each plot matches what `hist(nc$<name>)`
# produces by default.
invisible(lapply(
  c("fage", "mage", "weeks", "gained", "weight"),
  function(var_name) {
    axis_label <- paste0("nc$", var_name)
    hist(
      nc[[var_name]],
      main = paste("Histogram of", axis_label),
      xlab = axis_label
    )
  }
))
# Side-by-side boxplots of `weight`, one box per level of `habit`.
boxplot(
  weight ~ habit,
  data = nc,
  main = "Relation Between Mother's Habit and Baby's Weight",
  xlab = "Mother Smoker/Non-Smoker",
  ylab = "Baby's Weight"
)
# Count of observations in each `habit` level (including missing values).
summary(nc[["habit"]])
## nonsmoker smoker NA's
## 873 126 1
# Summary statistics of `weight` within each `habit` group.
# The previous calls, e.g. `summary(nc$weight, nc$habit$smoker)`, passed the
# group as a second argument, which `summary()` does not use for subsetting:
# both calls printed the summary of ALL weights. Subset explicitly instead.
# `which()` drops the row whose `habit` is NA rather than yielding an NA weight.
# NOTE: the output formerly pasted here was the overall (ungrouped) summary;
# re-run the script to obtain the per-group values.
summary(nc$weight[which(nc$habit == "smoker")])
summary(nc$weight[which(nc$habit == "nonsmoker")])
# Mean `weight` within each level of `habit`.
by(data = nc$weight, INDICES = nc$habit, FUN = mean)
## nc$habit: nonsmoker
## [1] 7.144273
## ------------------------------------------------------------
## nc$habit: smoker
## [1] 6.82873
# Number of `weight` observations within each level of `habit`.
by(data = nc$weight, INDICES = nc$habit, FUN = length)
## nc$habit: nonsmoker
## [1] 873
## ------------------------------------------------------------
## nc$habit: smoker
## [1] 126
# Two-sided theoretical hypothesis test for a difference in mean `weight`
# between the `habit` groups (null value 0).
inference(
  y = nc$weight,
  x = nc$habit,
  est = "mean",
  type = "ht",
  null = 0,
  alternative = "twosided",
  method = "theoretical"
)
## Warning: package 'BHH2' was built under R version 3.6.3
## Response variable: numerical, Explanatory variable: categorical
## Difference between two means
## Summary statistics:
## n_nonsmoker = 873, mean_nonsmoker = 7.1443, sd_nonsmoker = 1.5187
## n_smoker = 126, mean_smoker = 6.8287, sd_smoker = 1.3862
## Observed difference between means (nonsmoker-smoker) = 0.3155
##
## H0: mu_nonsmoker - mu_smoker = 0
## HA: mu_nonsmoker - mu_smoker != 0
## Standard error = 0.134
## Test statistic: Z = 2.359
## p-value = 0.0184
# Theoretical confidence interval for the difference in mean `weight`
# between the `habit` groups, taken in the order smoker - nonsmoker.
inference(
  y = nc$weight,
  x = nc$habit,
  est = "mean",
  type = "ci",
  null = 0,
  alternative = "twosided",
  method = "theoretical",
  order = c("smoker", "nonsmoker")
)
## Response variable: numerical, Explanatory variable: categorical
## Difference between two means
## Summary statistics:
## n_smoker = 126, mean_smoker = 6.8287, sd_smoker = 1.3862
## n_nonsmoker = 873, mean_nonsmoker = 7.1443, sd_nonsmoker = 1.5187
## Observed difference between means (smoker-nonsmoker) = -0.3155
##
## Standard error = 0.1338
## 95 % Confidence interval = ( -0.5777 , -0.0534 )
# Theoretical confidence interval for the mean of `weeks` (single sample,
# default confidence level).
inference(
  y = nc$weeks,
  est = "mean",
  type = "ci",
  null = 0,
  alternative = "twosided",
  method = "theoretical"
)
## Single mean
## Summary statistics:
## mean = 38.3347 ; sd = 2.9316 ; n = 998
## Standard error = 0.0928
## 95 % Confidence interval = ( 38.1528 , 38.5165 )
# Same single-mean interval for `weeks`, at a 90% confidence level.
inference(
  y = nc$weeks,
  est = "mean",
  type = "ci",
  null = 0,
  alternative = "twosided",
  method = "theoretical",
  conflevel = 0.90
)
## Single mean
## Summary statistics:
## mean = 38.3347 ; sd = 2.9316 ; n = 998
## Standard error = 0.0928
## 90 % Confidence interval = ( 38.182 , 38.4873 )
# Two-sided theoretical hypothesis test for a difference in mean `weight`
# between the `mature` groups (null value 0).
inference(
  y = nc$weight,
  x = nc$mature,
  est = "mean",
  type = "ht",
  null = 0,
  alternative = "twosided",
  method = "theoretical"
)
## Response variable: numerical, Explanatory variable: categorical
## Difference between two means
## Summary statistics:
## n_mature mom = 133, mean_mature mom = 7.1256, sd_mature mom = 1.6591
## n_younger mom = 867, mean_younger mom = 7.0972, sd_younger mom = 1.4855
## Observed difference between means (mature mom-younger mom) = 0.0283
##
## H0: mu_mature mom - mu_younger mom = 0
## HA: mu_mature mom - mu_younger mom != 0
## Standard error = 0.152
## Test statistic: Z = 0.186
## p-value = 0.8526
# Install dplyr only when it is not already available. Re-installing on every
# run is slow and, as the earlier run of this script showed on Windows, can
# fail with "cannot remove prior installation" / "Permission denied" while
# copying dplyr.dll (presumably because the installed package was in use).
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
# Attach dplyr once; the second, duplicate library() call was redundant.
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Largest `mage` observed within each `mature` group.
nc %>%
  group_by(mature) %>%
  summarise(max(mage))
## # A tibble: 2 x 2
## mature `max(mage)`
## <fct> <int>
## 1 mature mom 50
## 2 younger mom 34
# Smallest `mage` observed within each `mature` group.
nc %>%
  group_by(mature) %>%
  summarise(min(mage))
## # A tibble: 2 x 2
## mature `min(mage)`
## <fct> <int>
## 1 mature mom 35
## 2 younger mom 13
# Two-sided theoretical hypothesis test for a difference in mean `gained`
# between the `habit` groups (null value 0).
inference(
  y = nc$gained,
  x = nc$habit,
  est = "mean",
  type = "ht",
  null = 0,
  alternative = "twosided",
  method = "theoretical"
)
## Response variable: numerical, Explanatory variable: categorical
## Difference between two means
## Summary statistics:
## n_nonsmoker = 851, mean_nonsmoker = 30.0964, sd_nonsmoker = 14.0226
## n_smoker = 122, mean_smoker = 31.9262, sd_smoker = 15.6512
## Observed difference between means (nonsmoker-smoker) = -1.8299
##
## H0: mu_nonsmoker - mu_smoker = 0
## HA: mu_nonsmoker - mu_smoker != 0
## Standard error = 1.496
## Test statistic: Z = -1.223
## p-value = 0.2214
# Theoretical confidence interval for the difference in mean `gained`
# between the `habit` groups (default group order and confidence level).
inference(
  y = nc$gained,
  x = nc$habit,
  est = "mean",
  type = "ci",
  null = 0,
  alternative = "twosided",
  method = "theoretical"
)
## Response variable: numerical, Explanatory variable: categorical
## Difference between two means
## Summary statistics:
## n_nonsmoker = 851, mean_nonsmoker = 30.0964, sd_nonsmoker = 14.0226
## n_smoker = 122, mean_smoker = 31.9262, sd_smoker = 15.6512
## Observed difference between means (nonsmoker-smoker) = -1.8299
##
## Standard error = 1.4963
## 95 % Confidence interval = ( -4.7626 , 1.1028 )