knitr::opts_chunk$set(echo = TRUE)
setwd("C:/Users/Admin/Desktop/data set computing")
getwd()
## [1] "C:/Users/Admin/Desktop/data set computing"
hsb2 <- read.csv("hsb2.csv")
names(hsb2)[names(hsb2) == "female"] <- "gender"
names(hsb2)
##  [1] "id"      "gender"  "race"    "ses"     "schtyp"  "prog"    "read"   
##  [8] "write"   "math"    "science" "socst"
knitr::opts_chunk$set(echo = TRUE)
# 1.Structure and dimensions
# structure
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("Stat2Data")
## Installing package into 'C:/Users/Admin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'Stat2Data' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Admin\AppData\Local\Temp\RtmpEtGqpQ\downloaded_packages
library(Stat2Data)
## Warning: package 'Stat2Data' was built under R version 4.5.2
names(hsb2)
##  [1] "id"      "gender"  "race"    "ses"     "schtyp"  "prog"    "read"   
##  [8] "write"   "math"    "science" "socst"
dim(hsb2)
## [1] 200  11
summary(hsb2[, c("math", "science", "read", "write", "socst")])
##       math          science           read           write      
##  Min.   :33.00   Min.   :26.00   Min.   :28.00   Min.   :31.00  
##  1st Qu.:45.00   1st Qu.:44.00   1st Qu.:44.00   1st Qu.:45.75  
##  Median :52.00   Median :53.00   Median :50.00   Median :54.00  
##  Mean   :52.65   Mean   :51.85   Mean   :52.23   Mean   :52.77  
##  3rd Qu.:59.00   3rd Qu.:58.00   3rd Qu.:60.00   3rd Qu.:60.00  
##  Max.   :75.00   Max.   :74.00   Max.   :76.00   Max.   :67.00  
##      socst      
##  Min.   :26.00  
##  1st Qu.:46.00  
##  Median :52.00  
##  Mean   :52.41  
##  3rd Qu.:61.00  
##  Max.   :71.00

Including Plots

You can also embed plots, for example:

## Installing package into 'C:/Users/Admin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'ggplot2' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Admin\AppData\Local\Temp\RtmpEtGqpQ\downloaded_packages
## Warning: package 'ggplot2' was built under R version 4.5.3
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

plot(# read by gender
ggplot(hsb2, aes(gender, read)) +
  geom_boxplot())
## Warning: Orientation is not uniquely specified when both the x and y aesthetics are
## continuous. Picking default orientation 'x'.
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?

knitr::opts_chunk$set(echo = TRUE)
#2. Math by gender (OK: 2 groups)
t.test(math ~ gender, data = hsb2)
## 
##  Welch Two Sample t-test
## 
## data:  math by gender
## t = 0.41097, df = 187.58, p-value = 0.6816
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -2.092206  3.193325
## sample estimates:
## mean in group 0 mean in group 1 
##        52.94505        52.39450
# Program proportions (OK: categorical)
table(hsb2$prog)
## 
##   1   2   3 
##  45 105  50
chisq.test(table(hsb2$prog))
## 
##  Chi-squared test for given probabilities
## 
## data:  table(hsb2$prog)
## X-squared = 33.25, df = 2, p-value = 6.024e-08
summary(# Science across SES (3 groups → ANOVA, NOT var.test)
summary(aov(science ~ ses, data = hsb2))
 )
##      Length Class Mode
## [1,] 5      anova list
knitr::opts_chunk$set(echo = TRUE)
#3. 
# split read into high/low
hsb2$high_read <- ifelse(hsb2$read > mean(hsb2$read), 1, 0)

t.test(math ~ high_read, data = hsb2)
## 
##  Welch Two Sample t-test
## 
## data:  math by high_read
## t = -9.5731, df = 161.53, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -13.060545  -8.593675
## sample estimates:
## mean in group 0 mean in group 1 
##        48.04348        58.87059
knitr::opts_chunk$set(echo = TRUE)
#4.
# t-test: school type
t.test(math ~ schtyp, data = hsb2)
## 
##  Welch Two Sample t-test
## 
## data:  math by schtyp
## t = -1.448, df = 45.351, p-value = 0.1545
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -5.9908645  0.9789598
## sample estimates:
## mean in group 1 mean in group 2 
##        52.24405        54.75000
# ANOVA: SES effect on science
aov(science ~ ses, data = hsb2)
## Call:
##    aov(formula = science ~ ses, data = hsb2)
## 
## Terms:
##                       ses Residuals
## Sum of Squares   1560.739 17946.761
## Deg. of Freedom         1       198
## 
## Residual standard error: 9.520515
## Estimated effects may be unbalanced
# correlation
cor.test(hsb2$read, hsb2$write)
## 
##  Pearson's product-moment correlation
## 
## data:  hsb2$read and hsb2$write
## t = 10.465, df = 198, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4993831 0.6792753
## sample estimates:
##       cor 
## 0.5967765
knitr::opts_chunk$set(echo = TRUE)
#.5.
model <- lm(math ~ read, data = hsb2)
summary(model)
## 
## Call:
## lm(formula = math ~ read, data = hsb2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.1624  -5.1624  -0.4135   4.7775  16.4684 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 21.03816    2.58945   8.125 4.76e-14 ***
## read         0.60515    0.04865  12.438  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.037 on 198 degrees of freedom
## Multiple R-squared:  0.4386, Adjusted R-squared:  0.4358 
## F-statistic: 154.7 on 1 and 198 DF,  p-value: < 2.2e-16
knitr::opts_chunk$set(echo = TRUE)
model0 <- lm(math ~ 1, data = hsb2)
anova(model0, model)
## Analysis of Variance Table
## 
## Model 1: math ~ 1
## Model 2: math ~ read
##   Res.Df   RSS Df Sum of Sq     F    Pr(>F)    
## 1    199 17466                                 
## 2    198  9805  1    7660.8 154.7 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
knitr::opts_chunk$set(echo = TRUE)
#6.
# CI for math mean
t.test(hsb2$math)
## 
##  One Sample t-test
## 
## data:  hsb2$math
## t = 79.47, df = 199, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  51.33868 53.95132
## sample estimates:
## mean of x 
##    52.645
# mean read & write
mean(hsb2$read)
## [1] 52.23
mean(hsb2$write)
## [1] 52.775
# covariance (joint idea)
cov(hsb2[, c("read","write")])
##            read    write
## read  105.12271 57.99673
## write  57.99673 89.84359
summary(#7.
# math ~ read relationship
lm(math ~ read, data = hsb2))
## 
## Call:
## lm(formula = math ~ read, data = hsb2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.1624  -5.1624  -0.4135   4.7775  16.4684 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 21.03816    2.58945   8.125 4.76e-14 ***
## read         0.60515    0.04865  12.438  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.037 on 198 degrees of freedom
## Multiple R-squared:  0.4386, Adjusted R-squared:  0.4358 
## F-statistic: 154.7 on 1 and 198 DF,  p-value: < 2.2e-16
knitr::opts_chunk$set(echo = TRUE)
# gender difference in write
t.test(write ~ gender, data = hsb2)
## 
##  Welch Two Sample t-test
## 
## data:  write by gender
## t = -3.6564, df = 169.71, p-value = 0.0003409
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -7.499159 -2.240734
## sample estimates:
## mean in group 0 mean in group 1 
##        50.12088        54.99083
summary(aov(science ~ ses, data = hsb2))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## ses           1   1561  1560.7   17.22 4.94e-05 ***
## Residuals   198  17947    90.6                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
knitr::opts_chunk$set(echo = TRUE)
#.8

set.seed(123)

# Type I error
B <- 500
p <- replicate(B, {
  x <- rnorm(30, 50, 10)
  t.test(x, mu=50)$p.value
})
mean(p < 0.05)
## [1] 0.034
# Type II error
p2 <- replicate(B, {
  x <- rnorm(30, 55, 10)
  t.test(x, mu=50)$p.value
})
mean(p2 >= 0.05)
## [1] 0.286
knitr::opts_chunk$set(echo = TRUE)
#9
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("pwr")
## Installing package into 'C:/Users/Admin/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'pwr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Admin\AppData\Local\Temp\RtmpEtGqpQ\downloaded_packages
library(pwr)
## Warning: package 'pwr' was built under R version 4.5.3
pwr.t.test(d=0.5, n=100, sig.level=0.05, type="two.sample")
## 
##      Two-sample t test power calculation 
## 
##               n = 100
##               d = 0.5
##       sig.level = 0.05
##           power = 0.9404272
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
# interpretation
test <- t.test(math ~ gender, data = hsb2)

if(test$p.value < 0.05){
  "Reject H0: significant difference"
} else {
  "Fail to reject H0"
}
## [1] "Fail to reject H0"