### C2
# Install wooldridge only when it is missing, so that re-running the script
# does not reinstall the package (and hit the network) every time.
if (!requireNamespace("wooldridge", quietly = TRUE)) {
  install.packages("wooldridge")
}
library(wooldridge) # Load the wooldridge package
data("bwght")
# Summarise the data set analysed in C2. The original call summarised the
# built-in `cars` data frame, which is unrelated to this exercise.
summary(bwght)
# (i) Number of women in the sample and how many reported smoking during pregnancy
# Total number of women (one row per woman)
total_women <- nrow(bwght)
# Number of women who smoked during pregnancy.
# The original filtered on `smoke == 1`, which recorded 0 smokers and a NaN
# mean below. NOTE(review): `bwght` appears to have no `smoke` column, so
# smokers are identified here by a positive `cigs` value -- confirm against
# the data set documentation.
is_smoker <- !is.na(bwght$cigs) & bwght$cigs > 0
women_smoked <- sum(is_smoker)
total_women
## [1] 1388
women_smoked
## (output changes with this fix; re-run to refresh)
# Average and median number of cigarettes smoked per day, whole sample
avg_cigs <- mean(bwght$cigs, na.rm = TRUE)
median_cigs <- median(bwght$cigs, na.rm = TRUE)
avg_cigs
## [1] 2.087176
median_cigs
## [1] 0
# Among women who smoked during pregnancy, the average number of cigarettes smoked per day
smokers_data <- bwght[is_smoker, , drop = FALSE]
avg_cigs_smokers <- mean(smokers_data$cigs, na.rm = TRUE)
avg_cigs_smokers
## (output changes with this fix; re-run to refresh)
# Mean of fatheduc over the non-missing observations, together with the
# number of observations that mean is based on
avg_fatheduc <- with(bwght, mean(fatheduc, na.rm = TRUE))
valid_obs_fatheduc <- length(na.omit(bwght[["fatheduc"]]))
print(avg_fatheduc)
## [1] 13.18624
print(valid_obs_fatheduc)
## [1] 1192
# Average family income and its standard deviation, reported in dollars.
# NOTE(review): faminc appears to be recorded in thousands of dollars (the
# mean is about 29) -- confirm against the data set documentation. The raw
# statistics are kept and dollar-scaled versions are added alongside them.
avg_faminc <- mean(bwght$faminc, na.rm = TRUE)
sd_faminc <- sd(bwght$faminc, na.rm = TRUE)
avg_faminc_dollars <- 1000 * avg_faminc
sd_faminc_dollars <- 1000 * sd_faminc
avg_faminc
## [1] 29.02666
sd_faminc
## [1] 18.73928
avg_faminc_dollars
## [1] 29026.66
sd_faminc_dollars
## [1] 18739.28
### C3
# Largest and smallest values of math4, ignoring missing values
max_math4 <- with(meap01, max(math4, na.rm = TRUE))
min_math4 <- with(meap01, min(math4, na.rm = TRUE))
print(max_math4)
## [1] 100
print(min_math4)
## [1] 0
# Number of schools with a perfect (100%) math pass rate, and that count as
# a percentage of all rows in meap01
num_perfect_pass <- length(which(meap01[["math4"]] == 100))
percentage_perfect_pass <- 100 * (num_perfect_pass / nrow(meap01))
print(num_perfect_pass)
## [1] 38
print(percentage_perfect_pass)
## [1] 2.084476
# Number of schools with a math pass rate of exactly 50%
num_50_percent_pass <- length(which(meap01[["math4"]] == 50))
print(num_50_percent_pass)
## [1] 17
# Average pass rates on the math and reading tests, for comparison
avg_math_pass <- with(meap01, mean(math4, na.rm = TRUE))
avg_read_pass <- with(meap01, mean(read4, na.rm = TRUE))
print(avg_math_pass)
## [1] 71.909
print(avg_read_pass)
## [1] 60.06188
# Correlation between math4 and read4, using only rows where both are present
correlation <- with(meap01, cor(math4, read4, use = "complete.obs"))
print(correlation)
## [1] 0.8427281
# Average and standard deviation of exppp, ignoring missing values
avg_exppp <- with(meap01, mean(exppp, na.rm = TRUE))
sd_exppp <- with(meap01, sd(exppp, na.rm = TRUE))
print(avg_exppp)
## [1] 5194.865
print(sd_exppp)
## [1] 1091.89
# Percentage by which school A's value (6000) exceeds school B's (5500),
# computed exactly and via the approximation 100 * difference in natural logs
school_A <- 6000
school_B <- 5500
percent_diff <- 100 * ((school_A - school_B) / school_B)
log_diff <- diff(log(c(school_B, school_A))) * 100
print(percent_diff)
## [1] 9.090909
print(log_diff)
## [1] 8.701138