# Question 1
library(wooldridge)
data("bwght")

# (i) Total women and women who smoked during pregnancy
# NOTE: `bwght` has no `smoke` column, so `bwght$smoke` is NULL and
# `sum(bwght$smoke == 1)` silently returns 0 (and the smoker-only mean in
# part (iii) becomes NaN). Smoking status is derived from `cigs` instead:
# a woman counts as a smoker when she reports a positive number of
# cigarettes per day. The `!is.na()` guard keeps the mask free of NA so it
# is safe for both counting and subsetting.
# The outputs shown for `women_smoked` and `avg_cigs_smokers` are the values
# expected after this fix; re-run the script to confirm them.
is_smoker <- !is.na(bwght$cigs) & bwght$cigs > 0
total_women <- nrow(bwght)
women_smoked <- sum(is_smoker)

total_women
## [1] 1388
women_smoked
## [1] 212
# (ii) Average number of cigarettes smoked per day (all women, including
# non-smokers, so the many zeros pull this average down)
avg_cigarettes <- mean(bwght$cigs, na.rm = TRUE)
avg_cigarettes
## [1] 2.087176
# (iii) Average number of cigarettes smoked per day among smokers only
# Reuses the `is_smoker` mask from part (i) so both parts agree on who
# counts as a smoker.
avg_cigs_smokers <- mean(bwght$cigs[is_smoker])
avg_cigs_smokers
## [1] 13.66509
# (iv) Average father's education
# `fatheduc` contains missing values, so the mean is taken over the
# non-missing observations only; `num_obs_fatheduc` reports how many
# observations actually entered that mean.
avg_fatheduc <- mean(bwght$fatheduc, na.rm = TRUE)
num_obs_fatheduc <- sum(!is.na(bwght$fatheduc))

avg_fatheduc
## [1] 13.18624
num_obs_fatheduc
## [1] 1192
# (v) Average family income and standard deviation
avg_faminc <- mean(bwght$faminc, na.rm = TRUE)
sd_faminc <- sd(bwght$faminc, na.rm = TRUE)

avg_faminc
## [1] 29.02666
sd_faminc
## [1] 18.73928
# Question 2
library(wooldridge)
data("meap01")

# (i) Lowest and highest observed math4 pass rates
min_math4 <- with(meap01, min(math4, na.rm = TRUE))
max_math4 <- with(meap01, max(math4, na.rm = TRUE))
min_math4
## [1] 0
max_math4
## [1] 100
# (ii) Number of schools with a perfect math pass rate, and their share of
# all schools in the sample (in percent)
total_schools <- nrow(meap01)
# which() drops NA comparisons, so missing pass rates are not counted.
perfect_pass_schools <- length(which(meap01[["math4"]] == 100))
percentage_perfect <- 100 * (perfect_pass_schools / total_schools)
perfect_pass_schools
## [1] 38
percentage_perfect
## [1] 2.084476
# (iii) Number of schools whose math pass rate is exactly 50%
schools_50_pass <- length(which(meap01[["math4"]] == 50))
schools_50_pass
## [1] 17
# (iv) Mean pass rates for math and for reading
avg_math4 <- with(meap01, mean(math4, na.rm = TRUE))
avg_read4 <- with(meap01, mean(read4, na.rm = TRUE))
avg_math4
## [1] 71.909
avg_read4
## [1] 60.06188
# (v) Correlation between the math and reading pass rates, using only rows
# where both are observed
corr_math_read <- with(meap01, cor(math4, read4, use = "complete.obs"))
corr_math_read
## [1] 0.8427281
# (vi) Mean and standard deviation of expenditure per pupil
avg_exppp <- with(meap01, mean(exppp, na.rm = TRUE))
sd_exppp <- with(meap01, sd(exppp, na.rm = TRUE))
avg_exppp
## [1] 5194.865
sd_exppp
## [1] 1091.89
# (vii) By what percentage does School A's spending exceed School B's?
schoolA_spending <- 6000
schoolB_spending <- 5500
# Exact percentage difference, measured relative to School B.
percentage_difference <-
  100 * ((schoolA_spending - schoolB_spending) / schoolB_spending)
# Log-difference approximation to the same percentage change:
# diff() over c(B, A) yields log(A) - log(B).
log_diff_approx <- 100 * diff(log(c(schoolB_spending, schoolA_spending)))
percentage_difference
## [1] 9.090909
log_diff_approx
## [1] 8.701138