# Load the wooldridge package, installing it only when it is not yet available.
# An unconditional install.packages() call would re-download the package on
# every run and abort the script when no network or CRAN mirror is reachable.
if (!requireNamespace("wooldridge", quietly = TRUE)) {
  install.packages("wooldridge")
  ## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
  ## (as 'lib' is unspecified)
}
library(wooldridge)
# Load the BWGHT dataset
BWGHT <- bwght
# The data has no `smoke` column: `BWGHT$smoke` evaluates to NULL, so comparing
# it with 1 silently produces an empty vector (a smoker count of 0 and a smoker
# mean of NaN). Smokers are therefore identified from `cigs` (cigarettes smoked
# per day while pregnant, per the wooldridge documentation), and the column is
# checked up front so a missing variable fails loudly instead of silently.
stopifnot("cigs" %in% names(BWGHT))
is_smoker <- !is.na(BWGHT$cigs) & BWGHT$cigs > 0
# (i) Number of women in the sample and those who report smoking during pregnancy
num_women <- nrow(BWGHT)
num_smokers <- sum(is_smoker)
cat("Number of women in the sample:", num_women, "\n")
## Number of women in the sample: 1388
cat("Number of women who report smoking during pregnancy:", num_smokers, "\n")
## Number of women who report smoking during pregnancy: 212
# (ii) Average number of cigarettes smoked per day (non-smokers count as 0)
avg_cigarettes <- mean(BWGHT$cigs, na.rm = TRUE)
cat("Average number of cigarettes smoked per day:", avg_cigarettes, "\n")
## Average number of cigarettes smoked per day: 2.087176
# Check if avg_cigarettes is not NA before proceeding
if (!is.na(avg_cigarettes)) {
  # (iii) Average number of cigarettes smoked per day among women who smoked
  # during pregnancy; NaN when the sample contains no smokers
  avg_smokers <- mean(BWGHT$cigs[is_smoker])
  cat("Average number of cigarettes smoked per day among smokers:", avg_smokers, "\n")
  # Compare the two averages; an undefined smoker average is reported as such
  # rather than being labelled "lower than or equal"
  if (is.na(avg_smokers)) {
    cat("No smokers in the sample; the averages cannot be compared.\n")
  } else if (avg_smokers > avg_cigarettes) {
    cat("Average cigarettes among smokers is higher than the overall average.\n")
  } else {
    cat("Average cigarettes among smokers is lower than or equal to the overall average.\n")
  }
} else {
  cat("The average number of cigarettes smoked per day is NA.\n")
}
## Average number of cigarettes smoked per day among smokers: 13.66509
## Average cigarettes among smokers is higher than the overall average.
# (iv) Father's education (fatheduc): mean over the reported (non-missing)
# values, together with the number of observations that mean is based on
fatheduc_reported <- BWGHT$fatheduc[!is.na(BWGHT$fatheduc)]
avg_fatheduc <- mean(fatheduc_reported)
cat("Average father's education (fatheduc):", avg_fatheduc, "\n")
## Average father's education (fatheduc): 13.18624
cat(
  "Number of observations used to compute this average:",
  length(fatheduc_reported),
  "\n"
)
## Number of observations used to compute this average: 1192
# (v) Family income (assuming 'faminc' is the family income): mean and
# standard deviation, both ignoring missing values
family_income <- BWGHT$faminc
avg_income <- mean(family_income, na.rm = TRUE)
# The standard deviation is the square root of the sample variance
sd_income <- sqrt(var(family_income, na.rm = TRUE))
cat("Average family income:", avg_income, "\n")
## Average family income: 29.02666
cat("Standard deviation of family income:", sd_income, "\n")
## Standard deviation of family income: 18.73928
# Moving on to the MEAP01 dataset (C3)
# Copy the `meap01` data into MEAP01 for the remaining questions
MEAP01 <- meap01
# (i) Extremes of math4: range() returns c(minimum, maximum) in one call,
# with missing values dropped
math4_extremes <- range(MEAP01$math4, na.rm = TRUE)
smallest_math4 <- math4_extremes[1]
largest_math4 <- math4_extremes[2]
cat("Largest value of math4:", largest_math4, "\n")
## Largest value of math4: 100
cat("Smallest value of math4:", smallest_math4, "\n")
## Smallest value of math4: 0
# (ii) Schools whose math4 value is exactly 100, as a count and as a
# percentage of all rows in MEAP01 (which() skips missing values, while rows
# with a missing math4 still count in the denominator)
perfect_pass_rate <- length(which(MEAP01$math4 == 100))
share_perfect <- perfect_pass_rate / nrow(MEAP01)
percent_perfect <- 100 * share_perfect
cat("Number of schools with perfect pass rate:", perfect_pass_rate, "\n")
## Number of schools with perfect pass rate: 38
cat("Percentage of schools with perfect pass rate:", percent_perfect, "%\n")
## Percentage of schools with perfect pass rate: 2.084476 %
# (iii) Schools whose math4 value is exactly 50, again skipping missing values
schools_50_percent <- length(which(MEAP01$math4 == 50))
cat("Number of schools with 50% pass rate:", schools_50_percent, "\n")
## Number of schools with 50% pass rate: 17
# (iv) Mean of math4 and of read4, each computed over its non-missing values
math_rates <- MEAP01[["math4"]]
read_rates <- MEAP01[["read4"]]
avg_math_pass_rate <- mean(math_rates, na.rm = TRUE)
avg_read_pass_rate <- mean(read_rates, na.rm = TRUE)
cat("Average math pass rate:", avg_math_pass_rate, "\n")
## Average math pass rate: 71.909
cat("Average reading pass rate:", avg_read_pass_rate, "\n")
## Average reading pass rate: 60.06188
# (v) Correlation between math4 and read4, using only the rows where both
# values are present
correlation <- cor(math_rates, read_rates, use = "complete.obs")
cat("Correlation between math and reading scores:", correlation, "\n")
## Correlation between math and reading scores: 0.8427281
# (vi) Expenditure per pupil (exppp): mean and standard deviation, ignoring
# missing values
pupil_spending <- MEAP01[["exppp"]]
avg_exppp <- mean(pupil_spending, na.rm = TRUE)
sd_exppp <- sd(pupil_spending, na.rm = TRUE)
cat("Average expenditure per pupil:", avg_exppp, "\n")
## Average expenditure per pupil: 5194.865
cat("Standard deviation of expenditure per pupil:", sd_exppp, "\n")
## Standard deviation of expenditure per pupil: 1091.89
# (vii) How much more School A spends than School B, measured two ways:
# the exact percentage difference relative to School B, and the approximation
# given by 100 times the difference in natural logs
school_a_spending <- 6000
school_b_spending <- 5500
spending_gap <- school_a_spending - school_b_spending
percentage_difference <- (spending_gap / school_b_spending) * 100
# diff() of c(log(B), log(A)) is log(A) - log(B)
log_difference <- 100 * diff(log(c(school_b_spending, school_a_spending)))
cat("Percentage difference in spending:", percentage_difference, "%\n")
## Percentage difference in spending: 9.090909 %
cat("Approximate percentage difference based on log:", log_difference, "%\n")
## Approximate percentage difference based on log: 8.701138 %