# ex1

# Install the wooldridge package only when it is not already available, so
# that re-running the script does not trigger a fresh download every time
if (!requireNamespace("wooldridge", quietly = TRUE)) {
  install.packages("wooldridge")
}

# Recorded output from a run in which the package had to be installed:
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)

# Attach the package so its datasets can be referenced by name below
library(wooldridge)

# Load the BWGHT dataset
BWGHT <- bwght

# (i) Number of women in the sample and those who report smoking during pregnancy
# The wooldridge `bwght` data has no `smoke` column, so `BWGHT$smoke` is NULL
# and the earlier `sum(BWGHT$smoke == 1)` always produced 0. A woman is
# counted as a smoker when she reports a positive number of cigarettes (`cigs`).
num_women <- nrow(BWGHT)
num_smokers <- sum(BWGHT$cigs > 0, na.rm = TRUE)
cat("Number of women in the sample:", num_women, "\n")

## Number of women in the sample: 1388

cat("Number of women who report smoking during pregnancy:", num_smokers, "\n")

# NOTE(review): the previously recorded output here was 0, an artifact of the
# missing `smoke` column. The line below is the expected result of the
# corrected filter and has not been re-knitted -- confirm on the next run.
## Number of women who report smoking during pregnancy: 212

# (ii) Mean of `cigs` over every woman in the sample, non-smokers included
# (`cigs` is assumed to be cigarettes per day; missing values are dropped)
avg_cigarettes <- mean(BWGHT[["cigs"]], na.rm = TRUE)
cat(
  "Average number of cigarettes smoked per day:",
  avg_cigarettes,
  "\n"
)

## Average number of cigarettes smoked per day: 2.087176

# (iii) Average number of cigarettes smoked per day among women who smoked
# during pregnancy, compared with the overall average from part (ii).
# The comparison only makes sense when the overall average is available.
if (!is.na(avg_cigarettes)) {
  # The wooldridge `bwght` data has no `smoke` column, so the earlier filter
  # `BWGHT$smoke == 1` selected nothing and the mean came out as NaN.
  # Smokers are the women reporting a positive `cigs` value.
  smoker_cigs <- BWGHT$cigs[!is.na(BWGHT$cigs) & BWGHT$cigs > 0]
  avg_smokers <- mean(smoker_cigs)
  cat("Average number of cigarettes smoked per day among smokers:", avg_smokers, "\n")

  # Compare the two averages; an empty smoker group yields NaN, which must not
  # be reported as "lower than or equal"
  if (is.na(avg_smokers)) {
    cat("No smokers found in the sample, so the averages cannot be compared.\n")
  } else if (avg_smokers > avg_cigarettes) {
    cat("Average cigarettes among smokers is higher than the overall average.\n")
  } else {
    cat("Average cigarettes among smokers is lower than or equal to the overall average.\n")
  }
} else {
  cat("The average number of cigarettes smoked per day is NA.\n")
}

# NOTE(review): the previously recorded output was "NaN" followed by "lower
# than or equal", both artifacts of the missing `smoke` column. The lines
# below are the expected results of the corrected filter and have not been
# re-knitted -- confirm on the next run.
## Average number of cigarettes smoked per day among smokers: 13.66509
## Average cigarettes among smokers is higher than the overall average.

# (iv) Mean of `fatheduc`, together with the number of non-missing values
# that actually enter that mean
fatheduc_observed <- BWGHT[["fatheduc"]][!is.na(BWGHT[["fatheduc"]])]
avg_fatheduc <- mean(fatheduc_observed)
cat("Average father's education (fatheduc):", avg_fatheduc, "\n")

## Average father's education (fatheduc): 13.18624

cat(
  "Number of observations used to compute this average:",
  length(fatheduc_observed),
  "\n"
)

## Number of observations used to compute this average: 1192

# (v) Mean and standard deviation of `faminc` (assumed to be family income),
# with missing values dropped from both statistics
family_income <- BWGHT[["faminc"]]
avg_income <- mean(family_income, na.rm = TRUE)
sd_income <- sd(family_income, na.rm = TRUE)
cat("Average family income:", avg_income, "\n")

## Average family income: 29.02666

cat("Standard deviation of family income:", sd_income, "\n")

## Standard deviation of family income: 18.73928

# Second exercise (C3): switch to the MEAP01 dataset
MEAP01 <- meap01

# (i) Extremes of math4; range() returns c(min, max) in a single call
math4_range <- range(MEAP01[["math4"]], na.rm = TRUE)
smallest_math4 <- math4_range[1]
largest_math4 <- math4_range[2]
cat("Largest value of math4:", largest_math4, "\n")

## Largest value of math4: 100

cat("Smallest value of math4:", smallest_math4, "\n")

## Smallest value of math4: 0

# (ii) Schools whose math4 value is exactly 100, as a count and as a share of
# all rows in MEAP01; which() drops any NA comparisons before counting
perfect_pass_rate <- length(which(MEAP01[["math4"]] == 100))
total_schools <- nrow(MEAP01)
percent_perfect <- (perfect_pass_rate / total_schools) * 100
cat("Number of schools with perfect pass rate:", perfect_pass_rate, "\n")

## Number of schools with perfect pass rate: 38

cat("Percentage of schools with perfect pass rate:", percent_perfect, "%\n")

## Percentage of schools with perfect pass rate: 2.084476 %

# (iii) Count of schools whose math4 value is exactly 50; which() drops any
# NA comparisons before counting
schools_50_percent <- length(which(MEAP01[["math4"]] == 50))
cat(
  "Number of schools with 50% pass rate:",
  schools_50_percent,
  "\n"
)

## Number of schools with 50% pass rate: 17

# (iv) Mean of math4 and of read4, each computed with missing values dropped
pass_rate_means <- vapply(
  MEAP01[c("math4", "read4")],
  mean,
  numeric(1),
  na.rm = TRUE
)
# [[ ]] extracts the bare number without the column-name attribute
avg_math_pass_rate <- pass_rate_means[["math4"]]
avg_read_pass_rate <- pass_rate_means[["read4"]]
cat("Average math pass rate:", avg_math_pass_rate, "\n")

## Average math pass rate: 71.909

cat("Average reading pass rate:", avg_read_pass_rate, "\n")

## Average reading pass rate: 60.06188

# (v) Correlation of math4 with read4, using only rows where both are present
correlation <- with(MEAP01, cor(math4, read4, use = "complete.obs"))
cat(
  "Correlation between math and reading scores:",
  correlation,
  "\n"
)

## Correlation between math and reading scores: 0.8427281

# (vi) Mean and standard deviation of exppp, with missing values dropped
spending_per_pupil <- MEAP01[["exppp"]]
avg_exppp <- mean(spending_per_pupil, na.rm = TRUE)
sd_exppp <- sd(spending_per_pupil, na.rm = TRUE)
cat("Average expenditure per pupil:", avg_exppp, "\n")

## Average expenditure per pupil: 5194.865

cat("Standard deviation of expenditure per pupil:", sd_exppp, "\n")

## Standard deviation of expenditure per pupil: 1091.89

# (vii) How much more School A spends than School B, measured two ways:
# the exact percentage relative to School B, and 100 times the difference in
# natural logs
school_a_spending <- 6000
school_b_spending <- 5500
spending_gap <- school_a_spending - school_b_spending
percentage_difference <- (spending_gap / school_b_spending) * 100
# diff() on c(log(B), log(A)) evaluates log(A) - log(B)
log_difference <- 100 * diff(log(c(school_b_spending, school_a_spending)))
cat("Percentage difference in spending:", percentage_difference, "%\n")

## Percentage difference in spending: 9.090909 %

cat("Approximate percentage difference based on log:", log_difference, "%\n")

## Approximate percentage difference based on log: 8.701138 %

# ex1
#
# Maralgua /112035146/
#
# 2024-09-24