# Install the 'wooldridge' data package only when it is not already available,
# so that re-running this script does not reinstall it on every run.
if (!requireNamespace("wooldridge", quietly = TRUE)) {
  install.packages("wooldridge")
}
# Console output recorded when the installation was actually performed:
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)

#C2

library(wooldridge)
data("bwght")
# 1. How many women are in the sample, and how many of them report smoking
#    during pregnancy?
# The sample size is the row count of bwght; a smoker is any row with cigs > 0.
total_women <- dim(bwght)[1]
smoking_women <- with(bwght, sum(cigs > 0))
cat("1.  Total women:", total_women, "\n")
## 1.  Total women: 1388
cat("    Women who report smoking during pregnancy:", smoking_women, "\n")
##     Women who report smoking during pregnancy: 212
# 2. Average number of cigarettes smoked per day, taken over every woman in
#    the sample (women with cigs == 0 are included in the average).
average_cigarettes <- with(bwght, mean(cigs))
cat("\n2. Average no of cig per day:", average_cigarettes, "\n")
## 
## 2. Average no of cig per day: 2.087176
# This average is not a good measure of the "typical" woman: only 212 of the
# 1388 women report smoking at all, so the distribution of cigs has a large
# spike at zero and is heavily right-skewed.
# 3. Among women who smoked during pregnancy, what is the average number of
#    cigarettes smoked per day?
# The subset of smokers is stored under its own name so that it does not
# overwrite the smoker *count* that part 1 keeps in smoking_women.
smokers <- bwght[bwght$cigs > 0, ]
# mean() replaces the hand-rolled sum / nrow computation.
average_cigs_smoked <- mean(smokers$cigs)

cat("\n3. Average no of cig per day among smoking women:", average_cigs_smoked, "\n")
## 
## 3. Average no of cig per day among smoking women: 13.66509
# 4. Average of fatheduc, and how many observations are used to compute it.
# fatheduc contains missing values (NA), so they are dropped from the mean.
# The number of non-missing values is computed from the data and printed,
# rather than being hard-coded in a comment.
average_fatheduc <- mean(bwght$fatheduc, na.rm = TRUE)
n_fatheduc <- sum(!is.na(bwght$fatheduc))

cat("\n4. Average of fatheduc:", average_fatheduc, "\n")
## 
## 4. Average of fatheduc: 13.18624
cat("   Observations used (non-missing fatheduc):", n_fatheduc, "\n")
##    Observations used (non-missing fatheduc): 1192
# 5. Average family income and its standard deviation, in dollars.
# faminc is recorded in thousands of dollars in the bwght data set, so both
# statistics are scaled by 1000 before being reported as dollar amounts.
dollars_per_unit <- 1000
average_income <- mean(bwght$faminc) * dollars_per_unit
std_dev_income <- sd(bwght$faminc) * dollars_per_unit
cat("\n5. Average family income in dollars:", average_income, "\n")
## 
## 5. Average family income in dollars: 29026.66
cat("   Standard deviation of family income in dollars:", std_dev_income, "\n")
##    Standard deviation of family income in dollars: 18739.28

#C3

data("meap01")
# 1. Largest and smallest values of math4.
# range() returns the minimum and the maximum together, in that order.
math4_range <- range(meap01$math4)
max_math4 <- math4_range[2]
min_math4 <- math4_range[1]
cat("1. Largest value:", max_math4, "\n")
## 1. Largest value: 100
cat("   Smallest value:", min_math4, "\n")
##    Smallest value: 0
# 2. How many schools have a perfect (100%) pass rate on the math test, and
#    what percentage of the total sample is that?
# Note: despite its name, perfect_pass_rate holds a count of schools.
is_perfect <- meap01$math4 == 100
perfect_pass_rate <- sum(is_perfect)
total_schools <- length(is_perfect)
percentage_perfect_pass <- perfect_pass_rate / total_schools * 100
cat("\n2. Number of schools with a perfect pass rate on math test:", perfect_pass_rate, "\n")
## 
## 2. Number of schools with a perfect pass rate on math test: 38
# Share of the whole sample that those schools represent.
cat("   Percentage of total sample with perfect pass rate:", percentage_perfect_pass, "%\n")
##    Percentage of total sample with perfect pass rate: 2.084476 %
# 3. How many schools have a math pass rate of exactly 50%?
schools_50_percent <- with(meap01, sum(math4 == 50))
cat("\n3. No of schools with math pass rates of exactly 50%:", schools_50_percent, "\n")
## 
## 3. No of schools with math pass rates of exactly 50%: 17
# 4. Compare the average pass rates for the math and reading tests.
# Both column means are computed in one call and then pulled out by name.
pass_rate_means <- vapply(meap01[c("math4", "read4")], mean, numeric(1))
average_math_pass_rate <- pass_rate_means[["math4"]]
average_reading_pass_rate <- pass_rate_means[["read4"]]
cat("\n4. Average pass rate for math test:", average_math_pass_rate, "\n")
## 
## 4. Average pass rate for math test: 71.909
cat("   Average pass rate for reading test:", average_reading_pass_rate, "\n")
##    Average pass rate for reading test: 60.06188
# The reading average is lower than the math average (60.06 vs 71.91), so the
# reading test is the harder one to pass.
# 5. Correlation between the math4 and read4 pass rates.
correlation_math_read <- with(meap01, cor(math4, read4))
cat("\n5. Correlation between math4 and read4:", correlation_math_read, "\n")
## 
## 5. Correlation between math4 and read4: 0.8427281
# 6. Average of exppp (expenditure per pupil) and its standard deviation.
spending_per_pupil <- meap01[["exppp"]]
average_exppp <- mean(spending_per_pupil)
std_dev_exppp <- sd(spending_per_pupil)
cat("\n6. Average expenditure per pupil:", average_exppp, "\n")
## 
## 6. Average expenditure per pupil: 5194.865
cat("   Standard deviation of expenditure per pupil:", std_dev_exppp, "\n")
##    Standard deviation of expenditure per pupil: 1091.89
# 7. Suppose School A spends $6,000 per student and School B spends $5,500
#    per student. By what percentage does School A's spending exceed School
#    B's? Reported both exactly and via the difference-in-natural-logs
#    approximation.
school_A_spending <- 6000
school_B_spending <- 5500
spending_gap <- school_A_spending - school_B_spending
percentage_difference <- spending_gap / school_B_spending * 100
# diff() over the logged pair (B, A) evaluates log(A) - log(B).
log_spending <- log(c(school_B_spending, school_A_spending))
approx_percentage_difference <- 100 * diff(log_spending)
cat("\n7. Percentage difference between School A and School B spending:", percentage_difference, "%\n")
## 
## 7. Percentage difference between School A and School B spending: 9.090909 %
cat(" Approximate percentage difference based on natural logs:", approx_percentage_difference, "%\n")
##  Approximate percentage difference based on natural logs: 8.701138 %