FI hw1

# Install the wooldridge data package only when it is missing, so re-running
# or knitting this script does not reinstall it (and hit the network) each time.
# The CRAN release is tried first; the GitHub development version is only a
# fallback, because installing both into the same library just overwrites the
# first install with the second.
if (!requireNamespace("wooldridge", quietly = TRUE)) {
  install.packages("wooldridge")
}

if (!requireNamespace("wooldridge", quietly = TRUE)) {
  # remotes itself may be absent on a fresh library.
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_github("JustinMShea/wooldridge")
}

# Attach the package before asking for help: `?wage1` looks only in packages
# that are already loaded, so calling it ahead of library() reported
# "No documentation for 'wage1'".
library(wooldridge)
?wage1

# Load the wage1 data and regress lwage on educ, exper and tenure by OLS.
data("wage1")
wageModel <- lm(lwage ~ educ + exper + tenure, data = wage1)
summary(wageModel)

## 
## Call:
## lm(formula = lwage ~ educ + exper + tenure, data = wage1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.05802 -0.29645 -0.03265  0.28788  1.42809 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.284360   0.104190   2.729  0.00656 ** 
## educ        0.092029   0.007330  12.555  < 2e-16 ***
## exper       0.004121   0.001723   2.391  0.01714 *  
## tenure      0.022067   0.003094   7.133 3.29e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4409 on 522 degrees of freedom
## Multiple R-squared:  0.316,  Adjusted R-squared:  0.3121 
## F-statistic: 80.39 on 3 and 522 DF,  p-value: < 2.2e-16

# C2: BWGHT data set
# Bring bwght into the workspace and show its first six rows.
data(list = "bwght")
head(bwght, n = 6L)

##   faminc cigtax cigprice bwght fatheduc motheduc parity male white cigs
## 1   13.5   16.5    122.3   109       12       12      1    1     1    0
## 2    7.5   16.5    122.3   133        6       12      2    1     0    0
## 3    0.5   16.5    122.3   129       NA       12      2    0     0    0
## 4   15.5   16.5    122.3   126       12       12      2    1     0    0
## 5   27.5   16.5    122.3   134       14       12      2    1     1    0
## 6    7.5   16.5    122.3   118       12       14      6    1     0    0
##     lbwght bwghtlbs packs    lfaminc
## 1 4.691348   6.8125     0  2.6026897
## 2 4.890349   8.3125     0  2.0149031
## 3 4.859812   8.0625     0 -0.6931472
## 4 4.836282   7.8750     0  2.7408400
## 5 4.897840   8.3750     0  3.3141861
## 6 4.770685   7.3750     0  2.0149031

# 1 Number of women in the sample (one row per observation)
n <- nrow(bwght)
n

## [1] 1388

# 2 Number of women who smoked during pregnancy (cigs > 0)
# na.rm = TRUE keeps the count defined even if cigs ever contains an NA,
# matching how every other aggregate in this script treats missing values.
smokers <- sum(bwght$cigs > 0, na.rm = TRUE)
smokers

## [1] 212

# 3 Average number of cigarettes smoked per day, over the whole sample
# (non-smokers contribute zeros)
avg_cigs <- mean(bwght$cigs, na.rm = TRUE)
avg_cigs

## [1] 2.087176

# 4 Average number of cigarettes smoked among smokers only (cigs > 0)
avg_cigs_smokers <- mean(bwght$cigs[bwght$cigs > 0], na.rm = TRUE)
avg_cigs_smokers

## [1] 13.66509

# 5 Average father's education
# fatheduc has missing values (see row 3 of the preview), so NAs are dropped.
avg_fatheduc <- mean(bwght$fatheduc, na.rm = TRUE)
avg_fatheduc

## [1] 13.18624

# 6 Number of observations used to compute fatheduc
# Counts the non-missing entries, i.e. the rows that entered the mean above.
n_fatheduc <- sum(!is.na(bwght$fatheduc))
n_fatheduc

## [1] 1192

# 7 Average family income
avg_faminc <- mean(bwght$faminc, na.rm = TRUE)
avg_faminc

## [1] 29.02666

# 8 Standard deviation of family income
sd_faminc <- sd(bwght$faminc, na.rm = TRUE)
sd_faminc

## [1] 18.73928

# C3: MEAP01 data set
# Bring meap01 into the workspace and show its first six rows.
data(list = "meap01")
head(meap01, n = 6L)

##   dcode bcode math4 read4 lunch enroll  expend    exppp  lenroll  lexpend
## 1  1010  4937  83.3  77.8 40.60    468 2747475 5870.673 6.148468 14.82619
## 2  2070   597  90.3  82.3 27.10    679 1505772 2217.632 6.520621 14.22482
## 3  2080  4860  61.9  71.4 41.75    400 2121871 5304.678 5.991465 14.56781
## 4  3010   790  85.7  60.0 12.75    251 1211034 4824.836 5.525453 14.00698
## 5  3010  1403  77.3  59.1 17.08    439 1913501 4358.772 6.084499 14.46445
## 6  3010  4056  85.2  67.0 23.17    561 2637483 4701.396 6.329721 14.78534
##     lexppp
## 1 8.677725
## 2 7.704195
## 3 8.576344
## 4 8.481532
## 5 8.379946
## 6 8.455615

# 1 Highest and lowest values of math4 (missing values ignored)
max_math4 <- with(meap01, max(math4, na.rm = TRUE))
min_math4 <- with(meap01, min(math4, na.rm = TRUE))
max_math4

## [1] 100

min_math4

## [1] 0

# 2 How many schools have a perfect pass rate (math4 equal to 100)
perfect_pass <- with(meap01, sum(math4 == 100, na.rm = TRUE))
perfect_pass

## [1] 38

# 3 Perfect-pass schools as a percentage of all rows in the sample
percent_perfect_pass <- perfect_pass / dim(meap01)[1L] * 100
percent_perfect_pass

## [1] 2.084476

# 4 How many schools have a math pass rate of exactly 50
pass_rate_50 <- with(meap01, sum(math4 == 50, na.rm = TRUE))
pass_rate_50

## [1] 17

# 5 Mean math pass rate
avg_math4 <- mean(meap01[["math4"]], na.rm = TRUE)
avg_math4

## [1] 71.909

# 6 Mean reading pass rate
avg_read4 <- mean(meap01[["read4"]], na.rm = TRUE)
avg_read4

## [1] 60.06188

# 7 Correlation of math4 with read4, using only rows where both are present
cor_math_read <- with(meap01, cor(math4, read4, use = "complete.obs"))
cor_math_read

## [1] 0.8427281

# 8 Mean of exppp (expenditure per pupil)
avg_exppp <- mean(meap01[["exppp"]], na.rm = TRUE)
avg_exppp

## [1] 5194.865

# 9 Standard deviation of exppp
sd_exppp <- sd(meap01[["exppp"]], na.rm = TRUE)
sd_exppp

## [1] 1091.89

# 10 Percentage difference in spending between School A and School B
# NOTE(review): assumes School A spends 6000 and School B spends 5500 per
# pupil - confirm against the exercise text.
spend_a <- 6000
spend_b <- 5500

# Exact percentage by which A's spending exceeds B's.
pct_diff <- 100 * (spend_a - spend_b) / spend_b
pct_diff

## [1] 9.090909

# Log-difference approximation of the same gap; here it comes out below the
# exact percentage computed above.
log_diff <- 100 * (log(spend_a) - log(spend_b))
log_diff

## [1] 8.701138

FI hw1

2024-09-22