# Install wooldridge only when it is not already available, so re-running the
# script does not reinstall the package on every execution.
if (!requireNamespace("wooldridge", quietly = TRUE)) {
  install.packages("wooldridge")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(wooldridge)
# bwght data set from the wooldridge package; used for parts i-v below.
data <- wooldridge::bwght
i. How many women are in the sample, and how many report smoking
during pregnancy?
# Sample size for part (i): the number of rows in data.
women <- dim(data)[1L]
print(women)
## [1] 1388
# Install dplyr only when it is not already available, so re-running the
# script does not reinstall the package on every execution.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Part (i) asks how many women report smoking during pregnancy. The
# non-smoker count (cigs == 0) is kept for reference, and the smoker count
# (cigs > 0) is reported as the actual answer.
nocigaretes <- data %>% filter(cigs == 0)
nonsmoking <- nrow(nocigaretes)
nonsmoking
## [1] 1176
smoking <- sum(data$cigs > 0)
smoking
## [1] 212
ii. What is the average number of cigarettes smoked per day? Is the
average a good measure of the “typical” woman in this case?
Explain.
# Part (ii): mean of cigs over the full sample, followed by a histogram of
# the same column to show how the values are distributed.
average <- mean(data[["cigs"]])
print(average)
## [1] 2.087176
histogram <- hist(data$cigs)

iii. Among women who smoked during pregnancy, what is the average
number of cigarettes smoked per day? How does this compare with your
answer from part (ii), and why?
# Part (iii): keep only the rows with a non-zero cigs value, then average
# cigs within that subset.
av_cigar <- filter(data, cigs != 0)
mean(av_cigar[["cigs"]])
## [1] 13.66509
iv. Find the average of fatheduc in the sample. Why are only 1,192
observations used to compute this average?
# Part (iv): mean of fatheduc with missing values dropped, then the number of
# missing values that were excluded from that mean.
av_feduc <- mean(data[["fatheduc"]], na.rm = TRUE)
print(av_feduc)
## [1] 13.18624
na_feduc <- length(which(is.na(data[["fatheduc"]])))
print(na_feduc)
## [1] 196
v. Report the average family income and its standard deviation in
dollars.
# Part (v) asks for dollars. faminc is recorded in thousands of dollars
# (per the wooldridge bwght documentation -- verify), so the mean and
# standard deviation are scaled by 1000 before being reported.
av_faminc <- mean(data$faminc) * 1000
std_faminc <- sd(data$faminc) * 1000
av_faminc
## [1] 29026.66
std_faminc
## [1] 18739.28
C3
# meap01 data set from the wooldridge package; used for the C3 questions.
data2 <- getExportedValue("wooldridge", "meap01")
i. Find the largest and smallest values of math4. Does the range make
sense?
# Part (i): largest and smallest values of math4.
with(data2, max(math4))
## [1] 100
with(data2, min(math4))
## [1] 0
ii. How many schools have a perfect pass rate on the math test? What
percentage is this of the total sample?
# Part (ii): rows whose math4 equals 100, reported as a count and as a
# percentage of all rows in data2.
pass100 <- filter(data2, math4 == 100)
rate100 <- nrow(pass100)
perpass100 <- rate100 / nrow(data2) * 100
print(rate100)
## [1] 38
print(perpass100)
## [1] 2.084476
iii. How many schools have math pass rates of exactly 50%?
# Part (iii): number of rows whose math4 equals exactly 50.
pass50 <- filter(data2, math4 == 50)
rate50 <- dim(pass50)[1L]
print(rate50)
## [1] 17
iv. Compare the average pass rates for the math and reading scores.
Which test is harder to pass?
# Part (iv): average of math4 and of read4, printed one after the other for
# comparison.
with(data2, mean(math4))
## [1] 71.909
with(data2, mean(read4))
## [1] 60.06188
v. Find the correlation between math4 and read4. What do you
conclude?
# Part (v): correlation between math4 and read4 (cor's default method).
with(data2, cor(math4, read4))
## [1] 0.8427281
vi. The variable exppp is expenditure per pupil. Find the average of
exppp along with its standard deviation. Would you say there is wide
variation in per pupil spending?
# Part (vi): mean and standard deviation of exppp, then a histogram of the
# same column; the histogram object is printed to show its breaks and counts.
mean(data2[["exppp"]])
## [1] 5194.865
sd(data2[["exppp"]])
## [1] 1091.89
his <- hist(data2$exppp)

print(his)
## $breaks
## [1] 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000
##
## $counts
## [1] 6 20 144 696 594 262 75 17 4 2 3
##
## $density
## [1] 3.291278e-06 1.097093e-05 7.899067e-05 3.817883e-04 3.258365e-04
## [6] 1.437191e-04 4.114098e-05 9.325288e-06 2.194185e-06 1.097093e-06
## [11] 1.645639e-06
##
## $mids
## [1] 1500 2500 3500 4500 5500 6500 7500 8500 9500 10500 11500
##
## $xname
## [1] "data2$exppp"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"