# Install the 'wooldridge' data package only when it is not already available,
# so re-running the script does not re-download it (or fail offline) each time.
if (!requireNamespace("wooldridge", quietly = TRUE)) {
  install.packages("wooldridge")
}
library(wooldridge)
# bwght data set from the wooldridge package; used by the parts below.
data <- wooldridge::bwght

i. How many women are in the sample, and how many report smoking during pregnancy?

# Sample size: number of rows in the data set
# (assumes one row per woman, as the question text implies).
women <- nrow(data)
women
## [1] 1388
# Install dplyr only when it is missing; it supplies %>% and filter() used
# in this and later parts.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Women reporting zero cigarettes per day (non-smokers).
nocigaretes <- data %>% filter(cigs == 0)
nonsmoking <- nrow(nocigaretes)
nonsmoking
## [1] 1176
# The question asks for the number who DID smoke during pregnancy:
# count rows with a positive number of cigarettes per day.
smoking <- sum(data$cigs > 0)
smoking
## [1] 212

ii. What is the average number of cigarettes smoked per day? Is the average a good measure of the “typical” woman in this case? Explain.

# Mean cigarettes per day over the whole sample (smokers and non-smokers).
average <- with(data, mean(cigs))
print(average)
## [1] 2.087176
# Plot the distribution of cigs; the histogram object is kept for inspection.
histogram <- hist(data$cigs)

iii. Among women who smoked during pregnancy, what is the average number of cigarettes smoked per day? How does this compare with your answer from part (ii), and why?

# Keep only the rows with a non-zero cigarette count, then average cigs
# within that subset.
av_cigar <- filter(data, cigs != 0)
mean(av_cigar[["cigs"]])
## [1] 13.66509

iv. Find the average of fatheduc in the sample. Why are only 1,192 observations used to compute this average?

# Mean of fatheduc, skipping missing values.
av_feduc <- with(data, mean(fatheduc, na.rm = TRUE))
print(av_feduc)
## [1] 13.18624
# Number of rows where fatheduc is missing; these are the observations
# dropped from the mean above.
na_feduc <- length(which(is.na(data[["fatheduc"]])))
print(na_feduc)
## [1] 196

v. Report the average family income and its standard deviation in dollars.

# faminc is recorded in thousands of dollars in bwght (per the wooldridge
# package documentation), so rescale it before summarising in order to report
# the mean and standard deviation in dollars, as the question asks.
faminc_dollars <- data$faminc * 1000
av_faminc <- mean(faminc_dollars)
std_faminc <- sd(faminc_dollars)
av_faminc
## [1] 29026.66
std_faminc
## [1] 18739.28

C3

# Load the meap01 data set from the wooldridge package for exercise C3.
data2 <- wooldridge::meap01

i. Find the largest and smallest values of math4. Does the range make sense?

# Largest, then smallest, observed value of math4.
with(data2, max(math4))
## [1] 100
with(data2, min(math4))
## [1] 0

ii. How many schools have a perfect pass rate on the math test? What percentage is this of the total sample?

# Rows whose math4 value is exactly 100.
pass100 <- filter(data2, math4 == 100)
# Count of those rows, and that count as a percentage of all rows in data2.
rate100 <- nrow(pass100)
perpass100 <- nrow(pass100) / nrow(data2) * 100
print(rate100)
## [1] 38
print(perpass100)
## [1] 2.084476

iii. How many schools have math pass rates of exactly 50%?

# Rows whose math4 value is exactly 50, and how many there are.
pass50 <- filter(data2, math4 == 50)
rate50 <- nrow(pass50)
print(rate50)
## [1] 17

iv. Compare the average pass rates for the math and reading scores. Which test is harder to pass?

# Average math4 value, followed by the average read4 value, for comparison.
with(data2, mean(math4))
## [1] 71.909
with(data2, mean(read4))
## [1] 60.06188

v. Find the correlation between math4 and read4. What do you conclude?

# Correlation between math4 and read4 (cor() defaults to Pearson).
with(data2, cor(math4, read4))
## [1] 0.8427281

vi. The variable exppp is expenditure per pupil. Find the average of exppp along with its standard deviation. Would you say there is wide variation in per pupil spending?

# Mean and standard deviation of exppp.
with(data2, mean(exppp))
## [1] 5194.865
with(data2, sd(exppp))
## [1] 1091.89
# Histogram of exppp; the call is written as data2$exppp so that the stored
# xname (and plot title) stay "data2$exppp".
his <- hist(data2$exppp)

# Print the histogram object (breaks, counts, density, mids, ...).
his
## $breaks
##  [1]  1000  2000  3000  4000  5000  6000  7000  8000  9000 10000 11000 12000
## 
## $counts
##  [1]   6  20 144 696 594 262  75  17   4   2   3
## 
## $density
##  [1] 3.291278e-06 1.097093e-05 7.899067e-05 3.817883e-04 3.258365e-04
##  [6] 1.437191e-04 4.114098e-05 9.325288e-06 2.194185e-06 1.097093e-06
## [11] 1.645639e-06
## 
## $mids
##  [1]  1500  2500  3500  4500  5500  6500  7500  8500  9500 10500 11500
## 
## $xname
## [1] "data2$exppp"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"