# Install wooldridge only when it is missing, so re-running the script does
# not reinstall the package (and need network access) on every execution.
if (!requireNamespace("wooldridge", quietly = TRUE)) {
  install.packages("wooldridge")
}

library(wooldridge)

# bwght data set from the wooldridge package; nrow(data) is used below as the
# number of women in the sample.
data <- wooldridge::bwght

i. How many women are in the sample, and how many report smoking during pregnancy?

# Number of rows in the bwght data, i.e. the sample size.
women <- dim(data)[1]
women

## [1] 1388

# Install dplyr only when it is missing, so re-running the script does not
# reinstall the package (and need network access) on every execution.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

# dplyr supplies filter() and the %>% pipe used below.
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Part (i) asks how many women report smoking during pregnancy, so count the
# women with cigs > 0 in addition to the non-smokers (cigs == 0).
nocigaretes <- data %>% filter(cigs == 0)
nonsmoking <- nrow(nocigaretes)
smoking <- sum(data$cigs > 0)
nonsmoking

## [1] 1176

smoking

## [1] 212

ii. What is the average number of cigarettes smoked per day? Is the average a good measure of the “typical” woman in this case? Explain.

# Sample mean of cigs over every row of the data (smokers and non-smokers).
average <- with(data, mean(cigs))
average

## [1] 2.087176

# Draw a histogram of cigs and keep the object returned by hist() in
# `histogram`; the plot shows how the values are distributed around the mean.
histogram <- hist(data$cigs)

iii. Among women who smoked during pregnancy, what is the average number of cigarettes smoked per day? How does this compare with your answer from part (ii), and why?

# Keep only the rows with a non-zero cigs value, then average cigs over them.
av_cigar <- filter(data, cigs != 0)
mean(av_cigar[["cigs"]])

## [1] 13.66509

iv. Find the average of fatheduc in the sample. Why are only 1,192 observations used to compute this average?

# Mean of fatheduc, computed over the non-missing values only.
av_feduc <- with(data, mean(fatheduc, na.rm = TRUE))
av_feduc

## [1] 13.18624

# Number of observations whose fatheduc value is missing (NA).
na_feduc <- length(which(is.na(data[["fatheduc"]])))
na_feduc

## [1] 196

v. Report the average family income and its standard deviation in dollars.

# faminc in bwght is recorded in thousands of dollars, so the mean and the
# standard deviation are scaled by 1000 to report them in dollars, as the
# question asks. The unscaled values are kept under their original names.
av_faminc <- mean(data$faminc)   # in $1000s
std_faminc <- sd(data$faminc)    # in $1000s
av_faminc_dollars <- av_faminc * 1000
std_faminc_dollars <- std_faminc * 1000
av_faminc_dollars

## [1] 29026.66

std_faminc_dollars

## [1] 18739.28

C3

# meap01 data set from the wooldridge package, used for exercise C3 below
# (columns referenced later: math4, read4, exppp).
data2 <- wooldridge::meap01

i. Find the largest and smallest values of math4. Does the range make sense?

# Largest and smallest observed values of math4.
with(data2, max(math4))

## [1] 100

with(data2, min(math4))

## [1] 0

ii. How many schools have a perfect pass rate on the math test? What percentage is this of the total sample?

# Rows whose math4 value is exactly 100, as a count and as a percentage of
# all rows in data2.
pass100 <- filter(data2, math4 == 100)
rate100 <- dim(pass100)[1]
perpass100 <- rate100 / dim(data2)[1] * 100
rate100

## [1] 38

perpass100

## [1] 2.084476

iii. How many schools have math pass rates of exactly 50%?

# Rows whose math4 value is exactly 50, and how many there are.
pass50 <- filter(data2, math4 == 50)
rate50 <- dim(pass50)[1]
rate50

## [1] 17

iv. Compare the average pass rates for the math and reading scores. Which test is harder to pass?

# Average of math4 followed by the average of read4, for comparison.
with(data2, mean(math4))

## [1] 71.909

with(data2, mean(read4))

## [1] 60.06188

v. Find the correlation between math4 and read4. What do you conclude?

# Correlation between math4 and read4 (cor() default method).
with(data2, cor(math4, read4))

## [1] 0.8427281

vi. The variable exppp is expenditure per pupil. Find the average of exppp along with its standard deviation. Would you say there is wide variation in per pupil spending?

# Average of exppp followed by its standard deviation.
with(data2, mean(exppp))

## [1] 5194.865

with(data2, sd(exppp))

## [1] 1091.89

# Plot the distribution of exppp and keep the object that hist() returns.
his <- hist(data2$exppp)

# Auto-print the stored histogram object (breaks, counts, density, mids, ...).
his

## $breaks
##  [1]  1000  2000  3000  4000  5000  6000  7000  8000  9000 10000 11000 12000
## 
## $counts
##  [1]   6  20 144 696 594 262  75  17   4   2   3
## 
## $density
##  [1] 3.291278e-06 1.097093e-05 7.899067e-05 3.817883e-04 3.258365e-04
##  [6] 1.437191e-04 4.114098e-05 9.325288e-06 2.194185e-06 1.097093e-06
## [11] 1.645639e-06
## 
## $mids
##  [1]  1500  2500  3500  4500  5500  6500  7500  8500  9500 10500 11500
## 
## $xname
## [1] "data2$exppp"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

exercise 1 - KhulanB

2024-01-11

i. How many women are in the sample, and how many report smoking during pregnancy?

ii. What is the average number of cigarettes smoked per day? Is the average a good measure of the “typical” woman in this case? Explain.

iii. Among women who smoked during pregnancy, what is the average number of cigarettes smoked per day? How does this compare with your answer from part (ii), and why?

iv. Find the average of fatheduc in the sample. Why are only 1,192 observations used to compute this average?

v. Report the average family income and its standard deviation in dollars.

C3

i. Find the largest and smallest values of math4. Does the range make sense?

ii. How many schools have a perfect pass rate on the math test? What percentage is this of the total sample?

iii. How many schools have math pass rates of exactly 50%?

iv. Compare the average pass rates for the math and reading scores. Which test is harder to pass?

v. Find the correlation between math4 and read4. What do you conclude?

vi. The variable exppp is expenditure per pupil. Find the average of exppp along with its standard deviation. Would you say there is wide variation in per pupil spending?