AER (you will have to install it first,
but do not copy that line in the lab), and load the dataset as in
class.
library(AER)
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
data("CASchools")
# NOTE(review): "read_la." looks like the tail of the exercise prompt that was
# fused onto the library() call below; it is kept as a comment so that the
# call is a plain library(tidyverse) again.
# read_la.
library(tidyverse)
colnames(CASchools)
## [1] "district" "school" "county" "grades" "students"
## [6] "teachers" "calworks" "lunch" "computer" "expenditure"
## [11] "income" "english" "read" "math"
# Keep only the rows whose county is Los Angeles.
read_la <- CASchools[CASchools$county == "Los Angeles", ]
# Draw the histogram on the density scale so that the overlaid density curve
# shares its y-axis units; with the default count scale the curve would be
# squashed against zero.
ggplot(read_la, aes(x = read)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30) +
  geom_density()
# NOTE(review): the original first line began with a stray "CASchools."
# (apparently the tail of the exercise prompt), which made the assignment
# target a new object `CASchools.CASchools` instead of updating `CASchools`.
# The column holding the number of computers is `computer` (see the
# colnames() output), so it is used throughout.

# Drop rows with a missing computer count.
CASchools <- CASchools[!is.na(CASchools$computer), ]
# Treat zero counts as missing; for `students` this also avoids dividing by 0.
# NOTE(review): schools with zero computers therefore end up in neither
# group below -- confirm this is intended.
CASchools$computer[CASchools$computer == 0] <- NA
CASchools$students[CASchools$students == 0] <- NA
# Computers per student, and a 0/1 indicator for a ratio above 10%.
CASchools$computers_per_student <- CASchools$computer / CASchools$students
CASchools$computer_more_than_10 <- ifelse(
  CASchools$computers_per_student > 0.1, 1, 0
)
read score: the
distribution conditional on less than 10% of the students having a
computer, and the distribution conditional on more than 10% of students
having a computer. Make sure to define the labels.
library(ggplot2)
# Split the schools by the 0/1 computer-share indicator. which() discards
# rows whose indicator is NA; plain logical indexing would turn them into
# all-NA rows.
read_less_than_10 <- CASchools[which(CASchools$computer_more_than_10 == 0), ]
read_more_than_10 <- CASchools[which(CASchools$computer_more_than_10 == 1), ]
# Overlay the two conditional densities of the reading score. The group
# label is mapped inside aes() so that the manual scale below can build a
# legend; constant colours set outside aes() never reach a scale.
# No x-axis limits are set: NOTE(review) the previous limits of c(0, 100)
# assume a 0-100 score, but the reading scores in this dataset appear to lie
# well above 100, which would drop every observation -- confirm.
ggplot() +
  geom_density(
    data = read_less_than_10,
    aes(x = read, color = "< 10%", fill = "< 10%"),
    alpha = 0.5
  ) +
  geom_density(
    data = read_more_than_10,
    aes(x = read, color = "> 10%", fill = "> 10%"),
    alpha = 0.5
  ) +
  xlab("Read Score") +
  ylab("Density") +
  ggtitle("Reading Score Distribution") +
  scale_fill_manual(
    name = "Computer Ratio",
    values = c("< 10%" = "red", "> 10%" = "blue"),
    aesthetics = c("colour", "fill")
  )
# Density of the reading score over all schools (no conditioning).
read_population <- CASchools[, ]
# A layer has to be added to a ggplot() object to be drawn; calling
# geom_density() on its own only prints the layer description.
ggplot() +
  geom_density(
    data = read_population,
    aes(x = read),
    color = "black",
    fill = "black",
    alpha = 0.5
  )
## mapping: x = ~read
## geom_density: na.rm = FALSE, orientation = NA, outline.type = upper
## stat_density: na.rm = FALSE, orientation = NA
## position_identity
Are the distributions different? How?
Yes, one is on the population and one is on reading scores.
Make a table showing how the mean value of the read score differs by the share of students with a computer (more or less than 10%).