library(AER)
## Loading required package: car
## Warning: package 'car' was built under R version 3.4.4
## Loading required package: carData
## Warning: package 'carData' was built under R version 3.4.4
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.4.4
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.4
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## Warning: package 'tibble' was built under R version 3.4.4
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'readr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.3
## Warning: package 'stringr' was built under R version 3.4.3
## Warning: package 'forcats' was built under R version 3.4.3
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
# Load the CPS1985 data set (available once library(AER) has been loaded) and
# keep a working copy under the shorter name `df`.
data("CPS1985")
df <- CPS1985

# NOTE: the data frame is deliberately not attach()ed. attach() places a copy
# of the columns on the search path, where they can shadow (or be shadowed by)
# other objects and do not follow later changes to `df`. Columns are instead
# resolved explicitly with with(df, ...), df$col or a `data = df` argument.

# Frequency table of occupation. with() lets the column be referenced by its
# bare name, so the printed table keeps the "occupation" header.
tab <- with(df, table(occupation))
tab
## occupation
## worker technical services office sales management
## 156 105 83 97 38 55

# Histogram of log wages on the density scale (freq = FALSE) so that the
# kernel density estimate can be overlaid on the same axes (palette colour 4).
# The bare `log(wage)` expression inside with() keeps the default title and
# axis label as "log(wage)".
with(df, hist(log(wage), freq = FALSE))
lines(density(log(df$wage)), col = 4)

# Frequency table of sector, shown as a bar chart and as a pie chart.
# `tab` is reused on purpose: from here on it holds the sector counts.
tab <- with(df, table(sector))
barplot(tab)

pie(tab)

# Two-way contingency table of counts: gender in rows, occupation in columns.
xtabs(formula = ~ gender + occupation, data = df)
## occupation
## gender worker technical services office sales management
## male 126 53 34 21 21 34
## female 30 52 49 76 17 21

# Formula-interface plots; the plot type is chosen by plot() from the classes
# of the variables involved.
# gender against occupation (both appear to be factors, judging by the table
# above):
plot(gender ~ occupation, data = df)

# wage against gender:
plot(wage ~ gender, data = df)

# Mean wage within each gender, computed two ways: by() prints one labelled
# section per group, tapply() returns a single named vector.
by(data = df$wage, INDICES = df$gender, FUN = mean)
## df$gender: male
## [1] 9.994913
## --------------------------------------------------------
## df$gender: female
## [1] 7.878857
tapply(X = df$wage, INDEX = df$gender, FUN = mean)
## male female
## 9.994913 7.878857

# Wage vectors for each gender. which() keeps only the rows where the
# comparison is TRUE, matching what subset() would select.
malewage <- df$wage[which(df$gender == "male")]
femalewage <- df$wage[which(df$gender == "female")]

# Group sizes.
length(malewage)
## [1] 289
length(femalewage)
## [1] 245

# Quantile-quantile plot of male against female wages. Both axes span the full
# wage range with no padding ("i" axis style), and the line y = x is added as
# the reference for identical distributions.
qqplot(
  x = malewage,
  y = femalewage,
  xlim = range(df$wage),
  ylim = range(df$wage),
  xaxs = "i",
  yaxs = "i",
  xlab = "male",
  ylab = "female"
)
abline(a = 0, b = 1)
