# We retrieve the Vocab data set from the package "carData".
library(car)
## Loading required package: carData
library(carData)
# Load the Vocab data set into the workspace and preview its first six rows
# as a formatted table.
data("Vocab", package = "carData")
knitr::kable(utils::head(Vocab, n = 6L))
##          year    sex education vocabulary
## 19740001 1974   Male        14          9
## 19740002 1974   Male        16          9
## 19740003 1974 Female        10          9
## 19740004 1974 Female        10          5
## 19740005 1974 Female        12          8
## 19740006 1974   Male        16          8
library(tibble)
# Keep a tibble copy of the Vocab data; all later steps work on `dta`.
dta <- tibble::as_tibble(carData::Vocab)
# List the column names of the data set.
colnames(dta)
## [1] "year"       "sex"        "education"  "vocabulary"
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lattice)
# Preview the male respondents: keep the rows where sex is "Male" and show
# the first six of them.
head(dplyr::filter(dta, sex == "Male"), n = 6L)
## # A tibble: 6 x 4
##    year sex   education vocabulary
##   <dbl> <fct>     <dbl>      <dbl>
## 1  1974 Male         14          9
## 2  1974 Male         16          9
## 3  1974 Male         16          8
## 4  1974 Male         17          9
## 5  1974 Male         10          5
## 6  1974 Male          6          5
# Preview the female respondents: keep the rows where sex is "Female" and
# show the first six of them.
head(dplyr::filter(dta, sex == "Female"), n = 6L)
## # A tibble: 6 x 4
##    year sex    education vocabulary
##   <dbl> <fct>      <dbl>      <dbl>
## 1  1974 Female        10          9
## 2  1974 Female        10          5
## 3  1974 Female        12          8
## 4  1974 Female        12          3
## 5  1974 Female        11          5
## 6  1974 Female        11          6
# Split the data into one tibble per sex: dta_m (males) and dta_f (females).
dta_m <- dplyr::filter(dta, sex == "Male")
dta_f <- dplyr::filter(dta, sex == "Female")

# Raw data for males: vocabulary score against years of education.
# Points are grouped by survey year; type "p" draws the points and "r" adds a
# fitted regression line per group. The legend is laid out in six columns.
lattice::xyplot(
  vocabulary ~ education,
  data = dta_m,
  groups = year,
  type = c("p", "r"),
  auto.key = list(columns = 6)
)

# Correlation between education and vocabulary for males, by survey period.
# 1. Recode the survey year into period labels with inclusive upper bounds:
#      Q1 = up to 1980, Q2 = 1981-1990, Q3 = 1991-2000,
#      Q4 = 2001-2010, Q5 = after 2010.
#    case_when() uses the first matching condition, so each line only needs
#    its upper bound. The earlier strict bounds (e.g. year >= 1981 &
#    year < 1990) matched nothing for 1980, 1990, 2000 and 2010, so those
#    survey years were lumped into an <NA> group instead of a period.
# 2. Group by period and compute the correlation (cor() default: Pearson).
# 3. Conclusion: For males, the correlation between education and vocabulary
#    is positive and it gets weaker over the years. In the recorded run it
#    falls from 0.57 in Q1 to 0.47 in Q5 (after 2010); Q4 was 0.49.

dta_m %>%
  mutate(year = case_when(
    year <= 1980 ~ "Q1",
    year <= 1990 ~ "Q2",
    year <= 2000 ~ "Q3",
    year <= 2010 ~ "Q4",
    year > 2010 ~ "Q5"
  )) %>%
  group_by(year) %>%
  summarise(cor = cor(education, vocabulary))
# NOTE(review): the output below was recorded with the previous binning, which
# dropped the boundary years into the <NA> row. With the corrected bins the
# <NA> row disappears and the Q2-Q4 values change (Q1 as well, if the data
# contain rows for 1980). Re-run this chunk and refresh the figures.
## # A tibble: 6 x 2
##   year    cor
##   <chr> <dbl>
## 1 Q1    0.571
## 2 Q2    0.528
## 3 Q3    0.484
## 4 Q4    0.485
## 5 Q5    0.468
## 6 <NA>  0.492
# Raw data for females: vocabulary score against years of education.
# Points are grouped by survey year; type "p" draws the points and "r" adds a
# fitted regression line per group. The legend is laid out in six columns.
lattice::xyplot(
  vocabulary ~ education,
  data = dta_f,
  groups = year,
  type = c("p", "r"),
  auto.key = list(columns = 6)
)

# Correlation between education and vocabulary for females, by survey period.
# The survey year is recoded into period labels with inclusive upper bounds:
#   Q1 = up to 1980, Q2 = 1981-1990, Q3 = 1991-2000,
#   Q4 = 2001-2010, Q5 = after 2010.
# case_when() uses the first matching condition, so each line only needs its
# upper bound. The earlier strict bounds (e.g. year >= 1981 & year < 1990)
# matched nothing for 1980, 1990, 2000 and 2010, so those survey years were
# lumped into an <NA> group instead of a period.
dta_f %>%
  mutate(year = case_when(
    year <= 1980 ~ "Q1",
    year <= 1990 ~ "Q2",
    year <= 2000 ~ "Q3",
    year <= 2010 ~ "Q4",
    year > 2010 ~ "Q5"
  )) %>%
  group_by(year) %>%
  summarise(cor = cor(education, vocabulary))
# NOTE(review): the output below was recorded with the previous binning, which
# dropped the boundary years into the <NA> row. With the corrected bins the
# <NA> row disappears and the Q2-Q4 values change (Q1 as well, if the data
# contain rows for 1980). Re-run this chunk and refresh the figures.
## # A tibble: 6 x 2
##   year    cor
##   <chr> <dbl>
## 1 Q1    0.498
## 2 Q2    0.496
## 3 Q3    0.481
## 4 Q4    0.401
## 5 Q5    0.431
## 6 <NA>  0.472
# Conclusion: For females, the correlation between education and vocabulary is
# positive and it gets slightly weaker over the years. In the recorded run it
# is 0.50 in Q1 (up to 1980) and 0.40 in Q4 (2001-2010), then rises again to
# 0.43 in Q5 (after 2010). Re-check the Q4 figure after re-running, since Q4
# now includes 2010.