#load cardata
library(car)
## Loading required package: carData
# Keep the Vocab data set from carData under a short working name.
cardta <- carData::Vocab

# First look at the data: its dimensions, the first ten rows, and the
# structure (column types) of the data frame.
dim(cardta)
## [1] 30351     4
head(cardta, n = 10)
##          year    sex education vocabulary
## 19740001 1974   Male        14          9
## 19740002 1974   Male        16          9
## 19740003 1974 Female        10          9
## 19740004 1974 Female        10          5
## 19740005 1974 Female        12          8
## 19740006 1974   Male        16          8
## 19740007 1974   Male        17          9
## 19740008 1974   Male        10          5
## 19740009 1974 Female        12          3
## 19740010 1974 Female        11          5
str(cardta)
## 'data.frame':    30351 obs. of  4 variables:
##  $ year      : num  1974 1974 1974 1974 1974 ...
##  $ sex       : Factor w/ 2 levels "Female","Male": 2 2 1 1 1 2 2 2 1 1 ...
##  $ education : num  14 16 10 10 12 16 17 10 12 11 ...
##  $ vocabulary: num  9 9 9 5 8 8 9 5 3 5 ...
##  - attr(*, "na.action")= 'omit' Named int  1 2 3 4 5 6 7 8 9 10 ...
##   ..- attr(*, "names")= chr  "19720001" "19720002" "19720003" "19720004" ...
# Split the data into separate female and male subsets
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# One data frame per sex, so each group can be plotted and modelled separately.
cardtaF <- dplyr::filter(cardta, sex == "Female")
cardtaM <- dplyr::filter(cardta, sex == "Male")
# Lattice plots for the female subset: a background grid ("g") plus a fitted
# regression line ("r"), grouped by survey year.
library(lattice)

# Education on the y-axis, vocabulary on the x-axis.
xyplot(
  education ~ vocabulary,
  data = cardtaF,
  groups = year,
  type = c("g", "r"),
  auto.key = list(columns = 5)
)

# Axes swapped: vocabulary on the y-axis, education on the x-axis.
xyplot(
  vocabulary ~ education,
  data = cardtaF,
  groups = year,
  type = c("g", "r"),
  auto.key = list(columns = 5)
)

# For females, the slope of vocabulary on education tends to get smaller in
# later survey years (compare the per-year regression coefficients below).
#
# One simple linear regression is fitted per survey year. The formula uses
# bare column names together with `data = x`, so the slope is labelled
# `education` instead of being tied to the anonymous function's argument
# name (`x$education`).
lapply(
  split(cardtaF, cardtaF$year),
  function(x) coef(lm(vocabulary ~ education, data = x))
)
## $`1974`
## (Intercept)   education 
##   1.5652579   0.3816095 
## 
## $`1976`
## (Intercept)   education 
##   1.7021281   0.3824002 
## 
## $`1978`
## (Intercept)   education 
##   1.3006416   0.4002707 
## 
## $`1982`
## (Intercept)   education 
##   0.9829602   0.3949758 
## 
## $`1984`
## (Intercept)   education 
##   1.4536872   0.3728698 
## 
## $`1987`
## (Intercept)   education 
##   0.9647931   0.3843508 
## 
## $`1988`
## (Intercept)   education 
##   1.1634561   0.3763999 
## 
## $`1989`
## (Intercept)   education 
##   1.0682600   0.3863606 
## 
## $`1990`
## (Intercept)   education 
##   0.4594812   0.4346902 
## 
## $`1991`
## (Intercept)   education 
##   1.1543766   0.3875821 
## 
## $`1993`
## (Intercept)   education 
##   1.7388287   0.3286325 
## 
## $`1994`
## (Intercept)   education 
##   1.6453365   0.3422146 
## 
## $`1996`
## (Intercept)   education 
##   1.1482811   0.3727178 
## 
## $`1998`
## (Intercept)   education 
##   1.4472751   0.3592843 
## 
## $`2000`
## (Intercept)   education 
##   1.9276040   0.3155532 
## 
## $`2004`
## (Intercept)   education 
##    2.104150    0.304056 
## 
## $`2006`
## (Intercept)   education 
##   2.7777171   0.2535376 
## 
## $`2008`
## (Intercept)   education 
##   2.6074315   0.2553971 
## 
## $`2010`
## (Intercept)   education 
##   1.3520300   0.3468821 
## 
## $`2012`
## (Intercept)   education 
##   1.7535298   0.3080832 
## 
## $`2014`
## (Intercept)   education 
##   2.3445239   0.2663464 
## 
## $`2016`
## (Intercept)   education 
##   2.0055919   0.2928955
# Lattice plots for the male subset: a background grid ("g") plus a fitted
# regression line ("r"), grouped by survey year.
library(lattice)

# Education on the y-axis, vocabulary on the x-axis.
xyplot(
  education ~ vocabulary,
  data = cardtaM,
  groups = year,
  type = c("g", "r"),
  auto.key = list(columns = 5)
)

# Axes swapped: vocabulary on the y-axis, education on the x-axis.
xyplot(
  vocabulary ~ education,
  data = cardtaM,
  groups = year,
  type = c("g", "r"),
  auto.key = list(columns = 5)
)

# For males, the slope of vocabulary on education tends to get smaller in
# later survey years (compare the per-year regression coefficients below).
#
# One simple linear regression is fitted per survey year. The formula uses
# bare column names together with `data = x`, so the slope is labelled
# `education` instead of being tied to the anonymous function's argument
# name (`x$education`).
lapply(
  split(cardtaM, cardtaM$year),
  function(x) coef(lm(vocabulary ~ education, data = x))
)
## $`1974`
## (Intercept)   education 
##   1.5318434   0.3713183 
## 
## $`1976`
## (Intercept)   education 
##   1.6342960   0.3555403 
## 
## $`1978`
## (Intercept)   education 
##   0.9762161   0.3963762 
## 
## $`1982`
## (Intercept)   education 
##   0.9730291   0.3832637 
## 
## $`1984`
## (Intercept)   education 
##    1.678465    0.337124 
## 
## $`1987`
## (Intercept)   education 
##   0.8103651   0.3818373 
## 
## $`1988`
## (Intercept)   education 
##   1.0459936   0.3592442 
## 
## $`1989`
## (Intercept)   education 
##   1.0596176   0.3708525 
## 
## $`1990`
## (Intercept)   education 
##   1.7000935   0.3377029 
## 
## $`1991`
## (Intercept)   education 
##   1.2504604   0.3683962 
## 
## $`1993`
## (Intercept)   education 
##   1.6384884   0.3221049 
## 
## $`1994`
## (Intercept)   education 
##   1.8684770   0.3146151 
## 
## $`1996`
## (Intercept)   education 
##   0.8221711   0.3770325 
## 
## $`1998`
## (Intercept)   education 
##   1.5199973   0.3314754 
## 
## $`2000`
## (Intercept)   education 
##   1.1203888   0.3558918 
## 
## $`2004`
## (Intercept)   education 
##   1.4259424   0.3411153 
## 
## $`2006`
## (Intercept)   education 
##   2.1383454   0.2952926 
## 
## $`2008`
## (Intercept)   education 
##   1.4212286   0.3277987 
## 
## $`2010`
## (Intercept)   education 
##   1.7996389   0.3135749 
## 
## $`2012`
## (Intercept)   education 
##   1.7303105   0.3061534 
## 
## $`2014`
## (Intercept)   education 
##   1.4804789   0.3262112 
## 
## $`2016`
## (Intercept)   education 
##   1.8562367   0.3031146

# Note: a regression coefficient (slope) is not the same thing as a correlation coefficient.