class: center, middle, inverse, title-slide .title[ # Advanced quantitative data analysis ] .subtitle[ ## Cross-sectional data analysis II ] .author[ ### Mengni Chen ] .institute[ ### Department of Sociology, University of Copenhagen ] --- <style type="text/css"> .remark-slide-content { font-size: 20px; padding: 20px 80px 20px 80px; } .remark-code, .remark-inline-code { background: #f0f0f0; } .remark-code { font-size: 14px; } </style> #Let's get ready ```r #install one new package install.packages("oaxaca") #for Blinder-Oaxaca Decomposition ``` ```r library(tidyverse) # see session 3. library(haven) # Import data, see session 3. library(janitor) #cleaning data library(estimatr) #robust standard error OLS library(texreg) #export regression result library(ggplot2) # Allows us to create nice figures library(oaxaca) # for Blinder-Oaxaca Decomposition ``` Prepare a dataset of age, sex, relationship status, life satisfaction - DV: life satisfaction (sat6) - IV: - age(age), - sex(sex_gen), - relationship status(relstat), - number of children (nkids), - health (hlt1) - religion (sd30) --- #Prepare data ```r wave1 <- read_dta("anchor1_50percent_Eng.dta") # sample size =6201 ``` ```r tabyl(wave1,age) #no missing tabyl(wave1,sex_gen) #no missing tabyl(wave1,relstat) # 34 cases reporting -7, needs cleaning tabyl(wave1, nkids) #no missing tabyl(wave1, hlt1) #10 cases reporting -1 or -2, needs cleaning tabyl(wave1,sd30) #28 cases reporting -1 or -2, needs cleaning ``` --- #Prepare data ```r wave1b <- wave1 %>% transmute( age, nkids, sat6=case_when(sat6<0 ~ as.numeric(NA), #specify when sat should be considered missing TRUE ~ as.numeric(sat6)), gender=as_factor(sex_gen) %>% fct_drop(), #treat sex_gen as categorical, and drop unused level relstat=as_factor(relstat), #treat relationship status as categorical relstat_new1=case_when( relstat=="-7 Incomplete data" ~ as.character(NA),#specify when it should be missing TRUE ~ as.character(relstat), ) %>% as_factor() %>% fct_drop(), 
#make relstat as a factor, and then drop unused levels in relstat_new1 health=case_when( hlt1<0 ~ NA,#specify when it should be missing TRUE ~ hlt1), religion=case_when( sd30<0 ~ as.character(NA), #specify when it should be missing sd30==7 ~ "No",#specify when it should be "no religion" sd30 %in% c(1:6) ~ "Yes" #specify when it should be "have religion" ) %>% as_factor()%>%fct_relevel("No", "Yes") #use "No" as reference level )%>% drop_na() #drop all observations with missing values in the sample # sample size change from 6201 to 6132 ``` --- #The relationship between having a partner and life satisfaction - Generate a new variable for relationship status ```r wave1c <- wave1b %>% mutate( relstat_new2=case_when( relstat_new1 %in% c("1 Never married single") ~ "single", #treat 'never married single' as 'single' relstat_new1 %in% c("2 Never married LAT", "3 Never married COHAB", "4 Married COHAB", "5 Married noncohabiting") ~ 'partnered', #treat the 4 situations as "partnered" relstat_new1 %in% c("6 Divorced/separated single", "7 Divorced/separated LAT", "8 Divorced/separated COHAB") ~ 'separated', #treat the 3 situations as "separated" relstat_new1 %in% c("9 Widowed single", "10 Widowed LAT") ~ 'widowed' #treat the 2 situations as "widowed" ) %>% as_factor()# make relstat_new2 as factor ) %>% filter(relstat_new2!= "widowed" & relstat_new2!= "separated") #only 4 widowed and 284 separated, dropping. 
# sample size change to 5845 after dropping widowed and separated ``` --- #Exploring the relationship between having a partner and life satisfaction First, we can estimate the average sat6 across relationship status to have a first look .pull-left[ ```r tabyl(wave1c,relstat_new2) ``` ``` ## relstat_new2 n percent ## single 2435 0.4165954 ## partnered 3410 0.5834046 ## separated 0 0.0000000 ## widowed 0 0.0000000 ``` ```r #check the frequency of each category ``` ] .pull-right[ ```r wave1c$relstat_new2 <- fct_drop(wave1c$relstat_new2) #drop unused levels rel_sat<- wave1c %>% group_by(relstat_new2) %>% dplyr::summarise( mean_sat6=mean(sat6), #calculate the mean of sat6 by marital1 ) rel_sat ``` ``` ## # A tibble: 2 × 2 ## relstat_new2 mean_sat6 ## <fct> <dbl> ## 1 single 7.51 ## 2 partnered 7.78 ``` ] --- #Why those partnered are happier than those singles ```r wave1c$relstat_new2 <- fct_drop(wave1c$relstat_new2) #drop unused levels rel_sat<- wave1c %>% group_by(relstat_new2) %>% dplyr::summarise( mean_sat6=mean(sat6), #calculate the mean of sat6 by marital1 mean_age=mean(age), mean_nkids=mean(nkids), mean_health=mean(health), mean_religion=mean(as.numeric(religion)-1) ) rel_sat ``` ``` ## # A tibble: 2 × 6 ## relstat_new2 mean_sat6 mean_age mean_nkids mean_health mean_religion ## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 single 7.51 20.3 0.0583 3.90 0.744 ## 2 partnered 7.78 29.1 0.969 3.79 0.725 ``` ```r #compare to the single group, the partnered group are older, had more children, slightly lower in health and religion. 
``` --- #Why those partnered are happier than those singles ```r OLS_partner <- lm_robust(formula = sat6 ~ age + nkids + health + religion, subset=(relstat_new2=="partnered"), data = wave1c) OLS_single <- lm_robust(formula = sat6 ~ age + nkids + health + religion, subset=(relstat_new2=="single"), data = wave1c) #subset=() here is to specify which sample for the OLS screenreg(list(OLS_partner,OLS_single),include.ci = FALSE, digits = 3, custom.model.names = c("OLS for partnered", "OLS for single"),#rename the models single.row =TRUE #to make the coef. and standard error in the same row ) ``` ``` ## ## ======================================================= ## OLS for partnered OLS for single ## ------------------------------------------------------- ## (Intercept) 5.947 (0.184) *** 7.038 (0.225) *** ## age -0.013 (0.005) ** -0.070 (0.006) *** ## nkids 0.060 (0.033) 0.076 (0.144) ## health 0.502 (0.031) *** 0.464 (0.042) *** ## religionYes 0.337 (0.061) *** 0.107 (0.082) ## ------------------------------------------------------- ## R^2 0.100 0.147 ## Adj. R^2 0.099 0.146 ## Num. obs. 3410 2435 ## RMSE 1.557 1.661 ## ======================================================= ## *** p < 0.001; ** p < 0.01; * p < 0.05 ``` --- #Why those partnered are happier than those singles ```r rel_sat ``` ``` ## # A tibble: 2 × 6 ## relstat_new2 mean_sat6 mean_age mean_nkids mean_health mean_religion ## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 single 7.51 20.3 0.0583 3.90 0.744 ## 2 partnered 7.78 29.1 0.969 3.79 0.725 ``` ```r #compared to the single group, the partnered group are older, had more children, slightly lower score in health, and slightly less likely to be religious.
``` <img src="https://github.com/fancycmn/24-Session-6/blob/main/Table%201b.JPG?raw=true" width="50%" style="display: block; margin: auto;" > --- #Why those partnered are happier than those singles - Blinder-Oaxaca Decomposition - an econometric method - often used to explain the difference in the means of the dependent variable between two groups - A good example is the gender difference in wage: why men's average wage is higher than women's wage - because men are more educated, more experienced, and work longer hours than women? (composition difference) - or because the returns to education, experience, and labor are higher for men than for women? (discrimination) - today, we learn the so-called two-fold Blinder-Oaxaca decomposition --- # OLS regression `$$y_i= \alpha+\beta*x_i + e_i$$` -- - Group A (treated)=Those partnered, the life satisfaction can be modelled as the following `$$y_i^a= \alpha^a +\beta_1^a*age_i^a +... +\beta_4^a*religion_i^a+ e_i^a$$` -- - Then, Group A (treated) =Those partnered, the average life satisfaction is `$$\overline{y^a}= \alpha^a +\beta_1^a*\overline{age_i^a} + ...+\beta_4^a*\overline{religion_i^a}$$` -- - Group B (control)=Those single, the life satisfaction can be modeled as the following `$$y_i^b= \alpha^b +\beta_1^b*age_i^b + ...
+ \beta_4^b*religion_i^b + e_i^b$$` -- - Then, Group B (control)=Those single, the average life satisfaction is `$$\overline{y^b}= \alpha^b +\beta_1^b*\overline{age_i^b} + ...+\beta_4^b*\overline{religion_i^b}$$` --- #Blinder-Oaxaca Decomposition The difference in average life satisfaction between the partnered and the singles can be described as: `$$\overline{y^a}-\overline{y^b}= \alpha^a - \alpha^b + \beta_1^a*\overline{age_i^a}- \beta_1^b*\overline{age_i^b} + ...+ \beta_4^a*\overline{religion_i^a}-\beta_4^b*\overline{religion_i^b}$$` some transformation `$$\overline{y^a}-\overline{y^b}=$$` `$$\alpha^a - \alpha^b +$$` `$$\color{red}\beta_\color{red}1^\color{red}b*(\overline{age_i^a}-\overline{age_i^b})+ \overline{age_i^a}*(\beta_1^a-\beta_1^b)+$$` `$$...+$$` `$$\color{red}\beta_\color{red}4^\color{red}b*(\overline{religion_i^a}-\overline{religion_i^b})+ \overline{religion_i^a}*(\beta_4^a-\beta_4^b)$$` *note: `\(a\)` represents "partnered group"; `\(b\)` represents "single group" --- #Blinder-Oaxaca Decomposition <img src="https://github.com/fancycmn/24-Session-6/blob/main/Formula5.JPG?raw=true" width="100%" style="display: block; margin: auto;" > --- #Blinder-Oaxaca Decomposition - explained part: attributed to cross-group differences in the explanatory variables - unexplained part: attributed to differences in the intercept + differences in the regression coefficients <img src="https://github.com/fancycmn/24-Session-6/blob/main/Formula5.JPG?raw=true" width="80%" style="display: block; margin: auto;" > --- #Blinder-Oaxaca Decomposition The difference in average life satisfaction between the partnered and the singles can be described as: `$$\overline{y^a}-\overline{y^b}= \alpha^a - \alpha^b + \beta_1^a*\overline{age_i^a}- \beta_1^b*\overline{age_i^b} + ...+ \beta_4^a*\overline{religion_i^a}-\beta_4^b*\overline{religion_i^b}$$` some transformation `$$\overline{y^a}-\overline{y^b}=$$` `$$\alpha^a - \alpha^b +$$`
`$$\color{red}\beta_\color{red}1^\color{red}a*(\overline{age_i^a}-\overline{age_i^b})+ \overline{age_i^b}*(\beta_1^a-\beta_1^b)+$$` `$$...+$$` `$$\color{red}\beta_\color{red}4^\color{red}a*(\overline{religion_i^a}-\overline{religion_i^b})+ \overline{religion_i^b}*(\beta_4^a-\beta_4^b)$$` *note: `\(a\)` represents "partnered"; `\(b\)` represents "single" --- #Blinder-Oaxaca Decomposition The difference in average life satisfaction between the partnered and the singles can be described as: `$$\overline{y^a}-\overline{y^b}= \alpha^a - \alpha^b + \beta_1^a*\overline{age_i^a}- \beta_1^b*\overline{age_i^b} + ...+ \beta_4^a*\overline{religion_i^a}-\beta_4^b*\overline{religion_i^b}$$` some transformation `$$\overline{y^a}-\overline{y^b}=$$` `$$\alpha^a - \alpha^b +$$` `$$\dfrac{\color{red}(\color{red}\beta_\color{red}1^\color{red}a\color{red}+\color{red}\beta_\color{red}1^\color{red}b\color{red})}{\color{red}{2}}*(\overline{age_i^a}-\overline{age_i^b})+ \dfrac{(\overline{age_i^a}+\overline{age_i^b})}2*(\beta_1^a-\beta_1^b)+$$` `$$...+$$` `$$\dfrac{\color{red}(\color{red}\beta_\color{red}4^\color{red}a\color{red}+\color{red}\beta_\color{red}4^\color{red}b\color{red})}{\color{red}{2}}*(\overline{religion_i^a}-\overline{religion_i^b})+ \dfrac{(\overline{religion_i^a}+\overline{religion_i^b})}2*(\beta_4^a-\beta_4^b)$$` *note: `\(a\)` represents "partnered"; `\(b\)` represents "single" --- #Blinder-Oaxaca Decomposition Which set of coefficients should be used as the reference? (In the oaxaca package, the weight is the share given to Group A's coefficients.) - Group B (control) coefficients used as reference, i.e. `\(\color{red}\beta^\color{red}b\)`. **In oaxaca package, group.weights=0** - Group A (treated) coefficients used as reference, i.e. `\(\color{red}\beta^\color{red}a\)`. **In oaxaca package, group.weights=1** - Equally weighted average (each 0.5) of Group A and B coefficients used as reference, as in Reimers (1983).
That is, `\(\dfrac{\color{red}(\color{red}\beta_\color{red}1^\color{red}a\color{red}+\color{red}\beta_\color{red}1^\color{red}b\color{red})}{\color{red}{2}}\)`. **In oaxaca package, group.weights=0.5** - an average of Group A and B coefficients weighted by the number of observations in Group A and B, following Cotton (1988). - Coefficients from a pooled regression (that does not include the group indicator variable) used as reference, as suggested by Neumark (1988). **In oaxaca package, group.weights=-1** - Coefficients from a pooled regression (that includes the group indicator) used as reference. See Jann (2008). **In oaxaca package, group.weights=-2** - In oaxaca package, all the situations are estimated. --- #Blinder-Oaxaca Decomposition: coding - first step: identify group, here is "relstat_new3" ```r wave1c <- wave1c %>% mutate( relstat_new3=case_when( relstat_new2 %in% c("single") ~ TRUE, #oaxaca treat those TRUE as the control group,i.e. group B relstat_new2 %in% c("partnered") ~ FALSE) #oaxaca treat those FALSE as the treated group, i.e. group A ) ``` - second step: decompose ```r #oaxaca(#formula = y ~ x1 + x2.. | your group variable, data=your data) result <- oaxaca(formula = sat6 ~ age + nkids + health + religion | relstat_new3 , data = wave1c) ``` ``` ## oaxaca: oaxaca() performing analysis. Please wait. 
``` ``` ## ## Bootstrapping standard errors: ``` ``` ## 1 / 100 (1%) ``` ``` ## 10 / 100 (10%) ``` ``` ## 20 / 100 (20%) ``` ``` ## 30 / 100 (30%) ``` ``` ## 40 / 100 (40%) ``` ``` ## 50 / 100 (50%) ``` ``` ## 60 / 100 (60%) ``` ``` ## 70 / 100 (70%) ``` ``` ## 80 / 100 (80%) ``` ``` ## 90 / 100 (90%) ``` ``` ## 100 / 100 (100%) ``` --- #Blinder-Oaxaca Decomposition: understand results ```r result #look at what the result of decomposition looks ``` ``` ## $beta ## $beta$beta.A ## (Intercept) age nkids health religionYes ## 5.94657309 -0.01288393 0.05987523 0.50224933 0.33670445 ## ## $beta$beta.B ## (Intercept) age nkids health religionYes ## 7.03838495 -0.07037895 0.07564373 0.46434948 0.10704506 ## ## $beta$beta.diff ## (Intercept) age nkids health religionYes ## -1.09181186 0.05749502 -0.01576851 0.03789985 0.22965939 ## ## $beta$beta.R ## (Intercept) age nkids health religionYes ## [1,] 0.0000000 7.038385 -0.07037895 0.07564373 0.4643495 0.1070451 ## [2,] 1.0000000 5.946573 -0.01288393 0.05987523 0.5022493 0.3367045 ## [3,] 0.5000000 6.492479 -0.04163144 0.06775948 0.4832994 0.2218748 ## [4,] 0.5834046 6.401417 -0.03683609 0.06644431 0.4864604 0.2410294 ## [5,] -1.0000000 6.109504 -0.02582568 0.19722216 0.4957332 0.2645122 ## [6,] -2.0000000 6.705597 -0.03890896 0.14254406 0.4953289 0.2609984 ## ## ## $call ## oaxaca(formula = sat6 ~ age + nkids + health + religion | relstat_new3, ## data = wave1c) ## ## $n ## $n$n.A ## [1] 3410 ## ## $n$n.B ## [1] 2435 ## ## $n$n.pooled ## [1] 5845 ## ## ## $R ## [1] 100 ## ## $reg ## $reg$reg.A ## ## Call: ## NULL ## ## Coefficients: ## (Intercept) age nkids health religionYes ## 5.94657 -0.01288 0.05988 0.50225 0.33670 ## ## ## $reg$reg.B ## ## Call: ## NULL ## ## Coefficients: ## (Intercept) age nkids health religionYes ## 7.03838 -0.07038 0.07564 0.46435 0.10705 ## ## ## $reg$reg.pooled.1 ## ## Call: ## NULL ## ## Coefficients: ## (Intercept) age nkids health religionYes ## 6.10950 -0.02583 0.19722 0.49573 0.26451 ## ## ## 
$reg$reg.pooled.2 ## ## Call: ## NULL ## ## Coefficients: ## (Intercept) age nkids health ## 6.70560 -0.03891 0.14254 0.49533 ## religionYes relstat_new3TRUE ## 0.26100 -0.54465 ## ## ## ## $threefold ## $threefold$overall ## coef(endowments) se(endowments) coef(coefficients) se(coefficients) ## -0.60740111 0.11115988 0.39187202 0.04685037 ## coef(interaction) se(interaction) ## 0.48570318 0.11692835 ## ## $threefold$variables ## coef(endowments) se(endowments) coef(coefficients) se(coefficients) ## (Intercept) 0.000000000 0.000000000 -1.0918118635 0.291728466 ## age -0.622738578 0.056984429 1.1658856925 0.160018982 ## nkids 0.068858905 0.120736151 -0.0009195598 0.008149348 ## health -0.051432520 0.013215470 0.1478172132 0.181716406 ## religionYes -0.002088922 0.002666225 0.1709005412 0.077451411 ## coef(interaction) se(interaction) ## (Intercept) 0.000000000 0.000000000 ## age 0.508736888 0.069768576 ## nkids -0.014354159 0.123597535 ## health -0.004197883 0.005684331 ## religionYes -0.004481668 0.003611676 ## ## ## $twofold ## $twofold$overall ## group.weight coef(explained) se(explained) coef(unexplained) ## [1,] 0.0000000 -0.6074011 0.11115988 0.8775752 ## [2,] 1.0000000 -0.1216979 0.03413745 0.3918720 ## [3,] 0.5000000 -0.3645495 0.05781766 0.6347236 ## [4,] 0.5834046 -0.3240396 0.06611645 0.5942137 ## [5,] -1.0000000 -0.1090531 0.02958618 0.3792272 ## [6,] -2.0000000 -0.2744792 0.03355034 0.5446532 ## se(unexplained) coef(unexplained A) se(unexplained A) coef(unexplained B) ## [1,] 0.11308414 8.775752e-01 1.130841e-01 0.0000000 ## [2,] 0.04685037 0.000000e+00 0.000000e+00 0.3918720 ## [3,] 0.06382343 4.387876e-01 5.654207e-02 0.1959360 ## [4,] 0.07107777 3.655938e-01 6.597381e-02 0.2286199 ## [5,] 0.03365214 1.579843e-01 1.403577e-02 0.2212429 ## [6,] 0.04698833 -1.841235e-13 1.270068e-13 0.5446532 ## se(unexplained B) ## [1,] 0.00000000 ## [2,] 0.04685037 ## [3,] 0.02342518 ## [4,] 0.01951765 ## [5,] 0.01996223 ## [6,] 0.04698833 ## ## $twofold$variables ## 
$twofold$variables[[1]] ## group.weight coef(explained) se(explained) coef(unexplained) ## (Intercept) 0 0.000000000 0.000000000 -1.09181186 ## age 0 -0.622738578 0.056984429 1.67462258 ## nkids 0 0.068858905 0.120736151 -0.01527372 ## health 0 -0.051432520 0.013215470 0.14361933 ## religionYes 0 -0.002088922 0.002666225 0.16641887 ## se(unexplained) coef(unexplained A) se(unexplained A) ## (Intercept) 0.29172847 -1.09181186 0.29172847 ## age 0.22882538 1.67462258 0.22882538 ## nkids 0.13169995 -0.01527372 0.13169995 ## health 0.17631207 0.14361933 0.17631207 ## religionYes 0.07515867 0.16641887 0.07515867 ## coef(unexplained B) se(unexplained B) ## (Intercept) 0 0 ## age 0 0 ## nkids 0 0 ## health 0 0 ## religionYes 0 0 ## ## $twofold$variables[[2]] ## group.weight coef(explained) se(explained) coef(unexplained) ## (Intercept) 1 0.00000000 0.000000000 -1.0918118635 ## age 1 -0.11400169 0.037385343 1.1658856925 ## nkids 1 0.05450475 0.031726680 -0.0009195598 ## health 1 -0.05563040 0.014238382 0.1478172132 ## religionYes 1 -0.00657059 0.004519059 0.1709005412 ## se(unexplained) coef(unexplained A) se(unexplained A) ## (Intercept) 0.291728466 0 0 ## age 0.160018982 0 0 ## nkids 0.008149348 0 0 ## health 0.181716406 0 0 ## religionYes 0.077451411 0 0 ## coef(unexplained B) se(unexplained B) ## (Intercept) -1.0918118635 0.291728466 ## age 1.1658856925 0.160018982 ## nkids -0.0009195598 0.008149348 ## health 0.1478172132 0.181716406 ## religionYes 0.1709005412 0.077451411 ## ## $twofold$variables[[3]] ## group.weight coef(explained) se(explained) coef(unexplained) ## (Intercept) 0.5 0.000000000 0.00000000 -1.091811864 ## age 0.5 -0.368370134 0.03324953 1.420254137 ## nkids 0.5 0.061681825 0.06303025 -0.008096639 ## health 0.5 -0.053531462 0.01343920 0.145718271 ## religionYes 0.5 -0.004329756 0.00324103 0.168659707 ## se(unexplained) coef(unexplained A) se(unexplained A) ## (Intercept) 0.29172847 -0.545905932 0.14586423 ## age 0.19433644 0.837311290 0.11441269 ## nkids 
0.06990391 -0.007636859 0.06584997 ## health 0.17901207 0.071809665 0.08815604 ## religionYes 0.07629228 0.083209437 0.03757934 ## coef(unexplained B) se(unexplained B) ## (Intercept) -0.5459059318 0.145864233 ## age 0.5829428463 0.080009491 ## nkids -0.0004597799 0.004074674 ## health 0.0739086066 0.090858203 ## religionYes 0.0854502706 0.038725706 ## ## $twofold$variables[[4]] ## group.weight coef(explained) se(explained) coef(unexplained) ## (Intercept) 0.5834046 0.000000000 0.000000000 -1.091811864 ## age 0.5834046 -0.325939128 0.035967401 1.377823130 ## nkids 0.5834046 0.060484622 0.072186797 -0.006899436 ## health 0.5834046 -0.053881584 0.013360191 0.146068394 ## religionYes 0.5834046 -0.004703547 0.003079714 0.169033499 ## se(unexplained) coef(unexplained A) se(unexplained A) ## (Intercept) 0.29172847 -0.454843779 0.17019573 ## age 0.20007998 0.697640031 0.13349778 ## nkids 0.08021177 -0.006362961 0.07683436 ## health 0.17856138 0.059831149 0.10286128 ## religionYes 0.07610138 0.069329334 0.04384792 ## coef(unexplained B) se(unexplained B) ## (Intercept) -0.6369680846 0.121532731 ## age 0.6801830986 0.066663169 ## nkids -0.0005364754 0.003394981 ## health 0.0862372450 0.075702216 ## religionYes 0.0997041652 0.032265900 ## ## $twofold$variables[[5]] ## group.weight coef(explained) se(explained) coef(unexplained) ## (Intercept) -1 0.00000000 0.000000000 -1.0918119 ## age -1 -0.22851506 0.029518498 1.2803991 ## nkids -1 0.17953241 0.028558292 -0.1259472 ## health -1 -0.05490866 0.013412580 0.1470955 ## religionYes -1 -0.00516180 0.003587765 0.1694918 ## se(unexplained) coef(unexplained A) se(unexplained A) ## (Intercept) 0.29172847 -0.16293056 0.13211142 ## age 0.18727087 0.37694667 0.09873885 ## nkids 0.02060377 -0.13303722 0.01582417 ## health 0.17953739 0.02469249 0.07414487 ## religionYes 0.07654547 0.05231292 0.03027886 ## coef(unexplained B) se(unexplained B) ## (Intercept) -0.928881301 0.171011047 ## age 0.903452391 0.101874527 ## nkids 0.007089994 
0.007532673 ## health 0.122402984 0.108775557 ## religionYes 0.117178832 0.047651827 ## ## $twofold$variables[[6]] ## group.weight coef(explained) se(explained) coef(unexplained) ## (Intercept) -2 0.000000000 0.000000000 -1.09181186 ## age -2 -0.344280679 0.030890693 1.39616468 ## nkids -2 0.129758640 0.028866208 -0.07617345 ## health -2 -0.054863884 0.013769382 0.14705069 ## religionYes -2 -0.005093231 0.003669041 0.16942318 ## se(unexplained) coef(unexplained A) se(unexplained A) ## (Intercept) 0.29172847 -0.75902411 0.12532856 ## age 0.19000762 0.75801540 0.10203278 ## nkids 0.01881962 -0.08007483 0.01356851 ## health 0.17954889 0.02622441 0.07235732 ## religionYes 0.07652859 0.05485913 0.03027930 ## coef(unexplained B) se(unexplained B) ## (Intercept) -0.332787750 0.174323395 ## age 0.638149284 0.091348210 ## nkids 0.003901374 0.007591391 ## health 0.120826281 0.108404693 ## religionYes 0.114564050 0.047041165 ## ## ## ## $x ## $x$x.mean.A ## (Intercept) age nkids health religionYes ## 1.0000000 29.1263930 0.9686217 3.7894428 0.7246334 ## ## $x$x.mean.B ## (Intercept) age nkids health religionYes ## 1.00000000 20.27802875 0.05831622 3.90020534 0.74414784 ## ## $x$x.mean.diff ## (Intercept) age nkids health religionYes ## 0.00000000 8.84836421 0.91030548 -0.11076252 -0.01951441 ## ## ## $y ## $y$y.A ## [1] 7.77654 ## ## $y$y.B ## [1] 7.506366 ## ## $y$y.diff ## [1] 0.2701741 ## ## ## attr(,"class") ## [1] "oaxaca" ``` --- #Blinder-Oaxaca Decomposition: understand results how to read the result? you need to know what is [list in R](https://www.youtube.com/watch?v=X8lNTDeiKiE&t=171s) ```r #I want to see the resutl that uses the equally weighted average coefficent to decompose variable <- result$twofold$variables[[3]] overall <- result$twofold$overall[3,] result2<- rbind(variable, overall)[,c(1:5)] #rbind is to bind rows of "variable" and "overall" dataset. [,c(1:5)] after rbind is to select column 1 to 5. 
write.csv(result2, file = "OB decompose.csv")#use write.csv to export the result2 into a csv file. result2[,c(1,2,4)] ``` ``` ## group.weight coef(explained) coef(unexplained) ## (Intercept) 0.5 0.000000000 -1.091811864 ## age 0.5 -0.368370134 1.420254137 ## nkids 0.5 0.061681825 -0.008096639 ## health 0.5 -0.053531462 0.145718271 ## religionYes 0.5 -0.004329756 0.168659707 ## overall 0.5 -0.364549526 0.634723612 ``` --- #Blinder-Oaxaca Decomposition: understand results ```r plot(result, decomposition = "twofold", type="overall", group.weight = 0.5, title = "Overall") ``` <img src="https://github.com/fancycmn/24-Session-6/blob/main/plot%20overall_1b.JPG?raw=true" width="50%" style="display: block; margin: auto;" > --- #Blinder-Oaxaca Decomposition: understand results ```r plot(result, decomposition = "twofold", type="variables", group.weight = 0.5, title="by variable") ``` <img src="https://github.com/fancycmn/24-Session-6/blob/main/plot%20variables_1c.JPG?raw=true" width="40%" style="display: block; margin: auto;" > --- #Blinder-Oaxaca Decomposition: understand results .pull-left[ <img src="https://github.com/fancycmn/24-Session-6/blob/main/plot%20overall_1b.JPG?raw=true" width="100%" style="display: block; margin: auto;" > ] .pull-right[ <img src="https://github.com/fancycmn/24-Session-6/blob/main/plot%20variables_1c.JPG?raw=true" width="100%" style="display: block; margin: auto;" > ] --- #Take home 1. Exploratory analysis: two-fold Oaxaca decomposition 2. Two-fold Oaxaca decomposition - explained part - unexplained part - reference set of coefficients in OB decomposition 3. Important code - oaxaca(formula = y ~ x1 + x2 + x3 | your group var , data = your dataset) - plot(result, decomposition = "twofold", type="", group.weight =, title = "") --- class: center, middle #[Exercise](https://rpubs.com/fancycmn/1229752)