HW5 Socy 602

From larger GSS data set convert sexfreq to a metric

that is based on yearly average and subset year=2012 data

as HW5 containing age, race, sexfreq, and marital:

## sexfreq22 
##       n missing  unique    Info    Mean 
##   27786   31813       7    0.97   59.88 
## 
##              0    2   12   36   52  156  208
## Frequency 6059 2307 2959 4474 4793 5443 1751
## %           22    8   11   16   17   20    6

PART1

Create two dummy variables for race, black=1, white=0 & black=0, white=1:

## race
## WHITE BLACK OTHER 
##  1477   301   196

## black
## Black White 
##   301  1477

## white
## White Black 
##  1477   301

Get sexual frequency means for race and plot:

## [1] 72.65242

## [1] 84.39344

Compare previous means with t-test:

## 
##  Two Sample t-test
## 
## data:  sexfreq22 by race
## t = -2.0621, df = 1070, p-value = 0.03944
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -22.9131870  -0.5688613
## sample estimates:
## mean in group WHITE mean in group BLACK 
##            72.65242            84.39344

## 
## Call:
## lm(formula = sexfreq22 ~ I(race == "BLACK"), data = HW5.data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -84.39 -60.65 -34.52  83.35 135.35 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              72.652      2.352  30.883   <2e-16 ***
## I(race == "BLACK")TRUE   11.741      5.694   2.062   0.0394 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 70.14 on 1070 degrees of freedom
##   (706 observations deleted due to missingness)
## Multiple R-squared:  0.003958,   Adjusted R-squared:  0.003027 
## F-statistic: 4.252 on 1 and 1070 DF,  p-value: 0.03944

## 
## Call:
## lm(formula = sexfreq22 ~ I(race == "WHITE"), data = HW5.data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -84.39 -60.65 -34.52  83.35 135.35 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              84.393      5.185  16.276   <2e-16 ***
## I(race == "WHITE")TRUE  -11.741      5.694  -2.062   0.0394 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 70.14 on 1070 degrees of freedom
##   (706 observations deleted due to missingness)
## Multiple R-squared:  0.003958,   Adjusted R-squared:  0.003027 
## F-statistic: 4.252 on 1 and 1070 DF,  p-value: 0.03944

PART 2

Now we fit a regression model to sexfreq22 on marital status using “married”

as the reference group:

attach(HW5data)
table(marital)

## marital
##       MARRIED       WIDOWED      DIVORCED     SEPARATED NEVER MARRIED 
##           900           163           317            68           526

describeBy(sexfreq22, marital)

## group: MARRIED
##   vars   n  mean    sd median trimmed   mad min max range skew kurtosis
## 1    1 636 66.26 64.05     36   59.96 35.58   0 208   208 0.89     -0.7
##     se
## 1 2.54
## -------------------------------------------------------- 
## group: WIDOWED
##   vars  n  mean    sd median trimmed   mad min max range skew kurtosis
## 1    1 22 76.09 69.31     44   69.67 29.65   2 208   206  0.7    -1.16
##      se
## 1 14.78
## -------------------------------------------------------- 
## group: DIVORCED
##   vars   n  mean    sd median trimmed   mad min max range skew kurtosis
## 1    1 167 76.49 75.58     36   69.76 35.58   0 208   208 0.74    -1.13
##     se
## 1 5.85
## -------------------------------------------------------- 
## group: SEPARATED
##   vars  n mean   sd median trimmed   mad min max range skew kurtosis    se
## 1    1 37 97.3 81.1     52   95.87 74.13   0 208   208  0.2    -1.72 13.33
## -------------------------------------------------------- 
## group: NEVER MARRIED
##   vars   n  mean    sd median trimmed   mad min max range skew kurtosis
## 1    1 334 89.51 74.63     52   85.73 74.13   0 208   208 0.32    -1.55
##     se
## 1 4.08

model.marital.sexfreq = lm(sexfreq22~factor(marital), data=HW5data)
summary(model.marital.sexfreq)

## 
## Call:
## lm(formula = sexfreq22 ~ factor(marital), data = HW5data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -97.30 -54.26 -30.26  66.48 141.74 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    66.258      2.754  24.057  < 2e-16 ***
## factor(marital)WIDOWED          9.833     15.063   0.653  0.51401    
## factor(marital)DIVORCED        10.233      6.040   1.694  0.09046 .  
## factor(marital)SEPARATED       31.039     11.747   2.642  0.00834 ** 
## factor(marital)NEVER MARRIED   23.257      4.694   4.955 8.28e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.46 on 1191 degrees of freedom
##   (778 observations deleted due to missingness)
## Multiple R-squared:  0.02339,    Adjusted R-squared:  0.02011 
## F-statistic: 7.132 on 4 and 1191 DF,  p-value: 1.127e-05

Lets use linear combinations calculated by hand to compare divorced with

seperated:

coeffs = coef(model.marital.sexfreq); coeffs

##                  (Intercept)       factor(marital)WIDOWED 
##                    66.257862                     9.833047 
##      factor(marital)DIVORCED     factor(marital)SEPARATED 
##                    10.233156                    31.039436 
## factor(marital)NEVER MARRIED 
##                    23.257108

Beta1 = coeffs[3]; Beta1 # divorced

## factor(marital)DIVORCED 
##                10.23316

Beta2 = coeffs[4]; Beta2 # seperated

## factor(marital)SEPARATED 
##                 31.03944

var.covar = vcov(model.marital.sexfreq); var.covar

##                              (Intercept) factor(marital)WIDOWED
## (Intercept)                     7.585947              -7.585947
## factor(marital)WIDOWED         -7.585947             226.888764
## factor(marital)DIVORCED        -7.585947               7.585947
## factor(marital)SEPARATED       -7.585947               7.585947
## factor(marital)NEVER MARRIED   -7.585947               7.585947
##                              factor(marital)DIVORCED
## (Intercept)                                -7.585947
## factor(marital)WIDOWED                      7.585947
## factor(marital)DIVORCED                    36.476138
## factor(marital)SEPARATED                    7.585947
## factor(marital)NEVER MARRIED                7.585947
##                              factor(marital)SEPARATED
## (Intercept)                                 -7.585947
## factor(marital)WIDOWED                       7.585947
## factor(marital)DIVORCED                      7.585947
## factor(marital)SEPARATED                   137.982216
## factor(marital)NEVER MARRIED                 7.585947
##                              factor(marital)NEVER MARRIED
## (Intercept)                                     -7.585947
## factor(marital)WIDOWED                           7.585947
## factor(marital)DIVORCED                          7.585947
## factor(marital)SEPARATED                         7.585947
## factor(marital)NEVER MARRIED                    22.031042

S.E.beta1 = var.covar[13:13]; S.E.beta1 # variance of divorced

## [1] 36.47614

S.E.beta2 = var.covar[19:19]; S.E.beta2 # variance of seperated

## [1] 137.9822

cov.Div.Sep = var.covar[14:14]; cov.Div.Sep # covariance of divorced & seperated

## [1] 7.585947

# calculate standard error of difference 
S.E.diff = sqrt((S.E.beta1+S.E.beta2)-(2*cov.Div.Sep)); S.E.diff

## [1] 12.62087

# calculate t-value by dividing by standard error of difference
t.value = ((Beta2 - Beta1) - 0) / S.E.diff; t.value

## factor(marital)SEPARATED 
##                 1.648561

(Beta2 - Beta1) # result is same as mean dif between Divorced & Seperated

## factor(marital)SEPARATED 
##                 20.80628

We can also do this using the contrast function:

library(car)
local({
        .Hypothesis <- matrix(c(0,0,1,-1,0), 1, 5, byrow=TRUE) 
        # 1=divorced, -1=seperated
        .RHS <- c(0)
        linearHypothesis(model.marital.sexfreq, .Hypothesis, rhs=.RHS)
}) # take the sqrt of the F-test to get t-statistic

## Linear hypothesis test
## 
## Hypothesis:
## factor(marital)DIVORCED - factor(marital)SEPARATED = 0
## 
## Model 1: restricted model
## Model 2: sexfreq22 ~ factor(marital)
## 
##   Res.Df     RSS Df Sum of Sq      F Pr(>F)  
## 1   1192 5759285                             
## 2   1191 5746172  1     13112 2.7178 0.0995 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Or better yet, changing the reference group to “divorced”:

HW5data <- within(HW5data, marital <- relevel(marital, ref = 3))
summary(lm(sexfreq22~factor(marital), data=HW5data)) # reference=Divorced

## 
## Call:
## lm(formula = sexfreq22 ~ factor(marital), data = HW5data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -97.30 -54.26 -30.26  66.48 141.74 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   76.4910     5.3750  14.231   <2e-16 ***
## factor(marital)MARRIED       -10.2332     6.0395  -1.694   0.0905 .  
## factor(marital)WIDOWED        -0.4001    15.7541  -0.025   0.9797    
## factor(marital)SEPARATED      20.8063    12.6209   1.649   0.0995 .  
## factor(marital)NEVER MARRIED  13.0240     6.5830   1.978   0.0481 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 69.46 on 1191 degrees of freedom
##   (778 observations deleted due to missingness)
## Multiple R-squared:  0.02339,    Adjusted R-squared:  0.02011 
## F-statistic: 7.132 on 4 and 1191 DF,  p-value: 1.127e-05

Lets get the average yearly sexual frequency across marital status, but first

lets fit the model again using “married” as the reference group:


	Dependent variable:

	sexfreq22

factor(marital)WIDOWED	12.646
	(15.444)

factor(marital)DIVORCED	10.042
	(6.237)

factor(marital)SEPARATED	33.191^***
	(12.445)

factor(marital)NEVER MARRIED	24.022^***
	(5.006)

Constant	65.354^***
	(2.919)


Observations	1,072
R²	0.025
Adjusted R²	0.021
Residual Std. Error	69.499 (df = 1067)
F Statistic	6.806^*** (df = 4; 1067)

Note:	p<0.1; p<0.05; p<0.01

Using lsmeans we get predicted marginal means for each marital status:

## lm(formula = sexfreq22 ~ factor(marital), data = HW5.data)
##                              coef.est coef.se
## (Intercept)                  65.35     2.92  
## factor(marital)WIDOWED       12.65    15.44  
## factor(marital)DIVORCED      10.04     6.24  
## factor(marital)SEPARATED     33.19    12.45  
## factor(marital)NEVER MARRIED 24.02     5.01  
## ---
## n = 1072, k = 5
## residual sd = 69.50, R-Squared = 0.02

##  marital         lsmean        SE   df lower.CL  upper.CL
##  MARRIED       65.35450  2.918673 1067 59.62751  71.08149
##  WIDOWED       78.00000 15.165869 1067 48.24169 107.75831
##  DIVORCED      75.39623  5.511609 1067 64.58140  86.21105
##  SEPARATED     98.54545 12.098179 1067 74.80653 122.28438
##  NEVER MARRIED 89.37671  4.067106 1067 81.39628  97.35715
## 
## Confidence level used: 0.95

PART 3 & 4

Lets fit sexfreq2 regressed on race=“Black” and marital with “married” as the

reference group:

lm(formula = sexfreq22 ~ factor(race) + factor(marital), data = HW5.data) coef.est coef.se (Intercept) 64.53 2.99
factor(race)BLACK 7.20 5.72
factor(marital)WIDOWED 12.79 15.44
factor(marital)DIVORCED 9.37 6.26
factor(marital)SEPARATED 32.27 12.46
factor(marital)NEVER MARRIED 23.00 5.07
— n = 1072, k = 6 residual sd = 69.48, R-Squared = 0.03


	Dependent variable:

	sexfreq22

factor(race)BLACK	7.198
	(5.725)

factor(marital)WIDOWED	12.785
	(15.440)

factor(marital)DIVORCED	9.373
	(6.258)

factor(marital)SEPARATED	32.271^***
	(12.463)

factor(marital)NEVER MARRIED	22.999^***
	(5.070)

Constant	64.529^***
	(2.991)


Observations	1,072
R²	0.026
Adjusted R²	0.022
Residual Std. Error	69.480 (df = 1066)
F Statistic	5.764^*** (df = 5; 1066)

Note:	p<0.1; p<0.05; p<0.01

Next, we get the predicted means for each combination of race and marital

status:

means.mod.mar.blk = lsmeans(model.marital.blk, ~marital | race)
means.mod.mar.blk

## race = WHITE:
##  marital          lsmean        SE   df lower.CL  upper.CL
##  MARRIED        64.52936  2.990769 1066 58.66090  70.39783
##  WIDOWED        77.31450 15.171542 1066 47.54503 107.08398
##  DIVORCED       73.90236  5.636752 1066 62.84197  84.96275
##  SEPARATED      96.80055 12.174246 1066 72.91235 120.68876
##  NEVER MARRIED  87.52799  4.323696 1066 79.04406  96.01191
## 
## race = BLACK:
##  marital          lsmean        SE   df lower.CL  upper.CL
##  MARRIED        71.72707  5.848292 1066 60.25160  83.20255
##  WIDOWED        84.51221 16.022019 1066 53.07394 115.95049
##  DIVORCED       81.10007  7.137314 1066 67.09529  95.10485
##  SEPARATED     103.99827 12.848914 1066 78.78623 129.21030
##  NEVER MARRIED  94.72570  5.884829 1066 83.17853 106.27286
## 
## Confidence level used: 0.95

plot(means.mod.mar.blk, col=brewer.pal(5,"Set1"))

Lets add age to the previous model:

age.center = scale(age, scale=F) # can center age using scale, lsmeans centers
model.marital.race.age = lm(sexfreq22~factor(race) + factor(marital) + 
                                age, data=HW5.data)
display(model.marital.race.age)

lm(formula = sexfreq22 ~ factor(race) + factor(marital) + age, data = HW5.data) coef.est coef.se (Intercept) 153.14 7.70 factor(race)BLACK 9.79 5.35 factor(marital)WIDOWED 30.62 14.50 factor(marital)DIVORCED 10.30 5.86 factor(marital)SEPARATED 23.76 11.67 factor(marital)NEVER MARRIED -7.43 5.34 age -1.87 0.15 — n = 1071, k = 7 residual sd = 64.93, R-Squared = 0.15

stargazer(model.marital.race.age, type="html")


	Dependent variable:

	sexfreq22

factor(race)BLACK	9.787^*
	(5.354)

factor(marital)WIDOWED	30.618^**
	(14.501)

factor(marital)DIVORCED	10.300^*
	(5.865)

factor(marital)SEPARATED	23.760^**
	(11.667)

factor(marital)NEVER MARRIED	-7.429
	(5.340)

age	-1.865^***
	(0.151)

Constant	153.137^***
	(7.705)


Observations	1,071
R²	0.148
Adjusted R²	0.144
Residual Std. Error	64.929 (df = 1064)
F Statistic	30.907^*** (df = 6; 1064)

Note:	p<0.1; p<0.05; p<0.01

And, finally lets get the predicted means for each combination of race and

marital status while adjusting for age using the mean (M=48):

means.marital.race.age = lsmeans(model.marital.race.age, ~marital + age | race)
means.marital.race.age

## race = WHITE:
##  marital            age    lsmean        SE   df lower.CL  upper.CL
##  MARRIED       48.50794  62.64668  2.798965 1064 57.15457  68.13880
##  WIDOWED       48.50794  93.26502 14.236748 1064 65.32973 121.20031
##  DIVORCED      48.50794  72.94666  5.284984 1064 62.57648  83.31683
##  SEPARATED     48.50794  86.40661 11.407731 1064 64.02240 108.79081
##  NEVER MARRIED 48.50794  55.21791  4.812161 1064 45.77551  64.66031
## 
## race = BLACK:
##  marital            age    lsmean        SE   df lower.CL  upper.CL
##  MARRIED       48.50794  72.43387  5.466023 1064 61.70846  83.15928
##  WIDOWED       48.50794 103.05221 15.046610 1064 73.52781 132.57661
##  DIVORCED      48.50794  82.73384  6.681019 1064 69.62438  95.84331
##  SEPARATED     48.50794  96.19379 12.024801 1064 72.59878 119.78881
##  NEVER MARRIED 48.50794  65.00510  6.008358 1064 53.21552  76.79467
## 
## Confidence level used: 0.95

plot(means.marital.race.age, col=brewer.pal(5,"Set1"))

We must also caution our findings since we have very few cases across different

combination levels:

	MARRIED	WIDOWED	DIVORCED	SEPARATED	NEVER MARRIED
WHITE	719	133	245	44	336
BLACK	85	24	59	13	120

End of document for now: