## Loading required package: foreign
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Loading required package: ggvis
## Loading required package: magrittr
## Loading required package: xtable
Importing the code to read GSSdata
## Source: local data frame [1 x 3]
##
## mean_hours sd_hours n
## (dbl) (dbl) (int)
## 1 24.07573 24.19891 4820
Beginning this assignment by recoding the missing-value codes (-1, 99, 9, 8, and, for some variables, 0) to NA
# Recode the HRS1 missing-value codes to NA in a single pass. `%in%` never
# returns NA, so rows that are already missing fall through unchanged.
#
# -1 and 99 were the codes recoded originally. 98 is added because it was
# left in the data: the largest residual of the HRS1 regression (59.152)
# corresponds to an observed value of 98 against a fitted value of about
# 38.85, i.e. a 98 was analysed as a real 98-hour week.
# NOTE(review): 98 is presumably the "don't know" code for HRS1 -- confirm
# against the GSS codebook. Printed results that follow were produced before
# this change and must be regenerated.
GSSdata$HRS1 <- ifelse(GSSdata$HRS1 %in% c(-1, 98, 99), NA, GSSdata$HRS1)
# Blank out the SUICIDE1 codes 0, 9 and 8 by applying the same
# equality-based recode once per code, in that order.
GSSdata$SUICIDE1 <- Reduce(
  function(responses, code) ifelse(responses == code, NA, responses),
  c(0, 9, 8),
  GSSdata$SUICIDE1
)
# Five-number summary of what is left, plus the count of NAs.
summary(GSSdata$SUICIDE1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 1.000 1.000 1.402 2.000 2.000 1708
# Blank out the SPANKING codes 0, 9 and 8 by applying the same
# equality-based recode once per code, in that order.
GSSdata$SPANKING <- Reduce(
  function(responses, code) ifelse(responses == code, NA, responses),
  c(0, 9, 8),
  GSSdata$SPANKING
)
# Five-number summary of what is left, plus the count of NAs.
summary(GSSdata$SPANKING)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 2.000 2.000 2.153 3.000 4.000 1634
# Blank out the XMOVIE codes 0, 9 and 8 by applying the same
# equality-based recode once per code, in that order.
GSSdata$XMOVIE <- Reduce(
  function(responses, code) ifelse(responses == code, NA, responses),
  c(0, 9, 8),
  GSSdata$XMOVIE
)
# Five-number summary of what is left, plus the count of NAs.
summary(GSSdata$XMOVIE)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 2.000 2.000 1.775 2.000 2.000 1573
1a) Histogram
Creation of a generic histogram of the Hours Worked from GSS data
# Histogram of HRS1; no bin width is passed, so ggvis picks one itself.
layer_histograms(ggvis(GSSdata, ~HRS1))
## Guessing width = 5 # range / 20
1b) Natural Logarithm of HRS1
Creation of a histogram using the natural logarithm of Hours Worked from GSS data
# Histogram of log(HRS1); no bin width is passed, so ggvis picks one itself.
layer_histograms(ggvis(GSSdata, ~log(HRS1)))
## Guessing width = 0.2 # range / 23
2a) Least Squares Regression Analysis
HRS1 (Hours Worked) is the dependent variable; it is regressed on the three independent variables selected here.
Definitions of these variables:
SUICIDE1 - Do you think a person has the right to end his or her own life if this person has an incurable disease?
1=Yes, 2=No
SPANKING - Do you strongly agree, agree, disagree or strongly disagree that it is sometimes necessary to discipline a child with a good, hard, spanking?
1=Strongly Agree, 2=Agree, 3=Disagree, 4=Strongly Disagree
XMOVIE - Have you seen an X-rated movie in the last year?
1=Yes, 2=No
# Least-squares fit of HRS1 on the three survey items. The formula is kept
# inline so the call echoed by print() reads the same.
gssfactors <- lm(
  formula = HRS1 ~ SUICIDE1 + SPANKING + XMOVIE,
  data = GSSdata
)
# Show the call and the estimated coefficients.
print(gssfactors)
##
## Call:
## lm(formula = HRS1 ~ SUICIDE1 + SPANKING + XMOVIE, data = GSSdata)
##
## Coefficients:
## (Intercept) SUICIDE1 SPANKING XMOVIE
## 49.1322 -0.8562 -1.7661 -2.0647
# Full regression table for gssfactors: residual quantiles, coefficient
# tests, R-squared and the overall F test.
summary(object = gssfactors)
##
## Call:
## lm(formula = HRS1 ~ SUICIDE1 + SPANKING + XMOVIE, data = GSSdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.589 -6.615 0.242 9.087 59.152
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.1322 2.7748 17.706 <2e-16 ***
## SUICIDE1 -0.8562 1.0559 -0.811 0.4177
## SPANKING -1.7661 0.6217 -2.841 0.0046 **
## XMOVIE -2.0647 1.1680 -1.768 0.0774 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.63 on 949 degrees of freedom
## (3867 observations deleted due to missingness)
## Multiple R-squared: 0.01321, Adjusted R-squared: 0.01009
## F-statistic: 4.234 on 3 and 949 DF, p-value: 0.005526
# Refit the model with log(HRS1) as the response and the same three
# predictors, then show its full regression table.
gssfactors_log <- lm(
  formula = log(HRS1) ~ SUICIDE1 + SPANKING + XMOVIE,
  data = GSSdata
)
summary(object = gssfactors_log)
##
## Call:
## lm(formula = log(HRS1) ~ SUICIDE1 + SPANKING + XMOVIE, data = GSSdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5774 -0.0690 0.1115 0.3081 1.0421
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.81799 0.09509 40.152 <2e-16 ***
## SUICIDE1 -0.01232 0.03618 -0.340 0.734
## SPANKING -0.04690 0.02131 -2.201 0.028 *
## XMOVIE -0.06108 0.04002 -1.526 0.127
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5356 on 949 degrees of freedom
## (3867 observations deleted due to missingness)
## Multiple R-squared: 0.008284, Adjusted R-squared: 0.005149
## F-statistic: 2.642 on 3 and 949 DF, p-value: 0.04816
## 2.5 % 97.5 %
## (Intercept) 43.686717 54.5777753
## SUICIDE1 -2.928340 1.2159950
## SPANKING -2.986202 -0.5459710
## XMOVIE -4.356755 0.2273967
3 APA Tables
Table 1 Regression Analysis

If \(\alpha\) = .05, then the p-value, .006 (0.005526), is less than \(\alpha\). Therefore, we reject the null hypothesis that there is no relationship between the dependent variable and the entire set of independent variables.
# One-row table of means for the response and the three predictors,
# each computed with missing values dropped.
summarise(
  GSSdata,
  mean_HRS1 = mean(HRS1, na.rm = TRUE),
  mean_SUICIDE1 = mean(SUICIDE1, na.rm = TRUE),
  mean_SPANKING = mean(SPANKING, na.rm = TRUE),
  mean_XMOVIE = mean(XMOVIE, na.rm = TRUE)
)
## Source: local data frame [1 x 4]
##
## mean_HRS1 mean_SUICIDE1 mean_SPANKING mean_XMOVIE
## (dbl) (dbl) (dbl) (dbl)
## 1 40.30427 1.401671 2.153484 1.774869
# One-row table of standard deviations for the response and the three
# predictors, each computed with missing values dropped.
summarise(
  GSSdata,
  sd_HRS1 = sd(HRS1, na.rm = TRUE),
  sd_SUICIDE1 = sd(SUICIDE1, na.rm = TRUE),
  sd_SPANKING = sd(SPANKING, na.rm = TRUE),
  sd_XMOVIE = sd(XMOVIE, na.rm = TRUE)
)
## Source: local data frame [1 x 4]
##
## sd_HRS1 sd_SUICIDE1 sd_SPANKING sd_XMOVIE
## (dbl) (dbl) (dbl) (dbl)
## 1 15.54908 0.4903148 0.8523578 0.4177328
Table 2, Logarithmic Regression Analysis

If \(\alpha\) = .05, then the p-value, .048 (0.04816), is less than \(\alpha\). Therefore, we reject the null hypothesis that there is no relationship between the dependent variable and the entire set of independent variables.
# 95% confidence intervals (the 2.5% and 97.5% bounds) for every
# coefficient of the log(HRS1) model.
confint(object = gssfactors_log, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 3.63138544 4.004597920
## SUICIDE1 -0.08332901 0.058688190
## SPANKING -0.08870701 -0.005085683
## XMOVIE -0.13962150 0.017467256
# Mean and standard deviation of log(HRS1), the response of gssfactors_log,
# with missing values dropped.
logHRS1 <- log(GSSdata[["HRS1"]])
mean(logHRS1, na.rm = TRUE)
## [1] 3.585312
sd(logHRS1, na.rm = TRUE)
## [1] 0.5540086
# Mean and standard deviation of log(SUICIDE1), with missing values dropped.
# NOTE(review): gssfactors_log is fit on SUICIDE1 itself, not on its log, so
# these figures do not describe a term of that model -- confirm they are
# the ones wanted for the table.
logSUICIDE1 <- log(GSSdata[["SUICIDE1"]])
mean(logSUICIDE1, na.rm = TRUE)
## [1] 0.2784171
sd(logSUICIDE1, na.rm = TRUE)
## [1] 0.3398604
# Mean and standard deviation of log(SPANKING), with missing values dropped.
# NOTE(review): gssfactors_log is fit on SPANKING itself, not on its log, so
# these figures do not describe a term of that model -- confirm they are
# the ones wanted for the table.
logSPANKING <- log(GSSdata[["SPANKING"]])
mean(logSPANKING, na.rm = TRUE)
## [1] 0.6826637
sd(logSPANKING, na.rm = TRUE)
## [1] 0.4237158
# Mean and standard deviation of log(XMOVIE), with missing values dropped.
# NOTE(review): gssfactors_log is fit on XMOVIE itself, not on its log, so
# these figures do not describe a term of that model -- confirm they are
# the ones wanted for the table.
logXMOVIE <- log(GSSdata[["XMOVIE"]])
mean(logXMOVIE, na.rm = TRUE)
## [1] 0.5370983
sd(logXMOVIE, na.rm = TRUE)
## [1] 0.2895503