## Loading required package: foreign
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Loading required package: ggvis
## Loading required package: magrittr
## Loading required package: xtable

Importing the code to read GSSdata

## Source: local data frame [1 x 3]
## 
##   mean_hours sd_hours     n
##        (dbl)    (dbl) (int)
## 1   24.07573 24.19891  4820

This assignment begins by recoding the missing-data codes (-1, 99, 9, 8, and, for some variables, 0) to NA.

# Recode the HRS1 missing-data codes to NA in a single pass.
# 98 is added to the original -1/99 pair: the regression output further down
# (a maximum residual of 59.152 against fitted values of roughly 36-44) shows
# a raw HRS1 value of 98 surviving the recode. That is presumably the GSS
# "don't know" code, parallel to the 8 codes dropped for the other items --
# TODO confirm against the codebook. Every HRS1-based result printed in this
# document must be regenerated after this change.
# %in% never yields NA, so values that are already missing simply stay NA.
GSSdata$HRS1 <- ifelse(GSSdata$HRS1 %in% c(-1, 98, 99), NA, GSSdata$HRS1)
# Set the SUICIDE1 codes 0, 9 and 8 to NA with one membership test, then
# print the distribution of what remains.
GSSdata$SUICIDE1 <- ifelse(
  GSSdata$SUICIDE1 %in% c(0, 9, 8),
  NA,
  GSSdata$SUICIDE1
)
summary(GSSdata$SUICIDE1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   1.000   1.000   1.402   2.000   2.000    1708
# Set the SPANKING codes 0, 9 and 8 to NA with one membership test, then
# print the distribution of what remains.
GSSdata$SPANKING <- ifelse(
  GSSdata$SPANKING %in% c(0, 9, 8),
  NA,
  GSSdata$SPANKING
)
summary(GSSdata$SPANKING)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   2.000   2.000   2.153   3.000   4.000    1634
# Set the XMOVIE codes 0, 9 and 8 to NA with one membership test, then
# print the distribution of what remains.
GSSdata$XMOVIE <- ifelse(
  GSSdata$XMOVIE %in% c(0, 9, 8),
  NA,
  GSSdata$XMOVIE
)
summary(GSSdata$XMOVIE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.000   2.000   2.000   1.775   2.000   2.000    1573

1a) Histogram

Creation of a generic histogram of the Hours Worked from GSS data

# Histogram of HRS1 (hours worked). No bin width is supplied, so ggvis
# guesses one and reports its choice.
layer_histograms(ggvis(GSSdata, ~HRS1))
## Guessing width = 5 # range / 20

1b) Natural Logarithm of HRS1

Creation of a histogram using the natural logarithm of Hours Worked from GSS data

# Histogram of the natural log of HRS1. No bin width is supplied, so ggvis
# guesses one and reports its choice.
layer_histograms(ggvis(GSSdata, ~log(HRS1)))
## Guessing width = 0.2 # range / 23

2a) Least Squares Regression Analysis

HRS1 (hours worked) is the dependent variable; it is regressed on the three independent variables defined below.

Definitions of these variables:

SUICIDE1 - Do you think a person has the right to end his or her own life if this person has an incurable disease?

1=Yes, 2=No

SPANKING - Do you strongly agree, agree, disagree or strongly disagree that it is sometimes necessary to discipline a child with a good, hard, spanking?

1=Strongly Agree, 2=Agree, 3=Disagree, 4=Strongly Disagree

XMOVIE - Have you seen an X-rated movie in the last year?

1=Yes, 2=No

# Least-squares fit of HRS1 on the three survey items, each entered as its
# numeric code. The formula is written inline so the printed Call is unchanged.
gssfactors <- lm(
  formula = HRS1 ~ SUICIDE1 + SPANKING + XMOVIE,
  data = GSSdata
)
# Echo the fitted object: call plus coefficient estimates.
gssfactors
## 
## Call:
## lm(formula = HRS1 ~ SUICIDE1 + SPANKING + XMOVIE, data = GSSdata)
## 
## Coefficients:
## (Intercept)     SUICIDE1     SPANKING       XMOVIE  
##     49.1322      -0.8562      -1.7661      -2.0647
# Full summary of the untransformed HRS1 model: residual quantiles, the
# coefficient table with t-tests, R-squared and the overall F-test.
summary(gssfactors)
## 
## Call:
## lm(formula = HRS1 ~ SUICIDE1 + SPANKING + XMOVIE, data = GSSdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.589  -6.615   0.242   9.087  59.152 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  49.1322     2.7748  17.706   <2e-16 ***
## SUICIDE1     -0.8562     1.0559  -0.811   0.4177    
## SPANKING     -1.7661     0.6217  -2.841   0.0046 ** 
## XMOVIE       -2.0647     1.1680  -1.768   0.0774 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.63 on 949 degrees of freedom
##   (3867 observations deleted due to missingness)
## Multiple R-squared:  0.01321,    Adjusted R-squared:  0.01009 
## F-statistic: 4.234 on 3 and 949 DF,  p-value: 0.005526
# Same three predictors, but with log(HRS1) as the outcome; the predictors
# themselves are left untransformed. The summary is printed straight away.
gssfactors_log <- lm(
  formula = log(HRS1) ~ SUICIDE1 + SPANKING + XMOVIE,
  data = GSSdata
)
summary(gssfactors_log)
## 
## Call:
## lm(formula = log(HRS1) ~ SUICIDE1 + SPANKING + XMOVIE, data = GSSdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5774 -0.0690  0.1115  0.3081  1.0421 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.81799    0.09509  40.152   <2e-16 ***
## SUICIDE1    -0.01232    0.03618  -0.340    0.734    
## SPANKING    -0.04690    0.02131  -2.201    0.028 *  
## XMOVIE      -0.06108    0.04002  -1.526    0.127    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5356 on 949 degrees of freedom
##   (3867 observations deleted due to missingness)
## Multiple R-squared:  0.008284,   Adjusted R-squared:  0.005149 
## F-statistic: 2.642 on 3 and 949 DF,  p-value: 0.04816
##                 2.5 %     97.5 %
## (Intercept) 43.686717 54.5777753
## SUICIDE1    -2.928340  1.2159950
## SPANKING    -2.986202 -0.5459710
## XMOVIE      -4.356755  0.2273967

3 APA Tables

Table 1 Regression Analysis

Table 1

If \(\alpha\) = .05, then the p-value of the overall F-test, .006, is less than \(\alpha\). Therefore, we reject the null hypothesis that there is no relationship between the dependent variable and the entire set of independent variables.

# One-row table of means for the outcome and the three predictors; missing
# values are dropped column by column.
summarise(
  GSSdata,
  mean_HRS1 = mean(HRS1, na.rm = TRUE),
  mean_SUICIDE1 = mean(SUICIDE1, na.rm = TRUE),
  mean_SPANKING = mean(SPANKING, na.rm = TRUE),
  mean_XMOVIE = mean(XMOVIE, na.rm = TRUE)
)
## Source: local data frame [1 x 4]
## 
##   mean_HRS1 mean_SUICIDE1 mean_SPANKING mean_XMOVIE
##       (dbl)         (dbl)         (dbl)       (dbl)
## 1  40.30427      1.401671      2.153484    1.774869
# One-row table of standard deviations for the outcome and the three
# predictors; missing values are dropped column by column.
summarise(
  GSSdata,
  sd_HRS1 = sd(HRS1, na.rm = TRUE),
  sd_SUICIDE1 = sd(SUICIDE1, na.rm = TRUE),
  sd_SPANKING = sd(SPANKING, na.rm = TRUE),
  sd_XMOVIE = sd(XMOVIE, na.rm = TRUE)
)
## Source: local data frame [1 x 4]
## 
##    sd_HRS1 sd_SUICIDE1 sd_SPANKING sd_XMOVIE
##      (dbl)       (dbl)       (dbl)     (dbl)
## 1 15.54908   0.4903148   0.8523578 0.4177328

Table 2, Logarithmic Regression Analysis

Table 2

If \(\alpha\) = .05, then the p-value of the overall F-test, .048, is less than \(\alpha\). Therefore, we reject the null hypothesis that there is no relationship between the dependent variable and the entire set of independent variables.

# Confidence intervals for the log(HRS1) model's coefficients; no level is
# passed, so the 2.5 % / 97.5 % bounds shown are the confint() default.
confint(gssfactors_log)
##                   2.5 %       97.5 %
## (Intercept)  3.63138544  4.004597920
## SUICIDE1    -0.08332901  0.058688190
## SPANKING    -0.08870701 -0.005085683
## XMOVIE      -0.13962150  0.017467256
# Descriptives for the log-transformed outcome: natural log of HRS1, then its
# mean and standard deviation with missing values removed.
logHRS1 <- log(GSSdata[["HRS1"]])
mean(logHRS1, na.rm = TRUE)
## [1] 3.585312
sd(logHRS1, na.rm = TRUE)
## [1] 0.5540086
# Mean and standard deviation of the natural log of each predictor, with
# missing values removed.
# NOTE(review): gssfactors_log transforms only HRS1; SUICIDE1, SPANKING and
# XMOVIE enter that model untransformed. These logged descriptives therefore
# do not describe the predictors the model actually used -- confirm whether
# Table 2 should report the untransformed means/SDs instead.
logSUICIDE1 <- log(GSSdata$SUICIDE1)
mean(logSUICIDE1, na.rm=TRUE)
## [1] 0.2784171
sd(logSUICIDE1, na.rm=TRUE)
## [1] 0.3398604
# Same pair of statistics for log(SPANKING).
logSPANKING <- log(GSSdata$SPANKING)
mean(logSPANKING, na.rm=TRUE)
## [1] 0.6826637
sd(logSPANKING, na.rm=TRUE)
## [1] 0.4237158
# Same pair of statistics for log(XMOVIE).
logXMOVIE <- log(GSSdata$XMOVIE)
mean(logXMOVIE, na.rm=TRUE)
## [1] 0.5370983
sd(logXMOVIE, na.rm=TRUE)
## [1] 0.2895503