#Title: WFED540 Assignment-3
#Author: Andrew Leigey
#Date: November 17, 2015
#Output: html_document


# Loading packages
require(foreign)
## Loading required package: foreign
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(ggvis)
## Loading required package: ggvis
require(magrittr)
## Loading required package: magrittr
require(knitr)
## Loading required package: knitr
require(rmarkdown)
## Loading required package: rmarkdown
# The packages must be installed, if not already 
# installed, in your instance of R

# Read the SPSS .SAV file into R as a list of variables.
# `use.missings` is spelled out as FALSE: the original passed the bare name
# `to.data.frame`, which R resolves in the calling environment (where no such
# object is defined) rather than among read.spss()'s own arguments. FALSE is
# what the documented default (use.missings = to.data.frame) yields when
# to.data.frame = FALSE, so user-defined missing codes are left in the data
# as ordinary values.
# NOTE(review): max.value.labels = TRUE presumably acts as a limit of 1;
# use.value.labels may have been intended. Left unchanged so that the
# results reported further down still hold -- confirm.
GSSdata <- read.spss(
  "GSS.SAV",
  max.value.labels = TRUE,
  to.data.frame = FALSE,
  trim.factor.names = FALSE,
  reencode = NA,
  use.missings = FALSE
)


# Turn the list returned by read.spss() into a data frame
GSSdata <- data.frame(GSSdata)

# Convert the data frame to a tibble. as_tibble() (re-exported by dplyr)
# replaces tbl_df(), which is deprecated as of dplyr 1.0.0.
GSSdata <- as_tibble(GSSdata)
# Just to be sure you have the data read correctly,
# run some descriptive statistics (mean, standard
# deviation, and count of cases) for an important
# variable we will use, HRS1:
summarise(
  GSSdata,
  mean_hours = mean(HRS1),
  sd_hours = sd(HRS1),
  n = n()
)
## Source: local data frame [1 x 3]
## 
##   mean_hours sd_hours     n
##        (dbl)    (dbl) (int)
## 1   24.07573 24.19891  4820

# Here I am taking a look at the HRS1 variable in GSSdata.

# Show every HRS1 value for a quick visual check
GSSdata[["HRS1"]]

# 1. A.) Histogram of HRS1
hist(x = GSSdata$HRS1)

##The Picture of the histogram

# Natural logarithm of HRS1 for the next histogram.
# log() is only defined for positive hours. Non-positive values are set to NA
# before taking the log, so the result holds explicit NAs instead of the NaN
# (with a "NaNs produced" warning) or -Inf that log() would return for them.
# which() skips any NA already present, and those stay NA.
ln_HRS1 <- log(replace(GSSdata$HRS1, which(GSSdata$HRS1 <= 0), NA))
# 1. B.) Histogram of the natural logarithm of HRS1
hist(ln_HRS1)

##The Picture of the histogram for the natural logarithm of HRS1

# 2. A.) Ordinary least squares regression with HRS1 as the dependent
# variable and I_AGE, I_SEX, and I_RELIGION as the independent variables.
# NOTE(review): each predictor gets a single slope, i.e. it enters the model
# as a numeric code -- confirm this is intended for I_SEX and I_RELIGION.
hrs_factors <- lm(
  formula = HRS1 ~ I_AGE + I_SEX + I_RELIGION,
  data = GSSdata
)

# Show the call and the estimated coefficients
hrs_factors
## 
## Call:
## lm(formula = HRS1 ~ I_AGE + I_SEX + I_RELIGION, data = GSSdata)
## 
## Coefficients:
## (Intercept)        I_AGE        I_SEX   I_RELIGION  
##     56.6000      -6.8802      -8.5905       0.1637
# Residual quantiles, coefficient table, and fit statistics for the HRS1 model
hrs_factors %>% summary()
## 
## Call:
## lm(formula = HRS1 ~ I_AGE + I_SEX + I_RELIGION, data = GSSdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.948 -19.942  -1.313  16.749  85.774 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  56.6000     1.5229  37.166   <2e-16 ***
## I_AGE        -6.8802     0.2850 -24.141   <2e-16 ***
## I_SEX        -8.5905     0.6540 -13.135   <2e-16 ***
## I_RELIGION    0.1637     0.2379   0.688    0.491    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.36 on 4752 degrees of freedom
##   (64 observations deleted due to missingness)
## Multiple R-squared:  0.1453, Adjusted R-squared:  0.1448 
## F-statistic: 269.3 on 3 and 4752 DF,  p-value: < 2.2e-16
# Confidence intervals (2.5 % and 97.5 % bounds) for the HRS1 coefficients
hrs_factors %>% confint()
##                  2.5 %     97.5 %
## (Intercept) 53.6144339 59.5856647
## I_AGE       -7.4389696 -6.3214998
## I_SEX       -9.8726597 -7.3083293
## I_RELIGION  -0.3026711  0.6301078
# 2. B.) Ordinary least squares regression using the natural logarithm of
# HRS1 as the dependent variable and the same set of independent variables
# chosen in 2.A. ln_HRS1 is a stand-alone vector rather than a column of
# GSSdata, so lm() picks it up from the surrounding environment.
ln_hrs_factors <- lm(
  formula = ln_HRS1 ~ I_AGE + I_SEX + I_RELIGION,
  data = GSSdata
)

# Show the call and the estimated coefficients
ln_hrs_factors
## 
## Call:
## lm(formula = ln_HRS1 ~ I_AGE + I_SEX + I_RELIGION, data = GSSdata)
## 
## Coefficients:
## (Intercept)        I_AGE        I_SEX   I_RELIGION  
##     4.02723     -0.04005     -0.20584     -0.01001
# Residual quantiles, coefficient table, and fit statistics for the
# ln(HRS1) model
ln_hrs_factors %>% summary()
## 
## Call:
## lm(formula = ln_HRS1 ~ I_AGE + I_SEX + I_RELIGION, data = GSSdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7213 -0.0697  0.1334  0.2608  1.1496 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.027234   0.048433  83.151  < 2e-16 ***
## I_AGE       -0.040045   0.011034  -3.629 0.000289 ***
## I_SEX       -0.205845   0.020678  -9.955  < 2e-16 ***
## I_RELIGION  -0.010010   0.007311  -1.369 0.171058    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5513 on 2847 degrees of freedom
##   (1969 observations deleted due to missingness)
## Multiple R-squared:  0.03755,    Adjusted R-squared:  0.03653 
## F-statistic: 37.02 on 3 and 2847 DF,  p-value: < 2.2e-16
# Confidence intervals (2.5 % and 97.5 % bounds) for the ln(HRS1) coefficients
ln_hrs_factors %>% confint()
##                   2.5 %       97.5 %
## (Intercept)  3.93226715  4.122200192
## I_AGE       -0.06167991 -0.018410186
## I_SEX       -0.24639054 -0.165298482
## I_RELIGION  -0.02434620  0.004325626
# Here I am getting the means and standard deviations of all variables
# in the HRS1 equation

# HRS1
# GSSdata is already a tibble at this point, so it is reused directly instead
# of being passed through the deprecated tbl_df() a second time.
hrs_factorstbl <- GSSdata

# Means. na.rm = TRUE is applied to every column so that all four statistics
# handle missing values the same way.
# NOTE(review): these are computed over all rows, not only the rows kept by
# lm() after dropping incomplete cases -- confirm which sample is wanted.
hrs_factorstbl %>%
  summarise(
    mean_HRS1 = mean(HRS1, na.rm = TRUE),
    mean_I_AGE = mean(I_AGE, na.rm = TRUE),
    mean_I_SEX = mean(I_SEX, na.rm = TRUE),
    mean_I_RELIGION = mean(I_RELIGION, na.rm = TRUE)
  )
## Source: local data frame [1 x 4]
## 
##   mean_HRS1 mean_I_AGE mean_I_SEX mean_I_RELIGION
##       (dbl)      (dbl)      (dbl)           (dbl)
## 1  24.07573   2.831411   1.557676        2.150115
# Standard deviations of the variables in the HRS1 equation. na.rm = TRUE is
# applied to every column so that all four statistics handle missing values
# the same way.
hrs_factorstbl %>%
  summarise(
    sd_HRS1 = sd(HRS1, na.rm = TRUE),
    sd_I_AGE = sd(I_AGE, na.rm = TRUE),
    sd_I_SEX = sd(I_SEX, na.rm = TRUE),
    sd_I_RELIGION = sd(I_RELIGION, na.rm = TRUE)
  )
## Source: local data frame [1 x 4]
## 
##    sd_HRS1 sd_I_AGE  sd_I_SEX sd_I_RELIGION
##      (dbl)    (dbl)     (dbl)         (dbl)
## 1 24.19891 1.170855 0.4967138      1.404815
# Here I am getting the means and standard deviations of all variables
# in the logarithm of HRS1 equation

# Logarithm of HRS1

# GSSdata is already a tibble at this point, so it is reused directly instead
# of being passed through the deprecated tbl_df() a second time.
ln_hrs_factorstbl <- GSSdata

# Means. ln_HRS1 is not a column of the table; summarise() finds it as the
# stand-alone vector defined earlier in the script.
ln_hrs_factorstbl %>%
  summarise(
    mean_ln_HRS1 = mean(ln_HRS1, na.rm = TRUE),
    mean_I_AGE = mean(I_AGE, na.rm = TRUE),
    mean_I_SEX = mean(I_SEX, na.rm = TRUE),
    mean_I_RELIGION = mean(I_RELIGION, na.rm = TRUE)
  )
## Source: local data frame [1 x 4]
## 
##   mean_ln_HRS1 mean_I_AGE mean_I_SEX mean_I_RELIGION
##          (dbl)      (dbl)      (dbl)           (dbl)
## 1     3.595463   2.831411   1.557676        2.150115
# Standard deviations of the variables in the ln(HRS1) equation, ignoring
# missing values in every column
summarise(
  ln_hrs_factorstbl,
  sd_ln_HRS1 = sd(ln_HRS1, na.rm = TRUE),
  sd_I_AGE = sd(I_AGE, na.rm = TRUE),
  sd_I_SEX = sd(I_SEX, na.rm = TRUE),
  sd_I_RELIGION = sd(I_RELIGION, na.rm = TRUE)
)
## Source: local data frame [1 x 4]
## 
##   sd_ln_HRS1 sd_I_AGE  sd_I_SEX sd_I_RELIGION
##        (dbl)    (dbl)     (dbl)         (dbl)
## 1   0.560348 1.170855 0.4967138      1.404815

# Table 1. Relationship between HRS1 and I_AGE, I_SEX, and I_RELIGION

# Table 2. Relationship between ln_HRS1 and I_AGE, I_SEX, and I_RELIGION

```