# Getting the data for Assignment 3, WF ED 540
# This RScript is available as ReadGSSdata.R in
# the Class 12 folder at https://goo.gl/lSYV9c
# Load the packages that you will need.
# library() is used instead of require() so that a missing package stops
# the script right here with an error; require() only returns FALSE and
# lets the script fail later with a less obvious message.
# The packages must be installed, if not already installed, in your
# instance of R (for example with install.packages("dplyr")).
library(foreign)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggvis)
library(magrittr)
library(knitr)
library(rmarkdown)
# Read the SPSS .SAV file into R as a list of variables.
# use.missings is spelled out as FALSE. The original call passed
# `use.missings = to.data.frame`, copied from the function signature, but
# an argument supplied by the caller is evaluated in the calling
# environment, where no object named `to.data.frame` is defined. FALSE is
# the value the default would have resolved to (to.data.frame is FALSE
# here), so user-defined missing-value codes stay in the data as recorded.
# NOTE(review): max.value.labels = TRUE is presumably treated as the
# number 1 by read.spss() -- confirm this threshold is what was intended.
GSSdata <- read.spss(
  "GSS.SAV",
  max.value.labels = TRUE,
  to.data.frame = FALSE,
  trim.factor.names = FALSE,
  reencode = NA,
  use.missings = FALSE
)
# Turn the list returned by read.spss() into a data frame
GSSdata <- data.frame(GSSdata)
# Convert the data frame to a tibble. as_tibble() replaces tbl_df(),
# which is deprecated in current dplyr releases.
GSSdata <- as_tibble(GSSdata)
# Sanity check on the import: report the mean, standard deviation, and
# number of cases for HRS1, a key variable in the analyses below.
summarise(
  GSSdata,
  mean_hours = mean(HRS1),
  sd_hours = sd(HRS1),
  n = n()
)
## Source: local data frame [1 x 3]
##
## mean_hours sd_hours n
## (dbl) (dbl) (int)
## 1 24.07573 24.19891 4820
# Show every HRS1 value for visual inspection
GSSdata[["HRS1"]]
# 1. A.) Histogram of HRS1
# NOTE(review): the "NaNs produced" warning that log(GSSdata$HRS1) raised
# in the original run shows that HRS1 contains negative values. These are
# presumably survey missing-data codes rather than real hours -- confirm,
# and consider recoding them to NA before interpreting HRS1 statistics.
hist(GSSdata$HRS1)
## The Picture of the histogram
# Take the natural logarithm of HRS1 for the next histogram.
# log() is only meaningful for positive values: a negative input gives
# NaN (with a warning) and zero gives -Inf, which lm() cannot use as a
# response. Non-positive and missing values are therefore set to NA
# explicitly so that they are dropped like any other missing value.
ln_HRS1 <- local({
  hours <- GSSdata$HRS1
  logged <- rep(NA_real_, length(hours))
  is_positive <- !is.na(hours) & hours > 0
  logged[is_positive] <- log(hours[is_positive])
  logged
})
# 1. B.) Histogram of the natural logarithm of HRS1
hist(ln_HRS1)
## The Picture of the histogram for the natural logarithm of HRS1
# 2. A.) Ordinary least squares regression with HRS1 as the dependent
# variable and I_AGE, I_SEX, and I_RELIGION as the independent variables.
hrs_factors <- lm(
  formula = HRS1 ~ I_AGE + I_SEX + I_RELIGION,
  data = GSSdata
)
# Show the call and the estimated coefficients
hrs_factors
##
## Call:
## lm(formula = HRS1 ~ I_AGE + I_SEX + I_RELIGION, data = GSSdata)
##
## Coefficients:
## (Intercept) I_AGE I_SEX I_RELIGION
## 56.6000 -6.8802 -8.5905 0.1637
# Residual summary, coefficient table, and overall fit statistics
summary(hrs_factors)
##
## Call:
## lm(formula = HRS1 ~ I_AGE + I_SEX + I_RELIGION, data = GSSdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.948 -19.942 -1.313 16.749 85.774
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.6000 1.5229 37.166 <2e-16 ***
## I_AGE -6.8802 0.2850 -24.141 <2e-16 ***
## I_SEX -8.5905 0.6540 -13.135 <2e-16 ***
## I_RELIGION 0.1637 0.2379 0.688 0.491
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.36 on 4752 degrees of freedom
## (64 observations deleted due to missingness)
## Multiple R-squared: 0.1453, Adjusted R-squared: 0.1448
## F-statistic: 269.3 on 3 and 4752 DF, p-value: < 2.2e-16
# Confidence intervals (2.5 % and 97.5 % bounds) for each coefficient
confint(hrs_factors)
## 2.5 % 97.5 %
## (Intercept) 53.6144339 59.5856647
## I_AGE -7.4389696 -6.3214998
## I_SEX -9.8726597 -7.3083293
## I_RELIGION -0.3026711 0.6301078
# 2. B.) Ordinary least squares regression with the natural logarithm of
# HRS1 as the dependent variable and the same independent variables as
# in 2.A.
# ln_HRS1 is a free-standing vector, not a column of GSSdata; the
# predictors are taken from GSSdata.
ln_hrs_factors <- lm(
  formula = ln_HRS1 ~ I_AGE + I_SEX + I_RELIGION,
  data = GSSdata
)
# Show the call and the estimated coefficients
ln_hrs_factors
##
## Call:
## lm(formula = ln_HRS1 ~ I_AGE + I_SEX + I_RELIGION, data = GSSdata)
##
## Coefficients:
## (Intercept) I_AGE I_SEX I_RELIGION
## 4.02723 -0.04005 -0.20584 -0.01001
# Residual summary, coefficient table, and overall fit statistics
summary(ln_hrs_factors)
##
## Call:
## lm(formula = ln_HRS1 ~ I_AGE + I_SEX + I_RELIGION, data = GSSdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7213 -0.0697 0.1334 0.2608 1.1496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.027234 0.048433 83.151 < 2e-16 ***
## I_AGE -0.040045 0.011034 -3.629 0.000289 ***
## I_SEX -0.205845 0.020678 -9.955 < 2e-16 ***
## I_RELIGION -0.010010 0.007311 -1.369 0.171058
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5513 on 2847 degrees of freedom
## (1969 observations deleted due to missingness)
## Multiple R-squared: 0.03755, Adjusted R-squared: 0.03653
## F-statistic: 37.02 on 3 and 2847 DF, p-value: < 2.2e-16
# Confidence intervals (2.5 % and 97.5 % bounds) for each coefficient
confint(ln_hrs_factors)
## 2.5 % 97.5 %
## (Intercept) 3.93226715 4.122200192
## I_AGE -0.06167991 -0.018410186
## I_SEX -0.24639054 -0.165298482
## I_RELIGION -0.02434620 0.004325626
# Means and standard deviations of all variables in the HRS1 equation.
# as_tibble() replaces tbl_df(), which is deprecated in current dplyr
# releases. na.rm = TRUE is applied to every variable so that a missing
# value in any one of them cannot turn its statistic into NA.
hrs_factorstbl <- as_tibble(GSSdata)
# Means
hrs_factorstbl %>%
  summarise(
    mean_HRS1 = mean(HRS1, na.rm = TRUE),
    mean_I_AGE = mean(I_AGE, na.rm = TRUE),
    mean_I_SEX = mean(I_SEX, na.rm = TRUE),
    mean_I_RELIGION = mean(I_RELIGION, na.rm = TRUE)
  )
## Source: local data frame [1 x 4]
##
## mean_HRS1 mean_I_AGE mean_I_SEX mean_I_RELIGION
## (dbl) (dbl) (dbl) (dbl)
## 1 24.07573 2.831411 1.557676 2.150115
# Standard deviations
hrs_factorstbl %>%
  summarise(
    sd_HRS1 = sd(HRS1, na.rm = TRUE),
    sd_I_AGE = sd(I_AGE, na.rm = TRUE),
    sd_I_SEX = sd(I_SEX, na.rm = TRUE),
    sd_I_RELIGION = sd(I_RELIGION, na.rm = TRUE)
  )
## Source: local data frame [1 x 4]
##
## sd_HRS1 sd_I_AGE sd_I_SEX sd_I_RELIGION
## (dbl) (dbl) (dbl) (dbl)
## 1 24.19891 1.170855 0.4967138 1.404815
# Means and standard deviations of all variables in the logarithm of
# HRS1 equation.
# as_tibble() replaces tbl_df(), which is deprecated in current dplyr
# releases. ln_HRS1 is the free-standing vector of logged hours rather
# than a column of the table, so it must have one value per row.
ln_hrs_factorstbl <- as_tibble(GSSdata)
# Means
ln_hrs_factorstbl %>%
  summarise(
    mean_ln_HRS1 = mean(ln_HRS1, na.rm = TRUE),
    mean_I_AGE = mean(I_AGE, na.rm = TRUE),
    mean_I_SEX = mean(I_SEX, na.rm = TRUE),
    mean_I_RELIGION = mean(I_RELIGION, na.rm = TRUE)
  )
## Source: local data frame [1 x 4]
##
## mean_ln_HRS1 mean_I_AGE mean_I_SEX mean_I_RELIGION
## (dbl) (dbl) (dbl) (dbl)
## 1 3.595463 2.831411 1.557676 2.150115
# Standard deviations
ln_hrs_factorstbl %>%
  summarise(
    sd_ln_HRS1 = sd(ln_HRS1, na.rm = TRUE),
    sd_I_AGE = sd(I_AGE, na.rm = TRUE),
    sd_I_SEX = sd(I_SEX, na.rm = TRUE),
    sd_I_RELIGION = sd(I_RELIGION, na.rm = TRUE)
  )
## Source: local data frame [1 x 4]
##
## sd_ln_HRS1 sd_I_AGE sd_I_SEX sd_I_RELIGION
## (dbl) (dbl) (dbl) (dbl)
## 1 0.560348 1.170855 0.4967138 1.404815
```