Lab Assignment 2

##install packages

 #install.packages(c("devtools", "tidyverse","bootstrap", "lmtest", "car", "Hmisc", "sandwich", "multcomp", "knitr", "lattice", "lme4", "survey", "pscl", "readstata13", "ctv","ggplot2", "acs", "ggmap", "dplyr", "sjPlot", "survey", "devtools", "muhaz", "coxme","eha", "cmprsk", "knitr","ipumsr","abind", "plotly","plyr"), dependencies = T)

##load packages to use

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.0.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ipumsr)

## Warning: package 'ipumsr' was built under R version 4.0.2

library(readr)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.2

library(psych)

## Warning: package 'psych' was built under R version 4.0.2

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(haven)

Create the Vectors

# Build the vectors X, Y and Z, then scale X and Y by the constant Z
X <- seq(from = 5, to = 30, by = 5)
Y <- c(-1, NA, 75, 3, 5, 8)
Z <- 5
# Z has length one, so it is recycled across every element of X
X_Z <- X * Z
# The NA in Y carries through to the matching position of Y_Z
Y_Z <- Y * Z
print(X_Z)

## [1]  25  50  75 100 125 150

print(Y_Z)

## [1]  -5  NA 375  15  25  40

Data Management Question

# Read the PSID extract through an explicit path instead of calling setwd(),
# so the script does not change the session's working directory.
data_dir <- path.expand("~/Documents/R_programming")
Akin <- read_dta(file.path(data_dir, "stata_PSID_w1.dta"))
# Open the data viewer only in an interactive session; View() has nothing
# useful to show when the file is knitted or run with Rscript.
if (interactive()) {
  View(Akin)
}
# Print the structure: column types plus the Stata label/format attributes
str(Akin)

## tibble [131,361 × 13] (S3: tbl_df/tbl/data.frame)
##  $ year             : num [1:131361] 2001 2003 2005 2007 2009 ...
##   ..- attr(*, "label")= chr "Year"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ sex              : chr [1:131361] "male" "male" "male" "male" ...
##   ..- attr(*, "label")= chr "Sex of respondent"
##   ..- attr(*, "format.stata")= chr "%9s"
##  $ age              : num [1:131361] 49 51 53 55 57 59 47 49 51 53 ...
##   ..- attr(*, "label")= chr "Age of respondent"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ marpi            : num [1:131361] 1 1 1 1 1 1 0 0 0 0 ...
##   ..- attr(*, "label")= chr "Marital pairs indicator"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ educ             : num [1:131361] 9 9 9 9 9 10 12 12 12 12 ...
##   ..- attr(*, "label")= chr "Years completed education"
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ adjfinc          : num [1:131361] 50.9 31.1 21.3 76.5 19.9 ...
##   ..- attr(*, "label")= chr "Family income in prev yr in 1000s of year 2000 "
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ pubhs            : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "1 = lives in public housing"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ rnthlp           : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "1 = received govt rent assistance"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ adjwlth1         : num [1:131361] 23.05 3.83 6.55 26.29 12.14 ...
##   ..- attr(*, "label")= chr "Wealth (excluding home equity) in 1000s of yr 2000 "
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ adjwlth2         : num [1:131361] 113 119 116 129 112 ...
##   ..- attr(*, "label")= chr "Wealth (including home equity) in 1000s of yr 2000 "
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ h_race_ethnic_new: chr [1:131361] "NL White" "NL White" "NL White" "NL White" ...
##   ..- attr(*, "label")= chr "Race/ethnicity updated codes (5/26/14)"
##   ..- attr(*, "format.stata")= chr "%16s"
##  $ id               : num [1:131361] 4003 4003 4003 4003 4003 ...
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ race5            : dbl+lbl [1:131361] 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...
##    ..@ label       : chr "Race/ethnicity updated codes (5/26/14)"
##    ..@ format.stata: chr "%16.0g"
##    ..@ labels      : Named num [1:5] 1 2 3 4 5
##    .. ..- attr(*, "names")= chr [1:5] "Latino- Any Race" "NL Asian" "NL Black" "NL Other" ...

# Keep only the columns needed for the questions below
Akin_sub <- Akin[c(
  "id", "age", "marpi", "adjwlth2", "educ", "h_race_ethnic_new", "rnthlp"
)]

Question 3.1 How many variables are there in this data and what are the variable names, and how many observations in the data file?

# Names of the variables kept in the subset
colnames(Akin_sub)

## [1] "id"                "age"               "marpi"            
## [4] "adjwlth2"          "educ"              "h_race_ethnic_new"
## [7] "rnthlp"

# Number of observations (rows)
dim(Akin_sub)[1]

## [1] 131361

# Number of variables (columns)
dim(Akin_sub)[2]

## [1] 7

Question 3.2 Show the frequency distribution of race/ethnicity variable

# Count how many rows fall into each race/ethnicity category
table(Akin_sub[["h_race_ethnic_new"]])

## 
## Latino- Any Race         NL Asian         NL Black         NL Other 
##             9893             2118            46935             1134 
##         NL White 
##            71281

Question 3.3 What’s the mean and median for adjwlth2(wealth including home equity)?

# adjwlth2 is already numeric (see the str() output above), so convert it
# directly. Going through as.character() first would round every value to
# 15 significant digits for no benefit. as.numeric() still drops the Stata
# label/format attributes, leaving a plain numeric vector.
Akin_sub$adjwlth2 <- as.numeric(Akin_sub$adjwlth2)
# Mean and median of wealth including home equity, ignoring missing values
mean(Akin_sub$adjwlth2, na.rm = TRUE)

## [1] 187.1656

median(Akin_sub$adjwlth2, na.rm = TRUE)

## [1] 32.804

Question 3.4 Generate five summary statistics for age (i.e., min, max, IQR, mean, and median)

# Six-number summary of age: min, quartiles, median, mean and max.
# NOTE(review): the maximum of 999 printed below looks like a missing-value
# code rather than a real age - confirm against the codebook, since it
# inflates the mean if so.
# NOTE(review): the question asks for the IQR; summary() reports only the
# 1st and 3rd quartiles, so the IQR has to be read off as their difference.
summary(Akin_sub[["age"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   14.00   29.00   32.03   47.00  999.00

Question 3.5 How many people in the data received public assistance? How many Latino received public assistance?

# Rows flagged as receiving government rent assistance (rnthlp == 1)
Public_assitance <- filter(Akin_sub, rnthlp == 1)
# The race/ethnicity column stores the full label "Latino- Any Race" (see the
# frequency table for Question 3.2). Comparing against the bare string
# "Latino" matches no rows and always yields a count of 0.
Public_assitance_latino <- filter(
  Akin_sub,
  rnthlp == 1 & h_race_ethnic_new == "Latino- Any Race"
)
nrow(Public_assitance)           #How many people in the data received public assistance?

## [1] 3163

nrow(Public_assitance_latino)    #How many Latino received public assistance?

# Output not shown: re-run this chunk to get the Latino count. The earlier
# result of 0 came from matching the nonexistent label "Latino".

Question 3.6 Is there anything you wish to know about individuals’ experiences that is not included in the data set? (Note: unit of analysis is individual here. Open-ended question. E.g., occupation, childhood maltreatment, neighborhood characteristics, etc). List three variables that you wish you had access to.

# 1. Health Insurance Status 
# 2. Level of education 
# 3. Household size

Lab Assignment 2

Samson Olowolaju

Sept. 30, 2020

Create the Vectors

Data Management Question

Question 3.1 How many variables are there in this data and what are the variable names, and how many observations in the data file?

Question 3.2 Show the frequency distribution of race/ethnicity variable

Question 3.3 What’s the mean and median for adjwlth2(wealth including home equity)?

Question 3.4 Generate five summary statistics for age (i.e., min, max, IQR, mean, and median)

Question 3.5 How many people in the data received public assistance? How many Latino received public assistance?