##install packages

 #install.packages(c("devtools", "tidyverse","bootstrap", "lmtest", "car", "Hmisc", "sandwich", "multcomp", "knitr", "lattice", "lme4", "survey", "pscl", "readstata13", "ctv","ggplot2", "acs", "ggmap", "dplyr", "sjPlot", "survey", "devtools", "muhaz", "coxme","eha", "cmprsk", "knitr","ipumsr","abind", "plotly","plyr"), dependencies = T)

##load packages to use

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ipumsr)
## Warning: package 'ipumsr' was built under R version 4.0.2
library(readr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(psych)
## Warning: package 'psych' was built under R version 4.0.2
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(haven)

Create the Vectors

# Build the numeric vectors X and Y and the scalar multiplier Z
X <- seq(from = 5, to = 30, by = 5)
Y <- c(-1, NA, 75, 3, 5, 8)
Z <- 5
# Scale each vector by Z (element-wise; the NA in Y stays NA)
X_Z <- X * Z
Y_Z <- Y * Z
print(X_Z)
## [1]  25  50  75 100 125 150
print(Y_Z)
## [1]  -5  NA 375  15  25  40

Data Management Question

# Set the working directory so the relative data path below resolves.
# NOTE(review): this hard-coded path is machine-specific; consider an RStudio
# project or a path variable so the script runs elsewhere.
setwd("~/Documents/R_programming")
# Import the Stata file with haven::read_dta()
Akin <- read_dta("stata_PSID_w1.dta")
# Open the data viewer only in interactive sessions; View() has nothing to
# show (and can fail) when the script is knitted or run with Rscript.
if (interactive()) {
  View(Akin)
}
# Print the structure: column types plus the Stata labels/formats
str(Akin)
## tibble [131,361 × 13] (S3: tbl_df/tbl/data.frame)
##  $ year             : num [1:131361] 2001 2003 2005 2007 2009 ...
##   ..- attr(*, "label")= chr "Year"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ sex              : chr [1:131361] "male" "male" "male" "male" ...
##   ..- attr(*, "label")= chr "Sex of respondent"
##   ..- attr(*, "format.stata")= chr "%9s"
##  $ age              : num [1:131361] 49 51 53 55 57 59 47 49 51 53 ...
##   ..- attr(*, "label")= chr "Age of respondent"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ marpi            : num [1:131361] 1 1 1 1 1 1 0 0 0 0 ...
##   ..- attr(*, "label")= chr "Marital pairs indicator"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ educ             : num [1:131361] 9 9 9 9 9 10 12 12 12 12 ...
##   ..- attr(*, "label")= chr "Years completed education"
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ adjfinc          : num [1:131361] 50.9 31.1 21.3 76.5 19.9 ...
##   ..- attr(*, "label")= chr "Family income in prev yr in 1000s of year 2000 "
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ pubhs            : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "1 = lives in public housing"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ rnthlp           : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "1 = received govt rent assistance"
##   ..- attr(*, "format.stata")= chr "%8.0g"
##  $ adjwlth1         : num [1:131361] 23.05 3.83 6.55 26.29 12.14 ...
##   ..- attr(*, "label")= chr "Wealth (excluding home equity) in 1000s of yr 2000 "
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ adjwlth2         : num [1:131361] 113 119 116 129 112 ...
##   ..- attr(*, "label")= chr "Wealth (including home equity) in 1000s of yr 2000 "
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ h_race_ethnic_new: chr [1:131361] "NL White" "NL White" "NL White" "NL White" ...
##   ..- attr(*, "label")= chr "Race/ethnicity updated codes (5/26/14)"
##   ..- attr(*, "format.stata")= chr "%16s"
##  $ id               : num [1:131361] 4003 4003 4003 4003 4003 ...
##   ..- attr(*, "format.stata")= chr "%9.0g"
##  $ race5            : dbl+lbl [1:131361] 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...
##    ..@ label       : chr "Race/ethnicity updated codes (5/26/14)"
##    ..@ format.stata: chr "%16.0g"
##    ..@ labels      : Named num [1:5] 1 2 3 4 5
##    .. ..- attr(*, "names")= chr [1:5] "Latino- Any Race" "NL Asian" "NL Black" "NL Other" ...
# Keep only the columns needed for the questions below in a new data set
Akin_sub <- subset(
  Akin,
  select = c(id, age, marpi, adjwlth2, educ, h_race_ethnic_new, rnthlp)
)

Question 3.1 How many variables are there in this data, what are the variable names, and how many observations are in the data file?

# Variable (column) names
colnames(Akin_sub)
## [1] "id"                "age"               "marpi"            
## [4] "adjwlth2"          "educ"              "h_race_ethnic_new"
## [7] "rnthlp"
# Number of observations = first element of dim() (rows)
dim(Akin_sub)[1L]
## [1] 131361
# Number of variables = second element of dim() (columns)
dim(Akin_sub)[2L]
## [1] 7

Question 3.2 Show the frequency distribution of the race/ethnicity variable

# Count the rows in each race/ethnicity category
table(Akin_sub[["h_race_ethnic_new"]])
## 
## Latino- Any Race         NL Asian         NL Black         NL Other 
##             9893             2118            46935             1134 
##         NL White 
##            71281

Question 3.3 What are the mean and median of adjwlth2 (wealth including home equity)?

# adjwlth2 is already numeric in the imported data (see the str() output), so
# no string conversion is needed. as.numeric() just drops the Stata
# label/format attributes; going through as.character() first would round the
# values to 15 significant digits.
Akin_sub$adjwlth2 <- as.numeric(Akin_sub$adjwlth2)
# Mean and median of wealth, ignoring missing values
mean(Akin_sub$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(Akin_sub$adjwlth2, na.rm = TRUE)
## [1] 32.804

Question 3.4 Generate five summary statistics for age (i.e., min, max, IQR, mean, and median)

# Min, quartiles, median, mean and max of age
# NOTE(review): the maximum of 999 looks like a missing-value code rather than
# a real age - confirm against the codebook and recode to NA if so, since it
# inflates the mean and max.
summary(Akin_sub$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   14.00   29.00   32.03   47.00  999.00
# summary() does not report the IQR itself, so compute it explicitly
# (3rd quartile - 1st quartile = 47 - 14)
IQR(Akin_sub$age, na.rm = TRUE)
## [1] 33

Question 3.5 How many people in the data received public assistance? How many Latino respondents received public assistance?

# Rows where rnthlp == 1 (labelled "1 = received govt rent assistance")
Public_assitance <- filter(Akin_sub, rnthlp == 1)
# The Latino category is stored as "Latino- Any Race" (see the frequency table
# of h_race_ethnic_new); comparing against "Latino" matches no rows at all.
Public_assitance_latino <- filter(
  Public_assitance,
  h_race_ethnic_new == "Latino- Any Race"
)
# NOTE(review): the same id appears in several survey years in the source data,
# so nrow() counts rows rather than distinct people - confirm whether
# n_distinct(id) is what the question intends.
nrow(Public_assitance)           #How many people in the data received public assistance?
## [1] 3163
nrow(Public_assitance_latino)    #How many Latino received public assistance?
## (re-run to refresh this count; the earlier result of 0 came from the
## non-matching label "Latino")

Question 3.6 Is there anything you wish to know about individuals’ experiences that is not included in the data set? (Note: the unit of analysis is the individual here. This is an open-ended question, e.g., occupation, childhood maltreatment, neighborhood characteristics, etc.) List three variables that you wish you had access to.

# 1. Health Insurance Status 
# 2. Level of education 
# 3. Household size