##install packages
#install.packages(c("devtools", "tidyverse","bootstrap", "lmtest", "car", "Hmisc", "sandwich", "multcomp", "knitr", "lattice", "lme4", "survey", "pscl", "readstata13", "ctv","ggplot2", "acs", "ggmap", "dplyr", "sjPlot", "survey", "devtools", "muhaz", "coxme","eha", "cmprsk", "knitr","ipumsr","abind", "plotly","plyr"), dependencies = T)
##load packages to use
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ipumsr)
## Warning: package 'ipumsr' was built under R version 4.0.2
library(readr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(psych)
## Warning: package 'psych' was built under R version 4.0.2
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(haven)
# Define two numeric vectors (X, Y) and a scalar multiplier (Z), then scale
# each vector by Z. Arithmetic is vectorized, so the NA in Y simply
# propagates to the matching position of Y_Z.
X <- c(5, 10, 15, 20, 25, 30)
Y <- c(-1, NA, 75, 3, 5, 8)
Z <- 5
X_Z <- X * Z
Y_Z <- Y * Z
print(X_Z)
## [1] 25 50 75 100 125 150
print(Y_Z)
## [1] -5 NA 375 15 25 40
# Set the working directory so the relative data path below resolves.
# NOTE(review): this path is machine-specific; it is kept because later code
# may rely on the working directory. Consider a project-relative path instead.
setwd("~/Documents/R_programming")
# Import the Stata data file (read_dta() comes from the haven package).
Akin <- read_dta("stata_PSID_w1.dta")
# View data
# View() opens a spreadsheet-style viewer, so only call it in an interactive
# session; running it from Rscript or while knitting is skipped.
if (interactive()) {
  View(Akin)
}
# Print the structure (column types and Stata labels) of the imported data.
str(Akin)
## tibble [131,361 × 13] (S3: tbl_df/tbl/data.frame)
## $ year : num [1:131361] 2001 2003 2005 2007 2009 ...
## ..- attr(*, "label")= chr "Year"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ sex : chr [1:131361] "male" "male" "male" "male" ...
## ..- attr(*, "label")= chr "Sex of respondent"
## ..- attr(*, "format.stata")= chr "%9s"
## $ age : num [1:131361] 49 51 53 55 57 59 47 49 51 53 ...
## ..- attr(*, "label")= chr "Age of respondent"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ marpi : num [1:131361] 1 1 1 1 1 1 0 0 0 0 ...
## ..- attr(*, "label")= chr "Marital pairs indicator"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ educ : num [1:131361] 9 9 9 9 9 10 12 12 12 12 ...
## ..- attr(*, "label")= chr "Years completed education"
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ adjfinc : num [1:131361] 50.9 31.1 21.3 76.5 19.9 ...
## ..- attr(*, "label")= chr "Family income in prev yr in 1000s of year 2000 "
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ pubhs : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "1 = lives in public housing"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ rnthlp : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "1 = received govt rent assistance"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ adjwlth1 : num [1:131361] 23.05 3.83 6.55 26.29 12.14 ...
## ..- attr(*, "label")= chr "Wealth (excluding home equity) in 1000s of yr 2000 "
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ adjwlth2 : num [1:131361] 113 119 116 129 112 ...
## ..- attr(*, "label")= chr "Wealth (including home equity) in 1000s of yr 2000 "
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ h_race_ethnic_new: chr [1:131361] "NL White" "NL White" "NL White" "NL White" ...
## ..- attr(*, "label")= chr "Race/ethnicity updated codes (5/26/14)"
## ..- attr(*, "format.stata")= chr "%16s"
## $ id : num [1:131361] 4003 4003 4003 4003 4003 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ race5 : dbl+lbl [1:131361] 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...
## ..@ label : chr "Race/ethnicity updated codes (5/26/14)"
## ..@ format.stata: chr "%16.0g"
## ..@ labels : Named num [1:5] 1 2 3 4 5
## .. ..- attr(*, "names")= chr [1:5] "Latino- Any Race" "NL Asian" "NL Black" "NL Other" ...
# Build a smaller data set that keeps only the columns used in the analysis.
Akin_sub <- dplyr::select(
  Akin,
  id, age, marpi, adjwlth2, educ, h_race_ethnic_new, rnthlp
)
# Names of the retained variables
colnames(Akin_sub)
## [1] "id" "age" "marpi"
## [4] "adjwlth2" "educ" "h_race_ethnic_new"
## [7] "rnthlp"
# Number of observations (rows)
NROW(Akin_sub)
## [1] 131361
# Number of variables (columns)
NCOL(Akin_sub)
## [1] 7
# Frequency distribution of the race/ethnicity variable
table(Akin_sub[["h_race_ethnic_new"]])
##
## Latino- Any Race NL Asian NL Black NL Other
## 9893 2118 46935 1134
## NL White
## 71281
# Reduce adjwlth2 to a plain numeric vector. The column is already numeric
# (str() reports it as `num`), so no string parsing is needed; as.numeric()
# alone drops the Stata label/format attributes while keeping every value
# exact. Going through as.character() first would round-trip the doubles
# through a limited-precision decimal text form and could alter them slightly.
Akin_sub$adjwlth2 <- as.numeric(Akin_sub$adjwlth2)
# Calculate mean and median, ignoring missing values
mean(Akin_sub$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(Akin_sub$adjwlth2, na.rm = TRUE)
## [1] 32.804
# Summary statistics for age
# NOTE(review): the maximum of 999 looks like a missing-value code rather than
# a real age -- confirm against the codebook and recode to NA if so, since it
# inflates the mean.
summary(Akin_sub$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 14.00 29.00 32.03 47.00 999.00
# Respondents who received government rent assistance (rnthlp == 1).
Public_assitance <- dplyr::filter(Akin_sub, rnthlp == 1)
# Latino respondents among those who received assistance. The label must match
# the value stored in h_race_ethnic_new exactly: the frequency table lists it
# as "Latino- Any Race". Comparing against "Latino" matches no rows at all.
Public_assitance_latino <- dplyr::filter(
  Public_assitance,
  h_race_ethnic_new == "Latino- Any Race"
)
nrow(Public_assitance) #How many people in the data received public assistance?
## [1] 3163
nrow(Public_assitance_latino) #How many Latino received public assistance?
## (re-run to refresh this count; the previously recorded 0 was caused by
## filtering on the non-existent label "Latino")
# 1. Health Insurance Status
# 2. Level of education
# 3. Household size