Question 1
# Question 1 inputs: two numeric vectors of length six (y contains one
# missing value) and a single multiplier.
x <- seq(5, 30, by = 5)
y <- c(-1, NA, 3, 5, 8, 75)
z <- 5
Question 2
# Element-wise products of x and y with z. z has length one, so it is
# recycled across every element; the NA in y stays NA in the product.
x * z
## [1] 25 50 75 100 125 150
y * z
## [1] -5 NA 15 25 40 375
# Store the products by computing them instead of retyping the printed
# values, so xz and yz stay correct if x, y or z are ever changed.
xz <- x * z
yz <- y * z
# print() shows only its first argument, so print(xz, yz) would not print
# yz; print each vector with its own call if the output is wanted.
# print(xz)
# print(yz)
Question 3
library(haven)
# Read the Stata file into a data frame (read_dta() comes from haven).
stata_PSID_w1 <- read_dta("stata_PSID_w1.dta")

# Open the spreadsheet viewer once, and only when someone is at the console;
# the original called View() twice in a row on the same object.
if (interactive()) {
  View(stata_PSID_w1)
}

# Select the analysis variables into a new data set.
assignment1 <- subset(
  x = stata_PSID_w1,
  select = c(
    "id", "age", "marpi", "adjwlth2", "educ", "pubhs",
    "h_race_ethnic_new", "race5"
  )
)
Question 3.1
# length() of a data frame counts its columns (variables), not its rows.
length(assignment1)
## [1] 8
# str() lists every column with its type, length and attributes.
str(assignment1)
## tibble [131,361 x 8] (S3: tbl_df/tbl/data.frame)
## $ id : num [1:131361] 4003 4003 4003 4003 4003 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ age : num [1:131361] 49 51 53 55 57 59 47 49 51 53 ...
## ..- attr(*, "label")= chr "Age of respondent"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ marpi : num [1:131361] 1 1 1 1 1 1 0 0 0 0 ...
## ..- attr(*, "label")= chr "Marital pairs indicator"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ adjwlth2 : num [1:131361] 113 119 116 129 112 ...
## ..- attr(*, "label")= chr "Wealth (including home equity) in 1000s of yr 2000 "
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ educ : num [1:131361] 9 9 9 9 9 10 12 12 12 12 ...
## ..- attr(*, "label")= chr "Years completed education"
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ pubhs : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "1 = lives in public housing"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ h_race_ethnic_new: chr [1:131361] "NL White" "NL White" "NL White" "NL White" ...
## ..- attr(*, "label")= chr "Race/ethnicity updated codes (5/26/14)"
## ..- attr(*, "format.stata")= chr "%16s"
## $ race5 : dbl+lbl [1:131361] 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...
## ..@ label : chr "Race/ethnicity updated codes (5/26/14)"
## ..@ format.stata: chr "%16.0g"
## ..@ labels : Named num [1:5] 1 2 3 4 5
## .. ..- attr(*, "names")= chr [1:5] "Latino- Any Race" "NL Asian" "NL Black" "NL Other" ...
# Column names of the selected data set.
names(assignment1)
## [1] "id" "age" "marpi"
## [4] "adjwlth2" "educ" "pubhs"
## [7] "h_race_ethnic_new" "race5"
# dim() reports rows first, then columns.
dim(assignment1)
## [1] 131361 8
Question 3.2
# race5 is a labelled category code (values 1-5), so the distribution is
# shown as a bar chart of counts per category; a histogram would bin the
# codes as if they were measurements. as_factor() (haven) turns the codes
# into a factor that carries the Stata value labels.
barplot(table(as_factor(assignment1$race5)))

Question 3.3
# Mean and median wealth, ignoring missing values. TRUE is spelled out
# because the shorthand T is an ordinary variable that can be reassigned.
mean(assignment1$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(assignment1$adjwlth2, na.rm = TRUE)
## [1] 32.804
Question 3.4
# Age summary statistics on the raw column.
# NOTE(review): the raw maximum below is 999, which cannot be a real age and
# is presumably a missing-value code (confirm against the codebook). It
# inflates the mean, so cleaned statistics are computed after the raw ones.
min(assignment1$age)
## [1] 1
max(assignment1$age)
## [1] 999
mean(assignment1$age)
## [1] 32.02676
median(assignment1$age)
## [1] 29
IQR(assignment1$age)
## [1] 33
# Same statistics with the 999 code treated as missing. which() keeps the
# assignment safe even if the column already contains NA values.
age_clean <- assignment1$age
age_clean[which(age_clean == 999)] <- NA
min(age_clean, na.rm = TRUE)
max(age_clean, na.rm = TRUE)
mean(age_clean, na.rm = TRUE)
median(age_clean, na.rm = TRUE)
IQR(age_clean, na.rm = TRUE)
Question 3.5
# Per-column summary: quartiles, mean and NA counts for the numeric columns,
# length/class/mode for the character column.
summary(assignment1)
## id age marpi adjwlth2
## Min. : 4003 Min. : 1.00 Min. :0.0000 Min. :-2304.98
## 1st Qu.:1269033 1st Qu.: 14.00 1st Qu.:0.0000 1st Qu.: 1.91
## Median :2464171 Median : 29.00 Median :0.0000 Median : 32.80
## Mean :3014466 Mean : 32.03 Mean :0.4178 Mean : 187.17
## 3rd Qu.:5381175 3rd Qu.: 47.00 3rd Qu.:1.0000 3rd Qu.: 143.55
## Max. :6872185 Max. :999.00 Max. :4.0000 Max. :80303.23
## NA's :28 NA's :48
## educ pubhs h_race_ethnic_new race5
## Min. : 0.00 Min. :0.00000 Length:131361 Min. :1.000
## 1st Qu.:12.00 1st Qu.:0.00000 Class :character 1st Qu.:3.000
## Median :12.00 Median :0.00000 Mode :character Median :5.000
## Mean :13.04 Mean :0.05301 Mean :3.927
## 3rd Qu.:15.00 3rd Qu.:0.00000 3rd Qu.:5.000
## Max. :20.00 Max. :1.00000 Max. :5.000
## NA's :2496 NA's :34
# Public-housing indicator (pubhs): work on a plain data.frame copy so the
# recode below does not touch assignment1.
psid <- data.frame(assignment1)
hist(psid$pubhs)

# Recode the 0/1 indicator as a factor with readable labels.
psid$pubhs <- factor(
  psid$pubhs,
  levels = c(1, 0),
  labels = c("Did", "Didnt")
)
prop.table(table(psid$pubhs))
##
## Did Didnt
## 0.05300509 0.94699491
#hist(psid$pubhs, main="Frequency Distribution of pubhs")
barplot(prop.table(table(psid$pubhs)))

# Raw counts for pubhs. The original plotted psid$race5 here, which looks
# like a copy-paste slip inside the pubhs section.
barplot(table(psid$pubhs))

# Computed on assignment1, where pubhs is still numeric 0/1, so the mean is
# the share coded 1. TRUE is spelled out because T can be reassigned.
mean(assignment1$pubhs, na.rm = TRUE)
## [1] 0.05300509
median(assignment1$pubhs, na.rm = TRUE)
## [1] 0
# NOTE(review): repeats the earlier summary(assignment1) call; assignment1 is
# not modified in between, so the output is the same.
summary(assignment1)
## id age marpi adjwlth2
## Min. : 4003 Min. : 1.00 Min. :0.0000 Min. :-2304.98
## 1st Qu.:1269033 1st Qu.: 14.00 1st Qu.:0.0000 1st Qu.: 1.91
## Median :2464171 Median : 29.00 Median :0.0000 Median : 32.80
## Mean :3014466 Mean : 32.03 Mean :0.4178 Mean : 187.17
## 3rd Qu.:5381175 3rd Qu.: 47.00 3rd Qu.:1.0000 3rd Qu.: 143.55
## Max. :6872185 Max. :999.00 Max. :4.0000 Max. :80303.23
## NA's :28 NA's :48
## educ pubhs h_race_ethnic_new race5
## Min. : 0.00 Min. :0.00000 Length:131361 Min. :1.000
## 1st Qu.:12.00 1st Qu.:0.00000 Class :character 1st Qu.:3.000
## Median :12.00 Median :0.00000 Mode :character Median :5.000
## Mean :13.04 Mean :0.05301 Mean :3.927
## 3rd Qu.:15.00 3rd Qu.:0.00000 3rd Qu.:5.000
## Max. :20.00 Max. :1.00000 Max. :5.000
## NA's :2496 NA's :34
# Race/ethnicity (race5): rebuild the data.frame copy from assignment1.
# This replaces psid, so any earlier recoding done on psid is discarded.
psid <- data.frame(assignment1)
hist(psid$race5)

# Recode the numeric codes 1-5 as a factor with short labels.
psid$race5 <- factor(
  psid$race5,
  levels = c(1, 2, 3, 4, 5),
  labels = c("Latino", "Asian", "Black", "Other", "White")
)
prop.table(table(psid$race5))
##
## Latino Asian Black Other White
## 0.075311546 0.016123507 0.357297828 0.008632699 0.542634420
#hist(psid$race5, main="Frequency Distribution of Race")
barplot(prop.table(table(psid$race5)))

barplot(table(psid$race5))

# NOTE(review): these are the mean and median of the numeric category codes
# in assignment1; for a nominal variable they describe the coding scheme
# rather than a quantity. TRUE is spelled out because T can be reassigned.
mean(assignment1$race5, na.rm = TRUE)
## [1] 3.927155
median(assignment1$race5, na.rm = TRUE)
## [1] 5
# Two-column extract (race code and public-housing indicator) that keeps
# every row of assignment1.
keep_cols <- c("race5", "pubhs")
publicassistance <- assignment1[, keep_cols]
names(publicassistance)
## [1] "race5" "pubhs"
# Row and column counts of the extract.
nrow(publicassistance)
## [1] 131361
ncol(publicassistance)
## [1] 2