library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ipumsr)
library(readr)
library(ggplot2)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(haven)
# 1. Create the vectors x, y and z with the values shown below, in order.
x <- c(5, 10, 15, 20, 25, 30)
y <- c(-1, NA, 75, 3, 5, 8)
z <- 5 # length-one vector; recycled across x and y when multiplying
x
## [1] 5 10 15 20 25 30
y
## [1] -1 NA 75 3 5 8
z
## [1] 5

# 2. Multiply the first two vectors by the z vector, store the results in new
# objects, and print the new vectors.
xz <- x * z
yz <- y * z # the NA in y propagates to the matching element of the product
print(xz)
## [1] 25 50 75 100 125 150
print(yz)
## [1] -5 NA 375 15 25 40
# A holds both products in one vector. It is built from xz and yz rather than
# retyped from the console output, so it stays in sync with x, y and z.
A <- c(xz, yz)
print(A)
## [1] 25 50 75 100 125 150 -5 NA 375 15 25 40
# 3. Data management questions.
# Download stata_PSID_w1.dta (Blackboard > Content > Assignments) onto your
# computer, then run the code below to build the data frames used by the
# questions that follow.
library(haven)
library(readr)

# Location of the Stata file. Set the PSID_W1_PATH environment variable to
# point at your own copy; when it is unset the original author's path is used.
psid_path <- Sys.getenv(
  "PSID_W1_PATH",
  unset = "C:/Users/codar/OneDrive/Documents/R/Fall2020 - 7273/stata_PSID_w1.dta"
)
# Fail early with a readable message instead of an opaque read error.
if (!file.exists(psid_path)) {
  stop("Cannot find the PSID data file: ", psid_path, call. = FALSE)
}
stata_PSID_w1 <- read_dta(psid_path)

# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script is knitted or run with Rscript.
if (interactive()) {
  View(stata_PSID_w1)
}
names(stata_PSID_w1)
## [1] "year" "sex" "age"
## [4] "marpi" "educ" "adjfinc"
## [7] "pubhs" "rnthlp" "adjwlth1"
## [10] "adjwlth2" "h_race_ethnic_new" "id"
## [13] "race5"

# Six-column working subset used as an alternative data source in question 3.3.
assignment1 <- subset(
  x = stata_PSID_w1,
  select = c("id", "age", "marpi", "adjwlth2", "educ", "h_race_ethnic_new")
)
## 3.1 How many variables are there in this data, what are the variable
## names, and how many observations are in the data file?

# Number of variables (columns); recorded result: 13
print(ncol(stata_PSID_w1))

# Variable names; recorded result:
#   year, sex, age, marpi, educ, adjfinc, pubhs, rnthlp, adjwlth1,
#   adjwlth2, h_race_ethnic_new, id, race5
print(names(stata_PSID_w1))

# Number of observations (rows); recorded result: 131361
print(nrow(stata_PSID_w1))

# dim() reports observations and variables in one call; recorded result:
# 131361 13
print(dim(stata_PSID_w1))
## 3.2 Show the frequency distribution of the race/ethnicity variable.
# race5 holds category codes (question 3.5 filters on race5 == 1), so a
# frequency table is the right summary. hist() would bin the codes as if they
# were a continuous measurement.
race_counts <- dplyr::count(stata_PSID_w1, race5)
race_counts$share <- race_counts$n / sum(race_counts$n) # relative frequency
print(race_counts)

# Bar chart of the same counts, one bar per race5 code.
barplot(
  race_counts$n,
  names.arg = as.character(race_counts$race5),
  xlab = "race5",
  ylab = "Frequency"
)
## 3.3 What are the mean and median of adjwlth2 (wealth including home equity)?
# na.rm = TRUE drops missing values before aggregating. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
mean(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(stata_PSID_w1$adjwlth2, na.rm = TRUE)
## [1] 32.804

# The same statistics from the assignment1 subset built in the question 3
# setup; the column is identical, so the results match.
mean(assignment1$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(assignment1$adjwlth2, na.rm = TRUE)
## [1] 32.804
## 3.4 Generate five summary statistics for age (i.e., min, max, IQR, mean,
## and median).
# The raw column has a maximum of 999, which is not a plausible age.
# NOTE(review): 999 is presumably the "missing / don't know" code for age;
# confirm against the PSID codebook. It is recoded to NA here so it does not
# distort the maximum and the mean.
age_missing_code <- 999
age <- as.numeric(stata_PSID_w1$age) # plain numeric copy; source data untouched
age[age %in% age_missing_code] <- NA

# How many observations were recoded, for transparency.
sum(is.na(age))

# The statistics recorded earlier (max 999, mean 32.02676, ...) included the
# 999 codes; re-run this block to get the corrected values.
min(age, na.rm = TRUE)
max(age, na.rm = TRUE)
IQR(age, na.rm = TRUE)
median(age, na.rm = TRUE)
mean(age, na.rm = TRUE)
## 3.5 How many people in the data received public assistance? How many
## Latinos received public assistance?
# Subset of observations with a positive pubhs value. The comparison is
# against the number 0: comparing with the string "0" makes R convert pubhs to
# text and compare in locale-dependent collation order.
# NOTE(review): assumes pubhs > 0 marks receipt of public assistance; confirm
# against the codebook.
recpubhs <- subset(stata_PSID_w1, pubhs > 0)
# View() only works in an interactive session; skip it when knitting.
if (interactive()) {
  View(recpubhs)
}
nrow(recpubhs)
## [1] 6961

# How many Latinos received public assistance.
# NOTE(review): assumes race5 == 1 is the Latino category; confirm against the
# codebook or value labels.
nrow(subset(recpubhs, race5 == 1))
## [1] 366

# Independent cross-check of the overall count without building a subset.
sum(stata_PSID_w1$pubhs > 0, na.rm = TRUE)
## [1] 6961
##3.6 Anything you wish to know about individuals’ experiences that are not included in the data set? (Note: unit of analysis is individual here. Open-ended question. E.g., occupation, childhood maltreatment, neighborhood characteristics, etc). List three variables that you wish you had access to.
#occupation
#urban/rural (or zip code)
#number of children