Lab916

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ipumsr)
library(readr)
library(ggplot2)
library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

library(haven)

#1 Create the vectors x, y and z, with values shown below in order. Note, ignore the [1], do not include that in the vector.

# Question 1: build the three input vectors.
x <- c(5, 10, 15, 20, 25, 30)
y <- c(-1, NA, 75, 3, 5, 8) # second element is NA (missing)
z <- 5 # a bare number is already a length-one vector; c() is not needed

# Print every vector so the code matches the three outputs recorded below
# (previously only x was printed).
x

## [1]  5 10 15 20 25 30

y

## [1] -1 NA 75  3  5  8

z

## [1] 5

#2. Multiply the first two vectors by the z vector, and store these in new objects. Print these new vectors.

# Question 2: multiply x and y by z and keep the products in new objects.
# z has length one, so R recycles it across every element; the NA in y
# propagates to the corresponding product.
x_times_z <- x * z
y_times_z <- y * z

print(x_times_z)

## [1]  25  50  75 100 125 150

print(y_times_z)

## [1]  -5  NA 375  15  25  40

# A is both products back to back. It is computed from the stored objects
# instead of being retyped by hand, so it cannot drift from x, y or z.
A <- c(x_times_z, y_times_z)
print(A)

##  [1]  25  50  75 100 125 150  -5  NA 375  15  25  40

#3. Data Management Questions. Go to Blackboard > Content > Assignments and download stata_PSID_w1.dta onto your computer. Then run the following code to generate a data frame for the questions below.

# Load the Stata extract and build the working subset used for question 3.
library(haven)
library(readr)

# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where the file exists at exactly this location.
stata_PSID_w1 <- read_dta("C:/Users/codar/OneDrive/Documents/R/Fall2020 - 7273/stata_PSID_w1.dta")

# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is knitted or run with Rscript.
if (interactive()) {
  View(stata_PSID_w1)
}
names(stata_PSID_w1)

##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"

# Keep only the six columns named by the assignment; all rows are retained.
assignment1 <- subset(
  x = stata_PSID_w1,
  select = c("id", "age", "marpi", "adjwlth2", "educ", "h_race_ethnic_new")
)

##3.1 How many variables are there in this data and what are the variable names, and how many observations in the data file?

# Number of variables: a data frame is a list of columns, so its length is
# its column count.
length(stata_PSID_w1)

## [1] 13

# Variable names.
colnames(stata_PSID_w1)

##  [1] "year"              "sex"               "age"              
##  [4] "marpi"             "educ"              "adjfinc"          
##  [7] "pubhs"             "rnthlp"            "adjwlth1"         
## [10] "adjwlth2"          "h_race_ethnic_new" "id"               
## [13] "race5"

# Number of observations: the first element of dim() is the row count.
dim(stata_PSID_w1)[1]

## [1] 131361

# Rows and columns reported together.
dim(stata_PSID_w1)

## [1] 131361     13

##3.2 Show the frequency distribution of race/ethnicity variable.

# race5 holds category codes, so its frequency distribution is a count per
# code. A histogram would bin the codes as if they were a continuous measure
# and would not print the counts. as.numeric() drops any Stata value-label
# class before tabulating; useNA = "ifany" keeps missing values visible.
race_counts <- table(as.numeric(stata_PSID_w1$race5), useNA = "ifany")
race_counts

# Bar plot of the same counts, one bar per race5 code.
barplot(
  race_counts,
  main = "Frequency distribution of race5",
  xlab = "race5 code",
  ylab = "Number of observations"
)

##3.3 What are the mean and median of adjwlth2 (wealth including home equity)?

# Mean and median of adjwlth2 on the full data set. Missing values are
# dropped explicitly; TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
mean(stata_PSID_w1$adjwlth2, na.rm = TRUE)

## [1] 187.1656

median(stata_PSID_w1$adjwlth2, na.rm = TRUE)

## [1] 32.804

# Same statistics from the assignment1 subset created in the #3 instructions;
# the subset keeps every row, so the results match.
mean(assignment1$adjwlth2, na.rm = TRUE)

## [1] 187.1656

median(assignment1$adjwlth2, na.rm = TRUE)

## [1] 32.804

##3.4 Generate five summary statistics for age (i.e., min, max, IQR, mean, and median)

# The raw data has a maximum age of 999, which is not a plausible age.
# NOTE(review): 999 is presumably a missing-value code -- confirm against the
# PSID codebook. It is recoded to NA here so it does not distort the
# summaries. %in% is used for the match because it never returns NA.
age_clean <- as.numeric(stata_PSID_w1$age)
age_clean[age_clean %in% 999] <- NA

# Five summary statistics for age, computed on the cleaned values.
# Values recorded from the raw column (999 included) before this recoding:
#   min 1, max 999, IQR 33, median 29, mean 32.02676
# Re-run the script to refresh the outputs for the cleaned column.
min(age_clean, na.rm = TRUE)

## [1] 1

max(age_clean, na.rm = TRUE)

IQR(age_clean, na.rm = TRUE)

median(age_clean, na.rm = TRUE)

mean(age_clean, na.rm = TRUE)

##3.5 How many people in the data received public assistance? How many Latinos received public assistance?

#finding conditionally met observations by creating a subset 
# Subset of observations with a positive pubhs value. The comparison uses the
# number 0, not the string "0": comparing against a string invites
# character (lexicographic) comparison instead of numeric comparison.
recpubhs <- subset(x = stata_PSID_w1, pubhs > 0)

# Only open the data viewer in an interactive session.
if (interactive()) {
  View(recpubhs)
}
count(recpubhs)

## # A tibble: 1 x 1
##       n
##   <int>
## 1  6961

# How many Latinos received pubhs.
# NOTE(review): assumes race5 code 1 denotes Latino -- confirm against the
# variable's value labels.
nrow(subset(stata_PSID_w1, pubhs > 0 & race5 == 1))

## [1] 366

# Cross-check: counting the rows directly gives the same overall number of
# observations that received pubhs as the subset above.
nrow(subset(stata_PSID_w1, pubhs > 0))

## [1] 6961

##3.6 Anything you wish to know about individuals’ experiences that are not included in the data set? (Note: unit of analysis is individual here. Open-ended question. E.g., occupation, childhood maltreatment, neighborhood characteristics, etc). List three variables that you wish you had access to.

#occupation
#urban/rural (or zip code)
#number of children

Lab916

Coda Rayo-Garza

9/16/2020