# Scale two numeric vectors by the same constant. The NA in `y` propagates
# through the multiplication, so `yz` keeps an NA in the same position.
x <- seq(5, 30, by = 5)
y <- c(-1, NA, 75, 3, 5, 8)
z <- 5
xz <- z * x
yz <- z * y
print(xz)
## [1] 25 50 75 100 125 150
print(yz)
## [1] -5 NA 375 15 25 40
library(haven)
library(readr)
# Load the PSID extract from its Stata file. The location defaults to the
# original author's path but can be overridden through the PSID_DTA_PATH
# environment variable, so the script is not tied to one machine.
psid_path <- Sys.getenv(
  "PSID_DTA_PATH",
  unset = "C:/Users/maman/OneDrive/DEM Fall 2020/DEM 7273/stata_PSID_w1.dta"
)
stata <- read_dta(psid_path)

# View the data. View() is an interactive tool, so it is skipped when the
# script is knitted or run in batch mode.
if (interactive()) {
  View(stata)
}

# Histogram of the race codes; race5 is converted to a factor only later in
# the script, so it is plotted here as stored in the file.
hist(stata$race5)

# Select variables into a new data set.
assignment1 <- subset(
  stata,
  select = c(
    "id", "age", "marpi", "adjwlth2", "educ", "h_race_ethnic_new", "race5"
  )
)
## 3.1
# Column names of the full data set.
colnames(stata)
## [1] "year" "sex" "age"
## [4] "marpi" "educ" "adjfinc"
## [7] "pubhs" "rnthlp" "adjwlth1"
## [10] "adjwlth2" "h_race_ethnic_new" "id"
## [13] "race5"
# Number of rows followed by number of columns.
c(nrow(stata), ncol(stata))
## [1] 131361 13
# 3.2
# Convert the race codes 1-5 into a labelled factor.
stata$race5 <- factor(
  stata$race5,
  levels = 1:5,
  labels = c("Latino", "Asian", "Black", "Other", "White")
)

# Bar charts of the race distribution: proportions first, then raw counts.
race_counts <- table(stata$race5)
barplot(prop.table(race_counts))
barplot(race_counts)
# 3.3
# Mean and median of wealth including home equity (adjwlth2), with missing
# values dropped. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned, which would silently change the result.
mean(stata$adjwlth2, na.rm = TRUE)
## [1] 187.1656
median(stata$adjwlth2, na.rm = TRUE)
## [1] 32.804
# 3.4
# Summary statistics for age exactly as recorded in the file.
min(stata$age)
## [1] 1
max(stata$age)
## [1] 999
median(stata$age)
## [1] 29
mean(stata$age)
## [1] 32.02676
IQR(stata$age)
## [1] 33
# The maximum of 999 is not a plausible age, so the mean above is inflated.
# NOTE(review): 999 looks like a missing-value code -- confirm against the
# PSID codebook. The same statistics are recomputed here with 999 treated
# as missing.
age_valid <- replace(stata$age, stata$age %in% 999, NA)
min(age_valid, na.rm = TRUE)
max(age_valid, na.rm = TRUE)
median(age_valid, na.rm = TRUE)
mean(age_valid, na.rm = TRUE)
IQR(age_valid, na.rm = TRUE)
# 3.5
# Counts of records without (0) and with (1) government rent assistance.
table(stata$rnthlp)
##
## 0 1
## 128150 3163
# Latino records that received rent assistance. Columns are referenced by
# bare name because filter() looks them up inside `stata`, and rnthlp is
# compared with the number 1 because the column is numeric; comparing it
# with the string "1" only worked through implicit coercion.
dplyr::filter(stata, rnthlp == 1, race5 == "Latino")
## # A tibble: 153 x 13
## year sex age marpi educ adjfinc pubhs rnthlp adjwlth1 adjwlth2
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2009 fema~ 28 1 11 14.2 0 1 7.99e+0 7.99e+0
## 2 2009 male 25 1 12 27.4 0 1 -1.46e+1 -1.46e+1
## 3 2009 male 4 0 11 14.2 0 1 7.99e+0 7.99e+0
## 4 2009 male 4 0 12 27.4 0 1 -1.46e+1 -1.46e+1
## 5 2009 male 2 0 11 14.2 0 1 7.99e+0 7.99e+0
## 6 2009 fema~ 1 0 12 27.4 0 1 -1.46e+1 -1.46e+1
## 7 2009 fema~ 28 1 12 27.4 0 1 -1.46e+1 -1.46e+1
## 8 2009 fema~ 5 0 12 27.4 0 1 -1.46e+1 -1.46e+1
## 9 2009 male 27 1 11 14.2 0 1 7.99e+0 7.99e+0
## 10 2003 fema~ 92 0 0 7.58 0 1 4.78e-3 4.78e-3
## # ... with 143 more rows, and 3 more variables: h_race_ethnic_new <chr>,
## # id <dbl>, race5 <fct>
# Question 3.6: Geographic variables such as state or county would be helpful, along with employment status and place of birth (foreign or domestic).
## content
head(stata, n = 6L) # first six rows (the default count)
## # A tibble: 6 x 13
## year sex age marpi educ adjfinc pubhs rnthlp adjwlth1 adjwlth2
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2001 male 49 1 9 50.9 0 0 23.0 113.
## 2 2003 male 51 1 9 31.1 0 0 3.83 119.
## 3 2005 male 53 1 9 21.3 0 0 6.55 116.
## 4 2007 male 55 1 9 76.5 0 0 26.3 129.
## 5 2009 male 57 1 9 19.9 0 0 12.1 112.
## 6 2011 male 59 1 10 30.9 0 0 4.82 104.
## # ... with 3 more variables: h_race_ethnic_new <chr>, id <dbl>, race5 <fct>
tail(stata, n = 6L) # last six rows (the default count)
## # A tibble: 6 x 13
## year sex age marpi educ adjfinc pubhs rnthlp adjwlth1 adjwlth2
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2009 male 29 1 12 75.2 0 0 -20.8 -20.8
## 2 2011 male 31 1 11 65.1 0 0 -18.0 -18.0
## 3 2009 fema~ 22 1 12 30.7 0 0 208. 208.
## 4 2011 fema~ 24 1 12 59.7 0 0 7.66 7.66
## 5 2009 fema~ 2 0 12 30.7 0 0 208. 208.
## 6 2011 fema~ 4 0 12 59.7 0 0 7.66 7.66
## # ... with 3 more variables: h_race_ethnic_new <chr>, id <dbl>, race5 <fct>
## size
c(nrow(stata), ncol(stata)) # observations, then columns
## [1] 131361 13
dim(stata)[1L] # number of observations
## [1] 131361
length(stata) # number of columns/variables
## [1] 13
## summary
names(stata) # column names
## [1] "year" "sex" "age"
## [4] "marpi" "educ" "adjfinc"
## [7] "pubhs" "rnthlp" "adjwlth1"
## [10] "adjwlth2" "h_race_ethnic_new" "id"
## [13] "race5"
utils::str(stata) # type, attributes and leading values of every column
## tibble [131,361 x 13] (S3: tbl_df/tbl/data.frame)
## $ year : num [1:131361] 2001 2003 2005 2007 2009 ...
## ..- attr(*, "label")= chr "Year"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ sex : chr [1:131361] "male" "male" "male" "male" ...
## ..- attr(*, "label")= chr "Sex of respondent"
## ..- attr(*, "format.stata")= chr "%9s"
## $ age : num [1:131361] 49 51 53 55 57 59 47 49 51 53 ...
## ..- attr(*, "label")= chr "Age of respondent"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ marpi : num [1:131361] 1 1 1 1 1 1 0 0 0 0 ...
## ..- attr(*, "label")= chr "Marital pairs indicator"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ educ : num [1:131361] 9 9 9 9 9 10 12 12 12 12 ...
## ..- attr(*, "label")= chr "Years completed education"
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ adjfinc : num [1:131361] 50.9 31.1 21.3 76.5 19.9 ...
## ..- attr(*, "label")= chr "Family income in prev yr in 1000s of year 2000 "
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ pubhs : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "1 = lives in public housing"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ rnthlp : num [1:131361] 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "1 = received govt rent assistance"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ adjwlth1 : num [1:131361] 23.05 3.83 6.55 26.29 12.14 ...
## ..- attr(*, "label")= chr "Wealth (excluding home equity) in 1000s of yr 2000 "
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ adjwlth2 : num [1:131361] 113 119 116 129 112 ...
## ..- attr(*, "label")= chr "Wealth (including home equity) in 1000s of yr 2000 "
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ h_race_ethnic_new: chr [1:131361] "NL White" "NL White" "NL White" "NL White" ...
## ..- attr(*, "label")= chr "Race/ethnicity updated codes (5/26/14)"
## ..- attr(*, "format.stata")= chr "%16s"
## $ id : num [1:131361] 4003 4003 4003 4003 4003 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ race5 : Factor w/ 5 levels "Latino","Asian",..: 5 5 5 5 5 5 5 5 5 5 ...
summary(object = stata) # per-column summaries; more relevant to small data sets
## year sex age marpi
## Min. :2001 Length:131361 Min. : 1.00 Min. :0.0000
## 1st Qu.:2003 Class :character 1st Qu.: 14.00 1st Qu.:0.0000
## Median :2007 Mode :character Median : 29.00 Median :0.0000
## Mean :2006 Mean : 32.03 Mean :0.4178
## 3rd Qu.:2009 3rd Qu.: 47.00 3rd Qu.:1.0000
## Max. :2011 Max. :999.00 Max. :4.0000
## NA's :28
## educ adjfinc pubhs rnthlp
## Min. : 0.00 Min. :-929.60 Min. :0.00000 Min. :0.00000
## 1st Qu.:12.00 1st Qu.: 24.04 1st Qu.:0.00000 1st Qu.:0.00000
## Median :12.00 Median : 45.18 Median :0.00000 Median :0.00000
## Mean :13.04 Mean : 60.39 Mean :0.05301 Mean :0.02409
## 3rd Qu.:15.00 3rd Qu.: 75.31 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :20.00 Max. :5044.84 Max. :1.00000 Max. :1.00000
## NA's :2496 NA's :48 NA's :34 NA's :48
## adjwlth1 adjwlth2 h_race_ethnic_new id
## Min. :-2467.18 Min. :-2304.98 Length:131361 Min. : 4003
## 1st Qu.: 0.01 1st Qu.: 1.91 Class :character 1st Qu.:1269033
## Median : 9.98 Median : 32.80 Mode :character Median :2464171
## Mean : 129.48 Mean : 187.17 Mean :3014466
## 3rd Qu.: 58.05 3rd Qu.: 143.55 3rd Qu.:5381175
## Max. :80199.41 Max. :80303.23 Max. :6872185
## NA's :48 NA's :48
## race5
## Latino: 9893
## Asian : 2118
## Black :46935
## Other : 1134
## White :71281
##
##
Key dplyr functions:
1. filter
2. select
3. mutate
4. arrange
5. summarize
6. group_by
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```r
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
```
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.