library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(haven)
# Load the High School and Beyond extract from its Stata file. The glimpse()
# output below shows the categorical columns arriving as labelled doubles
# (<dbl+lbl>), i.e. the Stata value labels are kept.
HSBS1980 <- read_dta("hsb2.dta")
# View() opens a spreadsheet-style viewer, which is only useful in a live
# session; skip it when the script is rendered or run in batch mode.
if (interactive()) {
  View(HSBS1980)
}
# Compact overview: one line per column with its type and first values.
glimpse(HSBS1980)
## Rows: 200
## Columns: 11
## $ id <dbl> 70, 121, 86, 141, 172, 113, 50, 11, 84, 48, 75, 60, 95, 104, 3…
## $ female <dbl+lbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ race <dbl+lbl> 4, 4, 4, 4, 4, 4, 3, 1, 4, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4…
## $ ses <dbl+lbl> 1, 2, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 1, 1, 3, 2, 3, 2…
## $ schtyp <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1…
## $ prog <dbl+lbl> 1, 3, 1, 3, 2, 2, 1, 2, 1, 2, 3, 2, 2, 2, 2, 1, 2, 1, 2, 1…
## $ read <dbl> 57, 68, 44, 63, 47, 44, 50, 34, 63, 57, 60, 57, 73, 54, 45, 42…
## $ write <dbl> 52, 59, 33, 44, 52, 52, 59, 46, 57, 55, 46, 65, 60, 63, 57, 49…
## $ math <dbl> 41, 53, 54, 47, 57, 51, 42, 45, 54, 52, 51, 51, 71, 57, 50, 43…
## $ science <dbl> 47, 63, 58, 53, 53, 63, 53, 39, 58, 50, 53, 63, 61, 55, 31, 50…
## $ socst <dbl> 57, 61, 31, 56, 61, 61, 61, 36, 51, 51, 61, 61, 71, 46, 56, 56…
# The str function can help…
# Show the internal structure of prog, including its attached value labels.
str(HSBS1980$prog)
## dbl+lbl [1:200] 1, 3, 1, 3, 2, 2, 1, 2, 1, 2, 3, 2, 2, 2, 2, 1, 2, 1, 2, 1...
## @ label : chr "type of program"
## @ format.stata: chr "%9.0g"
## @ labels : Named num [1:3] 1 2 3
## ..- attr(*, "names")= chr [1:3] "general" "academic" "vocation"
# describe function from the psych package:
# summary statistics (n, mean, sd, median, ...) for the program-type codes.
describe(HSBS1980$prog)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 200 2.02 0.69 2 2.02 0 1 3 2 -0.03 -0.91 0.05
# table command: frequency count for each program-type code.
table(HSBS1980$prog)
##
## 1 2 3
## 45 105 50
library(expss)
## Loading required package: maditr
##
## To get total summary skip 'by' argument: take_all(mtcars, mean)
##
## Attaching package: 'maditr'
## The following objects are masked from 'package:dplyr':
##
## between, coalesce, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## The following object is masked from 'package:readr':
##
## cols
##
## Attaching package: 'expss'
## The following objects are masked from 'package:haven':
##
## is.labelled, read_spss
## The following objects are masked from 'package:stringr':
##
## fixed, regex
## The following objects are masked from 'package:dplyr':
##
## compute, contains, na_if, recode, vars
## The following objects are masked from 'package:purrr':
##
## keep, modify, modify_if, when
## The following objects are masked from 'package:tidyr':
##
## contains, nest
## The following object is masked from 'package:ggplot2':
##
## vars
# Print the value labels currently attached to prog.
val_lab(HSBS1980$prog)
## general academic vocation
## 1 2 3
# Add value labels for codes 1-3. The string passed to num_lab() lists one
# "code label" pair per line.
add_val_lab(HSBS1980$prog) <- num_lab("
1 general
2 academic
3 vocation
")
# Frequency table of prog once the labels are attached.
table(HSBS1980$prog)
##
## general academic vocation
## 45 105 50
# Inspect ses: a labelled double whose codes 1, 2, 3 carry the labels
# "low", "middle", "high" (see the output below).
str(HSBS1980$ses)
## dbl+lbl [1:200] 1, 2, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 1, 1, 3, 2, 3, 2...
## @ format.stata: chr "%9.0g"
## @ labels : Named num [1:3] 1 2 3
## ..- attr(*, "names")= chr [1:3] "low" "middle" "high"
# Split the survey into one data frame per SES level, selecting rows by the
# numeric code.
lowses <- subset(HSBS1980, ses == 1)
midses <- subset(HSBS1980, ses == 2)
highses <- subset(HSBS1980, ses == 3)
# Histograms ----
# Summary statistics for the writing score across the full sample.
describe(HSBS1980$write)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 200 52.77 9.48 54 53.36 11.86 31 67 36 -0.47 -0.78 0.67
# Base-graphics histogram of the writing score for each SES subset. The calls
# are kept as separate literal expressions because hist() builds its default
# title and x-axis label from the argument text.
hist(lowses$write)
hist(midses$write)
hist(highses$write)
# Bar chart of writing scores for one SES subgroup.
#
# data      : data frame holding a `write` column (one of the SES subsets).
# ses_label : text inserted into the plot title, e.g. "Low".
# total_n   : size of the full survey sample quoted in the caption; defaults
#             to the number of rows in HSBS1980.
#
# Returns the ggplot object; calling the helper at top level prints the plot.
#
# The caption reports the subgroup's own row count next to the full-sample
# size. Previously every subgroup plot was captioned "N = 200", which
# overstated the number of individuals actually shown.
plot_write_by_ses <- function(data, ses_label, total_n = nrow(HSBS1980)) {
  ggplot(data = data, mapping = aes(x = write)) +
    # geom_bar() draws one bar per distinct writing score.
    geom_bar() +
    labs(
      title = paste0(
        "Distribution of Writing Scores for ", ses_label, " SES Individuals"
      ),
      x = "Writing Scores",
      caption = paste0(
        "Data from the High School and Beyond Survey (1980). n = ",
        nrow(data), " of N = ", total_n
      )
    )
}

plot_write_by_ses(lowses, "Low")
plot_write_by_ses(midses, "Middle")
plot_write_by_ses(highses, "High")
# Install TinyTeX only when it is not already the active LaTeX distribution,
# so re-running the script does not trigger a forced reinstall every time.
# force = TRUE is kept for the install itself so that it proceeds even when a
# different LaTeX distribution is detected, as the unguarded call did.
# NOTE(review): presumably needed to render this document to PDF — confirm.
if (!tinytex::is_tinytex()) {
  tinytex::install_tinytex(force = TRUE)
}