library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Remote location of the Vocab data set.
theURL <- "https://raw.githubusercontent.com/georg4re/LearningR/master/Vocab.csv"

# Read the comma-separated file; its first line supplies the column names.
vocab <- read.table(theURL, header = TRUE, sep = ",")

# Mean and median of education and vocabulary for each year/sex group.
# The result is left unassigned so that it is printed.
vocab %>%
  group_by(year, sex) %>%
  summarize(
    mean(education),
    mean(vocabulary),
    median(education),
    median(vocabulary)
  )
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
## # A tibble: 44 x 6
## # Groups: year [22]
## year sex `mean(education~ `mean(vocabular~ `median(educati~
## <int> <chr> <dbl> <dbl> <dbl>
## 1 1974 Fema~ 11.8 6.08 12
## 2 1974 Male 11.9 5.96 12
## 3 1976 Fema~ 11.6 6.14 12
## 4 1976 Male 12.1 5.95 12
## 5 1978 Fema~ 11.8 6.02 12
## 6 1978 Male 12.4 5.89 12
## 7 1982 Fema~ 12.1 5.77 12
## 8 1982 Male 12.4 5.71 12
## 9 1984 Fema~ 12.4 6.07 12
## 10 1984 Male 12.7 5.96 12
## # ... with 34 more rows, and 1 more variable: `median(vocabulary)` <dbl>
# Keep only the rows whose education value is greater than 12.
collegeDf <- vocab %>% subset(education > 12)

# Convert to a tibble so the knitted document shows a compact preview.
df <- as_tibble(collegeDf)
print(df)
## # A tibble: 14,813 x 5
## X year sex education vocabulary
## <int> <int> <chr> <int> <int>
## 1 19740001 1974 Male 14 9
## 2 19740002 1974 Male 16 9
## 3 19740006 1974 Male 16 8
## 4 19740007 1974 Male 17 9
## 5 19740015 1974 Female 14 5
## 6 19740027 1974 Male 15 7
## 7 19740030 1974 Female 13 7
## 8 19740037 1974 Male 14 8
## 9 19740040 1974 Female 16 9
## 10 19740041 1974 Male 16 8
## # ... with 14,803 more rows
library(dplyr)
# Rename three columns; each mapping reads new_name = old_name.
newCollegeDF <- collegeDf %>%
  rename(
    gender = sex,
    academic_level = education,
    lexicon = vocabulary
  )

# Tibble copy, used so the knitted document shows a compact preview.
df <- as_tibble(newCollegeDF)
print(df)
## # A tibble: 14,813 x 5
## X year gender academic_level lexicon
## <int> <int> <chr> <int> <int>
## 1 19740001 1974 Male 14 9
## 2 19740002 1974 Male 16 9
## 3 19740006 1974 Male 16 8
## 4 19740007 1974 Male 17 9
## 5 19740015 1974 Female 14 5
## 6 19740027 1974 Male 15 7
## 7 19740030 1974 Female 13 7
## 8 19740037 1974 Male 14 8
## 9 19740040 1974 Female 16 9
## 10 19740041 1974 Male 16 8
## # ... with 14,803 more rows
# Mean and median of academic_level and lexicon for each year/gender group.
# The result is left unassigned so that it is printed.
newCollegeDF %>%
  group_by(year, gender) %>%
  summarize(
    mean(academic_level),
    mean(lexicon),
    median(academic_level),
    median(lexicon)
  )
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
## # A tibble: 44 x 6
## # Groups: year [22]
## year gender `mean(academic_~ `mean(lexicon)` `median(academi~
## <int> <chr> <dbl> <dbl> <dbl>
## 1 1974 Female 15.0 7.43 15
## 2 1974 Male 15.4 7.37 15
## 3 1976 Female 15.1 7.60 15
## 4 1976 Male 15.3 7.26 15
## 5 1978 Female 14.9 7.19 14
## 6 1978 Male 15.6 7.21 15
## 7 1982 Female 15.0 6.97 15
## 8 1982 Male 15.2 6.98 15
## 9 1984 Female 15.0 7.05 15
## 10 1984 Male 15.9 7.21 16
## # ... with 34 more rows, and 1 more variable: `median(lexicon)` <dbl>
# Recode two columns in a single pass, then convert to a tibble:
#   gender         - "Female" becomes "F"; every other non-missing value
#                    becomes "M".
#   academic_level - <= 14 -> "2 Year College", <= 16 -> "Undergraduate",
#                    anything larger -> "Graduate".
# ifelse() propagates NA, so missing inputs stay missing.
newCollegeDF <- newCollegeDF %>%
  mutate(
    gender = ifelse(gender == "Female", "F", "M"),
    academic_level = ifelse(
      academic_level <= 14,
      "2 Year College",
      ifelse(academic_level <= 16, "Undergraduate", "Graduate")
    )
  ) %>%
  as_tibble()

print(newCollegeDF)
## # A tibble: 14,813 x 5
## X year gender academic_level lexicon
## <int> <int> <chr> <chr> <int>
## 1 19740001 1974 M 2 Year College 9
## 2 19740002 1974 M Undergraduate 9
## 3 19740006 1974 M Undergraduate 8
## 4 19740007 1974 M Graduate 9
## 5 19740015 1974 F 2 Year College 5
## 6 19740027 1974 M Undergraduate 7
## 7 19740030 1974 F 2 Year College 7
## 8 19740037 1974 M 2 Year College 8
## 9 19740040 1974 F Undergraduate 9
## 10 19740041 1974 M Undergraduate 8
## # ... with 14,803 more rows