Data wrangling In-class exercises

# Set-up: print numbers with 4 significant digits and drop the significance
# stars from printed coefficient tables.
options(digits = 4, show.signif.stars = FALSE)
# Load every package used in the exercises with a single call
# (pacman::p_load also installs any package that is missing).
# The order is left untouched: packages loaded later mask same-named
# functions from packages loaded earlier.
pacman::p_load(mlmRev, HSAUR3, knitr, kableExtra,
               readr, dplyr, ggplot2, tidyr, car,
               magrittr, tibble, purrr, stringr)

EX1

# Read nlsy86long.csv from the working directory and preview the first
# six rows.
dta1 <- read.csv(file = "nlsy86long.csv")
head(dta1, n = 6)

##     id    sex     race time grade year month   math   read
## 1 2390 Female Majority    1     0    6    67 14.286 19.048
## 2 2560 Female Majority    1     0    6    66 20.238 21.429
## 3 3740 Female Majority    1     0    6    67 17.857 21.429
## 4 4020   Male Majority    1     0    5    60  7.143  7.143
## 5 6350   Male Majority    1     1    7    78 29.762 30.952
## 6 7030   Male Majority    1     0    5    62 14.286 17.857

# Reshape to long format: stack the math and read scores into a single
# test_score column, with test_var recording which test each score is from.
# The two score columns are selected by name rather than by position, so the
# reshape keeps working if the column layout of the CSV changes.
dta1 %>%
  gather(key = test_var, value = test_score, math, read) %>%
  head()

##     id    sex     race time grade year month test_var test_score
## 1 2390 Female Majority    1     0    6    67     math     14.286
## 2 2560 Female Majority    1     0    6    66     math     20.238
## 3 3740 Female Majority    1     0    6    67     math     17.857
## 4 4020   Male Majority    1     0    5    60     math      7.143
## 5 6350   Male Majority    1     1    7    78     math     29.762
## 6 7030   Male Majority    1     0    5    62     math     14.286

EX2

# Take a working copy of the Vocab data set accessed through the car
# namespace and preview the first six rows.
dta2 <- car::Vocab
head(dta2, n = 6)

##          year    sex education vocabulary
## 20040001 2004 Female         9          3
## 20040002 2004 Female        14          6
## 20040003 2004   Male        14          9
## 20040005 2004 Female        17          8
## 20040008 2004   Male        14          1
## 20040010 2004   Male        14          7

# Inspect the dimensions and column types of dta2.
str(object = dta2)

## 'data.frame':    21638 obs. of  4 variables:
##  $ year      : int  2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
##  $ sex       : Factor w/ 2 levels "Female","Male": 1 1 2 1 2 2 1 2 2 1 ...
##  $ education : int  9 14 14 17 14 14 12 10 11 9 ...
##  $ vocabulary: int  3 6 9 8 1 7 6 6 5 1 ...

# Scatter plot of vocabulary against education, coloured by sex, with a
# separate linear fit per sex and one panel per survey year.
ggplot(
  data = dta2,
  mapping = aes(x = education, y = vocabulary, color = sex)
) +
  geom_point() +
  stat_smooth(method = "lm") +
  facet_wrap(~ year)

EX3

# Load the data.
# passwd.txt is sourced as R code; it is expected to define IDPW, the string
# spliced into the URL between the scheme and the host — TODO confirm its
# exact format.
# NOTE(review): IDPW looks like a credential prefix and the URL uses plain
# http, so it would travel unencrypted — confirm this is acceptable.
source("passwd.txt")
link3 <- paste0(
  "http://", IDPW,
  "140.116.183.121/~sheu/dataM/Data/probeL.txt"
)
# Spell out TRUE: T is an ordinary variable that can be reassigned.
dta3 <- read.table(link3, header = TRUE)
head(dta3)

##    ID Response_Time Position
## 1 S01            51        1
## 2 S01            36        2
## 3 S01            50        3
## 4 S01            35        4
## 5 S01            42        5
## 6 S02            27        1

# Reshape to wide format: one row per ID, with the response time at each
# position in its own Pos_<position> column.
# The Pos_ prefix is pasted onto Position directly, so the column labels are
# always derived from the rows actually flowing through the pipeline.
dta3W <- dta3 %>%
  mutate(Position = paste0("Pos_", Position)) %>%
  spread(key = Position, value = Response_Time) %>%
  arrange(ID)

## Warning: package 'bindrcpp' was built under R version 3.4.3

# Preview the first six rows of the wide-format table.
head(dta3W, n = 6)

##    ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
## 1 S01    51    36    50    35    42
## 2 S02    27    20    26    17    27
## 3 S03    37    22    41    37    30
## 4 S04    42    36    32    34    27
## 5 S05    27    18    33    14    29
## 6 S06    43    32    43    35    40

EX4

# Load the two Nobel tables: countries and winners.
# passwd.txt is sourced as R code; it is expected to define IDPW, the string
# spliced into each URL between the scheme and the host — TODO confirm its
# exact format.
source("passwd.txt")
link4.1 <- paste0(
  "http://", IDPW,
  "140.116.183.121/~sheu/dataM/Rdw/data/nobel_countries.txt"
)
# Spell out TRUE: T is an ordinary variable that can be reassigned.
country <- read.table(link4.1, header = TRUE)
link4.2 <- paste0(
  "http://", IDPW,
  "140.116.183.121/~sheu/dataM/Rdw/data/nobel_winners.txt"
)
winner <- read.table(link4.2, header = TRUE)

# Filtering join: keep the rows of country that have a match in winner.
# No columns from winner are added; the key is the column the two tables
# share, as reported in the join message.
country %>%
  semi_join(winner)

## Joining, by = "Year"

##   Country Year
## 1  France 2014
## 2      UK 1950
## 3      UK 2017
## 4      US 2016
## 5  Canada 2013
## 6   China 2012

# Left join: keep every row of country and attach the matching winner
# columns; rows without a match get NA in the added columns.
country %>%
  left_join(winner)

## Joining, by = "Year"

##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
## 7  Russia 2015              <NA>   <NA>
## 8  Sweden 2011              <NA>   <NA>

# Filtering join: keep only the rows of country that have no match in
# winner.
country %>%
  anti_join(winner)

## Joining, by = "Year"

##   Country Year
## 1  Russia 2015
## 2  Sweden 2011

# Full join: keep every row from both tables, filling NA where one side
# has no match.
country %>%
  full_join(winner)

## Joining, by = "Year"

##   Country Year              Name Gender
## 1  France 2014   Patrick Modiano   Male
## 2      UK 1950 Bertrand  Russell   Male
## 3      UK 2017    Kazuo Ishiguro   Male
## 4      US 2016        Bob  Dylan   Male
## 5  Canada 2013      Alice  Munro Female
## 6   China 2012            Mo Yan   Male
## 7  Russia 2015              <NA>   <NA>
## 8  Sweden 2011              <NA>   <NA>
## 9    <NA> 1938        Pearl Buck Female

Data wrangling In-class exercises

BHWang

20180326

EX1

EX2

EX3

EX4