InClass Q4

The data set Vocab{car} gives observations on gender, education and vocabulary, from respondents to U.S. General Social Surveys, 1972-2004. Summarize the relationship between education and vocabulary over the years by gender.

library(car)
## Loading required package: carData
# Preview the first six rows of the Vocab data set
head(Vocab, n = 6L)
##          year    sex education vocabulary
## 19740001 1974   Male        14          9
## 19740002 1974   Male        16          9
## 19740003 1974 Female        10          9
## 19740004 1974 Female        10          5
## 19740005 1974 Female        12          8
## 19740006 1974   Male        16          8
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
# Group the data by year and sex, then compute the mean years of education
# (medu) and the mean vocabulary score (mvoc) within each group.
# The complete summary is stored in Vocab_mean; head() is applied only when
# previewing, so the stored result is not truncated to its first six rows.
Vocab_mean <- Vocab %>%
  group_by(year, sex) %>%
  summarise(medu = mean(education), mvoc = mean(vocabulary)) %>%
  ungroup()  # drop the leftover grouping so later steps see a plain table
head(Vocab_mean)

InClass Q5

The ‘MASS’ package contains two data sets, ‘Animals’ and ‘mammals’. Merge the two data sets and remove the duplicated observations using ‘duplicated’.

# Load the two data sets directly from the MASS namespace
dta_a <- MASS::Animals
dta_b <- MASS::mammals
head(dta_a)
##                     body brain
## Mountain beaver     1.35   8.1
## Cow               465.00 423.0
## Grey wolf          36.33 119.5
## Goat               27.66 115.0
## Guinea pig          1.04   5.5
## Dipliodocus     11700.00  50.0
head(dta_b)
##                    body brain
## Arctic fox        3.385  44.5
## Owl monkey        0.480  15.5
## Mountain beaver   1.350   8.1
## Cow             465.000 423.0
## Grey wolf        36.330 119.5
## Goat             27.660 115.0
# The animal names exist only as row names; copy them into a regular
# "names" column so they can be compared once the two tables are stacked
dta_a$names <- row.names(dta_a)
dta_b$names <- row.names(dta_b)
# Stack the two data frames into a single data frame
dta_all <- rbind(dta_a, dta_b)
str(dta_all)
## 'data.frame':    90 obs. of  3 variables:
##  $ body : num  1.35 465 36.33 27.66 1.04 ...
##  $ brain: num  8.1 423 119.5 115 5.5 ...
##  $ names: chr  "Mountain beaver" "Cow" "Grey wolf" "Goat" ...
# Count the rows whose name already appeared in an earlier row
sum(duplicated(dta_all$names))
## [1] 23
# Keep only the first occurrence of each name.
# A logical mask is used instead of -which(...): with no duplicates,
# -which(...) is an empty index and would select zero rows rather than all.
dta_all <- dta_all[!duplicated(dta_all$names), ]
str(dta_all)
## 'data.frame':    67 obs. of  3 variables:
##  $ body : num  1.35 465 36.33 27.66 1.04 ...
##  $ brain: num  8.1 423 119.5 115 5.5 ...
##  $ names: chr  "Mountain beaver" "Cow" "Grey wolf" "Goat" ...
## 'data.frame':    67 obs. of  3 variables:
##  $ body : num  1.35 465 36.33 27.66 1.04 ...
##  $ brain: num  8.1 423 119.5 115 5.5 ...
##  $ names: chr  "Mountain beaver" "Cow" "Grey wolf" "Goat" ...

InClass Q6

Convert the probe-word data set (probeL.txt) from long format to wide format: one row per subject ID and one response-time column per position.

# Load the long-format probe-word data (columns: ID, Response_Time, Position).
# header = TRUE: the first line of the file holds the column names.
# stringsAsFactors = TRUE is spelled out so that ID is read as a factor on
# every R version (the default changed to FALSE in R 4.0.0).
dta6 <- read.table("probeL.txt", header = TRUE, stringsAsFactors = TRUE)
str(dta6)
## 'data.frame':    55 obs. of  3 variables:
##  $ ID           : Factor w/ 11 levels "S01","S02","S03",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ Response_Time: int  51 36 50 35 42 27 20 26 17 27 ...
##  $ Position     : int  1 2 3 4 5 1 2 3 4 5 ...
# Reshape from long to wide with tidyr::spread(): the values of Position
# become the new column names and Response_Time fills the cells
dtaL <- tidyr::spread(dta6, key = Position, value = Response_Time)
head(dtaL, n = 6L)
##    ID  1  2  3  4  5
## 1 S01 51 36 50 35 42
## 2 S02 27 20 26 17 27
## 3 S03 37 22 41 37 30
## 4 S04 42 36 32 34 27
## 5 S05 27 18 33 14 29
## 6 S06 43 32 43 35 40
# Prefix every column except the first (ID) with "Pos_".
# The new names are built from the existing column names rather than a
# hard-coded 1:5, so the rename stays correct for any number of positions.
colnames(dtaL)[-1] <- paste0("Pos_", colnames(dtaL)[-1])
head(dtaL)
##    ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
## 1 S01    51    36    50    35    42
## 2 S02    27    20    26    17    27
## 3 S03    37    22    41    37    30
## 4 S04    42    36    32    34    27
## 5 S05    27    18    33    14    29
## 6 S06    43    32    43    35    40