The data set Vocab{car} gives observations on gender, education and vocabulary, from respondents to U.S. General Social Surveys, 1972-2004. Summarize the relationship between education and vocabulary over the years by gender.
library(car)
## Loading required package: carData
# Preview the first rows of Vocab (columns: year, sex, education, vocabulary)
head(Vocab)
## year sex education vocabulary
## 19740001 1974 Male 14 9
## 19740002 1974 Male 16 9
## 19740003 1974 Female 10 9
## 19740004 1974 Female 10 5
## 19740005 1974 Female 12 8
## 19740006 1974 Male 16 8
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
# Group the data by year and sex, then compute the mean education (medu) and
# mean vocabulary score (mvoc) within each group.
# The pipeline must not end in head(): that would store only the first six
# group rows in Vocab_mean instead of the summary for every year x sex cell.
# ungroup() drops the leftover grouping so later steps see a plain table.
Vocab_mean <- Vocab %>%
  group_by(year, sex) %>%
  summarise(medu = mean(education), mvoc = mean(vocabulary)) %>%
  ungroup()
The ‘MASS’ package contains two data sets, ‘Animals’ and ‘mammals’. Merge the two data sets and remove the duplicated observations using ‘duplicated’.
# Fetch both data sets straight from the MASS namespace (no library() call
# or attach needed), then peek at the first rows of Animals.
dta_b <- MASS::mammals
dta_a <- MASS::Animals
head(dta_a)
## body brain
## Mountain beaver 1.35 8.1
## Cow 465.00 423.0
## Grey wolf 36.33 119.5
## Goat 27.66 115.0
## Guinea pig 1.04 5.5
## Dipliodocus 11700.00 50.0
# Preview the first rows of the mammals data (same body/brain columns)
head(dta_b)
## body brain
## Arctic fox 3.385 44.5
## Owl monkey 0.480 15.5
## Mountain beaver 1.350 8.1
## Cow 465.000 423.0
## Grey wolf 36.330 119.5
## Goat 27.660 115.0
# The animal label lives only in the row names; copy it into an ordinary
# column called "names" so it can be compared across the two tables.
dta_a[["names"]] <- rownames(dta_a)
dta_b[["names"]] <- rownames(dta_b)
# Stack the two tables row-wise into a single data frame and inspect it
dta_all <- rbind(dta_a, dta_b)
str(dta_all)
## 'data.frame': 90 obs. of 3 variables:
## $ body : num 1.35 465 36.33 27.66 1.04 ...
## $ brain: num 8.1 423 119.5 115 5.5 ...
## $ names: chr "Mountain beaver" "Cow" "Grey wolf" "Goat" ...
# Count the rows whose name already appeared in an earlier row
sum(duplicated(dta_all$names))
## [1] 23
# Keep only the first occurrence of each name.
# A logical mask is used rather than `-which(duplicated(...))`: when nothing
# is duplicated, which() returns integer(0), and indexing with -integer(0)
# selects zero rows, silently discarding the whole data frame.
dta_all <- dta_all[!duplicated(dta_all$names), ]
str(dta_all)
## 'data.frame': 67 obs. of 3 variables:
## $ body : num 1.35 465 36.33 27.66 1.04 ...
## $ brain: num 8.1 423 119.5 115 5.5 ...
## $ names: chr "Mountain beaver" "Cow" "Grey wolf" "Goat" ...
Convert the probe words data set from long format to wide format, as described.
# Load the long-format probe data; the first line of the file holds the
# column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# stringsAsFactors = TRUE keeps ID a factor on every R version (the default
# changed to FALSE in R 4.0.0), matching the structure echoed below.
dta6 <- read.table("probeL.txt", header = TRUE, stringsAsFactors = TRUE)
str(dta6)
## 'data.frame': 55 obs. of 3 variables:
## $ ID : Factor w/ 11 levels "S01","S02","S03",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ Response_Time: int 51 36 50 35 42 27 20 26 17 27 ...
## $ Position : int 1 2 3 4 5 1 2 3 4 5 ...
# Reshape long -> wide with tidyr::spread: each distinct Position value
# becomes its own column, filled with the matching Response_Time, leaving
# one row per ID.
dtaL <- tidyr::spread(dta6, key = Position, value = Response_Time)
head(dtaL)
## ID 1 2 3 4 5
## 1 S01 51 36 50 35 42
## 2 S02 27 20 26 17 27
## 3 S03 37 22 41 37 30
## 4 S04 42 36 32 34 27
## 5 S05 27 18 33 14 29
## 6 S06 43 32 43 35 40
# Prefix every position column (all columns except the first, ID) with "Pos_".
# The labels are built from the existing column names instead of a hard-coded
# 1:5, so each label always matches the position it came from, however many
# positions the data contain.
colnames(dtaL)[-1] <- paste0("Pos_", colnames(dtaL)[-1])
head(dtaL)
## ID Pos_1 Pos_2 Pos_3 Pos_4 Pos_5
## 1 S01 51 36 50 35 42
## 2 S02 27 20 26 17 27
## 3 S03 37 22 41 37 30
## 4 S04 42 36 32 34 27
## 5 S05 27 18 33 14 29
## 6 S06 43 32 43 35 40