library(pacman)
p_load(tidyverse)
Question 1
dta <- read.table("hs0.txt", h = T) %>%
mutate(female = factor(female, levels(female),
labels = c("Female", "Male")),
race = factor(race, levels(race),
labels = c("Black", "Asian", "Hispanic",
"White")),
ses = ordered(ses, levels = c("low", "middle",
"high"),
labels = c("Low", "Middle", "High"))) %>%
mutate(race = reorder(race, math, median))
## Warning: package 'bindrcpp' was built under R version 3.4.4
str(dta)
## 'data.frame': 200 obs. of 11 variables:
## $ id : int 70 121 86 141 172 113 50 11 84 48 ...
## $ female : Factor w/ 2 levels "Female","Male": 2 1 2 2 2 2 2 2 2 2 ...
## $ race : Factor w/ 4 levels "Black","Hispanic",..: 3 3 3 3 3 3 1 2 3 1 ...
## ..- attr(*, "scores")= num [1:4(1d)] 45 61 47 54
## .. ..- attr(*, "dimnames")=List of 1
## .. .. ..$ : chr "Black" "Asian" "Hispanic" "White"
## $ ses : Ord.factor w/ 3 levels "Low"<"Middle"<..: 1 2 3 3 2 2 2 2 2 2 ...
## $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
## $ prog : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
## $ read : int 57 68 44 63 47 44 50 34 63 57 ...
## $ write : int 52 59 33 44 52 52 59 46 57 55 ...
## $ math : int 41 53 54 47 57 51 42 45 54 52 ...
## $ science: int 47 63 58 53 53 63 53 39 58 NA ...
## $ socst : int 57 61 31 56 61 61 61 36 51 51 ...
bw <- with(dta, IQR(math)/(length(math)^(1/3)))
ggplot() +
stat_bin(data = subset(dta, female=="Male"), binwidth = bw,
aes(math, color = "Male", fill = "Male", y = - ..density.. )) +
stat_bin(data = subset(dta, female == "Female"), binwidth = bw,
aes(math, color = "Female", fill = "Female", y = ..density.. )) +
scale_color_manual(values = c("black", "black"),
guide = guide_legend(title = NULL, direction = "horizontal",
title.position = "top", reverse = TRUE,
label.position = "bottom", label.hjust = .5, label.vjust = .5,
label.theme = element_text(angle = 90) ) ) +
scale_fill_manual(values = c("White", "gray80"),
guide = guide_legend(title = NULL, reverse = TRUE,
direction = "horizontal", title.position = "top",
label.position = "bottom", label.hjust = .5, label.vjust = .5,
label.theme = element_text(angle = 90))) +
scale_x_continuous(limits = c(30, 80), breaks=seq(30, 80, by = 5)) +
labs(x = "Mathematic score", y = "Density") +
coord_flip() +
theme_bw() +
theme(legend.position=c(.9, .85))

#這個圖是男女在數學成績上的機率密度長條圖,並轉成橫向來呈現
Question 2
dta <- read.table("hs0.txt", h = T) %>%
mutate(female = factor(female, levels(female),
labels = c("Female", "Male")),
race = factor(race, levels(race),
labels = c("Black", "Asian", "Hispanic",
"White")),
ses = ordered(ses, levels = c("low", "middle",
"high"),
labels = c("Low", "Middle", "High"))) %>%
mutate(race = reorder(race, math, median))
str(dta)
## 'data.frame': 200 obs. of 11 variables:
## $ id : int 70 121 86 141 172 113 50 11 84 48 ...
## $ female : Factor w/ 2 levels "Female","Male": 2 1 2 2 2 2 2 2 2 2 ...
## $ race : Factor w/ 4 levels "Black","Hispanic",..: 3 3 3 3 3 3 1 2 3 1 ...
## ..- attr(*, "scores")= num [1:4(1d)] 45 61 47 54
## .. ..- attr(*, "dimnames")=List of 1
## .. .. ..$ : chr "Black" "Asian" "Hispanic" "White"
## $ ses : Ord.factor w/ 3 levels "Low"<"Middle"<..: 1 2 3 3 2 2 2 2 2 2 ...
## $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
## $ prog : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
## $ read : int 57 68 44 63 47 44 50 34 63 57 ...
## $ write : int 52 59 33 44 52 52 59 46 57 55 ...
## $ math : int 41 53 54 47 57 51 42 45 54 52 ...
## $ science: int 47 63 58 53 53 63 53 39 58 NA ...
## $ socst : int 57 61 31 56 61 61 61 36 51 51 ...
#使用老師放在Case studies裡面的NLSY裡的summarySE
library(Rmisc)
## Loading required package: plyr
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following object is masked from 'package:mosaic':
##
## count
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
dta1_1<-summarySE(dta,measurevar = "math",groupvars = c("race","female","ses"))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
dta1_1%>%ggplot(aes(race,math,color=female)) +
geom_point(position = position_dodge(0.1),size=1.5) +
facet_grid(~ses) +
geom_errorbar(aes(ymin=math-se,ymax=math+se),width=0.1)
## Warning: Removed 3 rows containing missing values (geom_errorbar).

#這張圖顯示依據性別、社經地位、種族所形成的mean,error bar。和課堂上所顯示的圖來比較發現,課堂上是以預測資料所形成的圖,而習題中則是資料產生的圖。
Question 3
dta3<-read.table("kkk.txt", sep="",header=T)
## Warning in read.table("kkk.txt", sep = "", header = T): incomplete final
## line found by readTableHeader on 'kkk.txt'
head(dta3)
## Test Format Accuracy SE
## 1 KDT Picture 93.7 0.9
## 2 KDT Word 96.4 0.7
## 3 PPT Picture 90.6 1.0
## 4 PPT Word 88.9 1.0
str(dta3)
## 'data.frame': 4 obs. of 4 variables:
## $ Test : Factor w/ 2 levels "KDT","PPT": 1 1 2 2
## $ Format : Factor w/ 2 levels "Picture","Word": 1 2 1 2
## $ Accuracy: num 93.7 96.4 90.6 88.9
## $ SE : num 0.9 0.7 1 1
ggplot(dta3,aes(Test, Accuracy,fill=Format))+
coord_cartesian(ylim=c(80,100))+
geom_bar(stat="identity",position='dodge', colour="black")+
geom_errorbar(aes(ymin = Accuracy-SE , ymax= Accuracy+SE), width =.2,position=position_dodge(.9))+
scale_fill_manual(values=c("#56B4E9","#0072B2"))
