Q1

此圖將男、女學生數學成績的直方圖以背對背的方式左右並列呈現，方便比較兩者的分布。

library(pacman)
p_load(tidyverse)

# Read the high-school data set and recode its categorical columns.
#
# `stringsAsFactors = TRUE` is requested explicitly: since R 4.0 read.table()
# keeps strings as character vectors by default, in which case levels() on a
# column is NULL and relabelling through `levels(female)` / `levels(race)`
# fails. Spelling out `header = TRUE` also avoids partial argument matching
# and the reassignable `T` shorthand.
dta <- read.table("hs0.txt", header = TRUE, stringsAsFactors = TRUE) %>%
  mutate(
    # With `levels` omitted, the labels are applied to the existing levels
    # in their current order.
    female = factor(female, labels = c("Female", "Male")),
    race = factor(race,
                  labels = c("Black", "Asian", "Hispanic", "White")),
    # Socio-economic status is ordinal: low < middle < high.
    ses = ordered(ses,
                  levels = c("low", "middle", "high"),
                  labels = c("Low", "Middle", "High"))
  ) %>%
  # Reorder the race levels by the median math score within each level.
  mutate(race = reorder(race, math, median))

# Inspect the structure of the recoded data frame.
str(dta)

'data.frame':   200 obs. of  11 variables:
 $ id     : int  70 121 86 141 172 113 50 11 84 48 ...
 $ female : Factor w/ 2 levels "Female","Male": 2 1 2 2 2 2 2 2 2 2 ...
 $ race   : Factor w/ 4 levels "Black","Hispanic",..: 3 3 3 3 3 3 1 2 3 1 ...
  ..- attr(*, "scores")= num [1:4(1d)] 45 61 47 54
  .. ..- attr(*, "dimnames")=List of 1
  .. .. ..$ : chr  "Black" "Asian" "Hispanic" "White"
 $ ses    : Ord.factor w/ 3 levels "Low"<"Middle"<..: 1 2 3 3 2 2 2 2 2 2 ...
 $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
 $ prog   : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
 $ read   : int  57 68 44 63 47 44 50 34 63 57 ...
 $ write  : int  52 59 33 44 52 52 59 46 57 55 ...
 $ math   : int  41 53 54 47 57 51 42 45 54 52 ...
 $ science: int  47 63 58 53 53 63 53 39 58 NA ...
 $ socst  : int  57 61 31 56 61 61 61 36 51 51 ...

# Back-to-back histograms of math scores for male and female students.

# Bin width: IQR of the math scores divided by the cube root of the sample size.
bw <- with(dta, IQR(math) / length(math)^(1/3))

# The colour and fill scales use the same legend specification.
shared_guide <- guide_legend(
  title = NULL, title.position = "top",
  direction = "horizontal", reverse = TRUE,
  label.position = "bottom", label.hjust = .5, label.vjust = .5,
  label.theme = element_text(angle = 90)
)

# One data subset per gender so each gets its own histogram layer.
male_rows <- subset(dta, female == "Male")
female_rows <- subset(dta, female == "Female")

# NOTE(review): `..density..` is the older ggplot2 notation for computed
# statistics; newer releases prefer after_stat(density). Kept unchanged here.
ggplot() +
  # Male densities are negated so their bars extend in the opposite direction.
  stat_bin(mapping = aes(math, y = - ..density.., color = "Male", fill = "Male"),
           data = male_rows,
           binwidth = bw) +
  stat_bin(mapping = aes(math, y = ..density.., color = "Female", fill = "Female"),
           data = female_rows,
           binwidth = bw) +
  scale_color_manual(values = c("black", "black"), guide = shared_guide) +
  scale_fill_manual(values = c("White", "gray80"), guide = shared_guide) +
  # Score axis from 30 to 80 with a tick every 5 points.
  scale_x_continuous(limits = c(30, 80), breaks = seq(30, 80, by = 5)) +
  labs(x = "Mathematic score", y = "Density") +
  # Flip the axes so that the scores run vertically.
  coord_flip() +
  theme_bw() +
  theme(legend.position = c(.9, .85))

Q2

此圖與講義上的圖不同：這裡的平均數與標準誤是直接由原始資料（raw data）計算，講義上的則是由 lm 模型估計而來。有些組別只有一個資料點，無法計算標準差，因此沒有 error bar。

# Arrange data: use the black-and-white theme for subsequent plots, then read
# the high-school data set and recode its categorical columns.
theme_set(theme_bw())

# `stringsAsFactors = TRUE` is requested explicitly: since R 4.0 read.table()
# keeps strings as character vectors by default, in which case levels() on a
# column is NULL and relabelling through `levels(female)` / `levels(race)`
# fails. `header = TRUE` replaces the partially matched `h = T`.
dta2 <- read.table("hs0.txt", header = TRUE, stringsAsFactors = TRUE) %>%
  mutate(
    # With `levels` omitted, the labels are applied to the existing levels
    # in their current order.
    female = factor(female, labels = c("Female", "Male")),
    race = factor(race,
                  labels = c("Black", "Asian", "Hispanic", "White")),
    # Socio-economic status is ordinal: low < middle < high.
    ses = ordered(ses,
                  levels = c("low", "middle", "high"),
                  labels = c("Low", "Middle", "High"))
  )

# Plot: mean math score with +/- 1 standard error bars by race, coloured by
# gender, with one panel per SES level.
dta2 %>%
  group_by(female, race, ses) %>%
  # Cell means and standard errors computed from the raw scores. A cell with
  # a single observation has an NA standard deviation, hence no error bar.
  summarise(m_math = mean(math),
            se_math = sd(math) / sqrt(n())) %>%
  # Drop the grouping left over from summarise() before plotting.
  ungroup() %>%
  ggplot(aes(race, m_math, color = female)) +
  # Points inherit the x/y mapping from ggplot(); both layers use the same
  # dodge width so each bar stays centred on its point.
  geom_point(position = position_dodge(.3)) +
  geom_errorbar(aes(ymin = m_math - se_math,
                    ymax = m_math + se_math),
                width = .2, position = position_dodge(.3)) +
  facet_grid(. ~ ses) +
  labs(y = "Mean Math score", x = "race")

Q3

# Read data: load the CSV and convert the two grouping columns to factors.
# `header = TRUE` is spelled out instead of the partially matched `h = T`.
dta3 <- data.table::fread("kdt.csv", header = TRUE) %>%
  mutate(Test = factor(Test),
         Format = factor(Format))

# Plot: dodged bars of accuracy per test and format, with +/- 1 SE error bars.

# Both legends (outline colour and fill) are shown without a title.
untitled_legend <- guide_legend(title = NULL)

ggplot(dta3, aes(x = Test, y = Accuracy, fill = Format, color = Format)) +
  # geom_col() draws bars whose heights are the data values themselves.
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = Accuracy - SE, ymax = Accuracy + SE),
                width = .2,
                position = position_dodge(width = 0.9)) +
  scale_fill_manual(values = c("gray60", "gray30"), guide = untitled_legend) +
  scale_color_manual(values = c("black", "black"), guide = untitled_legend) +
  labs(x = "Test", y = "Accuracy(%)") +
  # Zoom the y axis to 85-100 without discarding any data.
  coord_cartesian(ylim = c(85, 100))

inclass_w9

Pei Jun

2018-04-23

Q1

Q2

Q3