4/23 in class

pacman::p_load(tidyverse, gridExtra, ggExtra,
ggfortify, GGally, ggthemes, ggrepel,
gganimate, ggeffects)

## Installing package into 'C:/Users/she22_000/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)

## Warning: package 'gganimate' is not available (for R version 3.4.4)

## Bioconductor version 3.6 (BiocInstaller 1.28.0), ?biocLite for help

## Warning in p_install(package, character.only = TRUE, ...):

## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'gganimate'

## Warning in pacman::p_load(tidyverse, gridExtra, ggExtra, ggfortify, GGally, : Failed to install/load:
## gganimate

ex1

# Load the hs0 data set and recode its categorical variables for plotting.
# stringsAsFactors = TRUE is passed explicitly because levels(female) and
# levels(race) below must be non-NULL; R >= 4.0 no longer converts strings to
# factors by default, which would make the factor() calls fail.
dta <- read.table("C:/Users/she22_000/Documents/hs0.txt",
                  header = TRUE, stringsAsFactors = TRUE) %>%
  mutate(
    # Relabel the two sex levels, keeping their existing level order.
    female = factor(female, levels(female),
                    labels = c("Female", "Male")),
    # Relabel the four race levels, keeping their existing level order.
    race = factor(race, levels(race),
                  labels = c("Black", "Asian", "Hispanic", "White")),
    # Turn socioeconomic status into an ordered factor: Low < Middle < High.
    ses = ordered(ses, levels = c("low", "middle", "high"),
                  labels = c("Low", "Middle", "High"))
  ) %>%
  # Reorder the race levels by the median math score within each race.
  mutate(race = reorder(race, math, median))

# Inspect the structure of the recoded data frame.
str(dta)

## 'data.frame':    200 obs. of  11 variables:
##  $ id     : int  70 121 86 141 172 113 50 11 84 48 ...
##  $ female : Factor w/ 2 levels "Female","Male": 2 1 2 2 2 2 2 2 2 2 ...
##  $ race   : Factor w/ 4 levels "Black","Hispanic",..: 3 3 3 3 3 3 1 2 3 1 ...
##   ..- attr(*, "scores")= num [1:4(1d)] 45 61 47 54
##   .. ..- attr(*, "dimnames")=List of 1
##   .. .. ..$ : chr  "Black" "Asian" "Hispanic" "White"
##  $ ses    : Ord.factor w/ 3 levels "Low"<"Middle"<..: 1 2 3 3 2 2 2 2 2 2 ...
##  $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
##  $ prog   : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
##  $ read   : int  57 68 44 63 47 44 50 34 63 57 ...
##  $ write  : int  52 59 33 44 52 52 59 46 57 55 ...
##  $ math   : int  41 53 54 47 57 51 42 45 54 52 ...
##  $ science: int  47 63 58 53 53 63 53 39 58 NA ...
##  $ socst  : int  57 61 31 56 61 61 61 36 51 51 ...

# Histogram bin width: IQR of the math scores divided by the cube root of the
# number of observations.
bw <- with(dta, IQR(math) / (length(math)^(1/3)))

# One data subset per sex, so each group gets its own histogram layer.
male_rows <- subset(dta, female == "Male")
female_rows <- subset(dta, female == "Female")

# Legend layout shared by the colour and fill scales: no title, horizontal
# keys in reversed order, labels below the keys and rotated by 90 degrees.
rotated_legend <- guide_legend(
  title = NULL,
  title.position = "top",
  direction = "horizontal",
  reverse = TRUE,
  label.position = "bottom",
  label.hjust = .5,
  label.vjust = .5,
  label.theme = element_text(angle = 90)
)

# Back-to-back density histograms of math score: the male densities are
# negated so they are drawn on the opposite side of zero from the female
# densities, and the axes are flipped so math score runs vertically.
ggplot() +
  stat_bin(aes(math, y = -..density.., color = "Male", fill = "Male"),
           data = male_rows, binwidth = bw) +
  stat_bin(aes(math, y = ..density.., color = "Female", fill = "Female"),
           data = female_rows, binwidth = bw) +
  # Both groups get a black bar outline.
  scale_color_manual(values = c("black", "black"), guide = rotated_legend) +
  # Bar fill colours for the two groups.
  scale_fill_manual(values = c("White", "gray80"), guide = rotated_legend) +
  scale_x_continuous(limits = c(30, 80), breaks = seq(30, 80, by = 5)) +
  labs(x = "Mathematic score", y = "Density") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = c(.9, .85))

ex2

# Linear model of math score on the other four test scores plus race,
# socioeconomic status and sex.
m0 <- lm(
  math ~ read + write + science + socst + race + ses + female,
  data = dta
)

# Model predictions with race listed as the first term.
dta_m0 <- ggpredict(m0, terms = c("race", "female", "ses"))
plot(dta_m0) + labs(x = "Race", y = "Mean math score")

# Model predictions with socioeconomic status listed as the first term.
dta_m1 <- ggpredict(m0, terms = c("ses", "race", "female"))
plot(dta_m1) + labs(x = "ses", y = "Mean math score")

# Model predictions with sex listed as the first term.
dta_m2 <- ggpredict(m0, terms = c("female", "race", "ses"))
plot(dta_m2) + labs(x = "female", y = "Mean math score")

# Author's reading of the plots:
# Sex: regardless of race and socioeconomic status, the mean math score of
#   males is higher than that of females.
# Race: Asian students have the higher mean math score regardless of
#   socioeconomic status and sex.
# Socioeconomic status: no large effect is apparent.

ex3

# Read the whitespace-delimited accuracy table (sep = "" splits on any run of
# white space) and print it as a formatted table.
# header is spelled TRUE because T is an ordinary variable that can be
# reassigned.
dta03 <- read.csv("C:/Users/she22_000/Documents/kdt.csv",
                  sep = "", header = TRUE)
knitr::kable(dta03)

Test	Format	Accuracy	SE
KDT	Picture	93.7	0.9
KDT	Word	96.4	0.7
PPT	Picture	90.6	1.0
PPT	Word	88.9	1.0

# Dodged bar chart of accuracy by test, one bar per format, with error bars
# spanning Accuracy - SE to Accuracy + SE. The y-axis view is zoomed to the
# range 85 to 100.
ggplot(dta03, aes(x = Test, y = Accuracy, fill = Format)) +
  geom_col(position = "dodge") +
  geom_errorbar(
    aes(ymin = Accuracy - SE, ymax = Accuracy + SE),
    position = "dodge"
  ) +
  coord_cartesian(ylim = c(85, 100))

4/23 in class

jiru shih

2018年4月23日

ex1

ex2

ex3