507homework

pacman::p_load(tidyverse, lattice, magrittr, dplyr, ggplot2 )

2

# Read nlsy86long.csv (comma-separated, first row holds the column names).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
dta2 <- read.table("C:/Users/ILT/Documents/nlsy86long.csv",
                   header = TRUE, sep = ",")

畫圖:不同性別與種族下,閱讀成績與數學成績的關係

# Smoothed trend of `read` against `math`: one curve per `race`,
# one panel (column) per `sex`.
ggplot(data = dta2, mapping = aes(x = math, y = read, colour = race)) +
  geom_smooth() +
  facet_grid(. ~ sex) +
  theme_light()

## `geom_smooth()` using method = 'loess'

看起來沒有太大差異

畫圖:各科成績(數學、閱讀)與年級、性別的關係

# Mean score with standard-error bars (mean_se) by grade and sex,
# one panel per subject.
# Columns 8 and 9 are stacked into long form (`subject` = original column
# name, `score` = value); presumably these are the `math` and `read`
# columns -- TODO confirm against the file's column order.
# pivot_longer() is used in place of the superseded gather().
dta2 %>%
  pivot_longer(cols = 8:9, names_to = "subject", values_to = "score") %>%
  ggplot(aes(grade, score, color = sex)) +
  stat_summary(fun.data = mean_se, geom = "pointrange",
               position = position_dodge(0.3)) +
  facet_grid(. ~ subject) +
  theme_light()

隨著年級上升,男生的數學成績與女生的閱讀成績似乎逐漸出現差異

3

# Read alcohol_age.csv and add `over21`: "yes" when Age >= 21, otherwise "no".
# `TRUE` is spelled out because `T` can be reassigned.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
dta3 <- read.csv("C:/Users/ILT/Documents/alcohol_age.csv",
                 header = TRUE, sep = ",") %>%
  mutate(over21 = ifelse(Age >= 21, "yes", "no"))

# Lattice scatter plot of Alcohol against Age, grouped by `over21`:
# type "g" adds a grid, "r" a regression line per group, "p" the points.
# The argument is spelled `groups` in full (the original `group` relied on
# partial argument matching), and the data frame is passed directly instead
# of through a pipe with `data = .`.
xyplot(Alcohol ~ Age, groups = over21,
       data = dta3, type = c("g", "r", "p"),
       xlab = "Age (year)",
       ylab = "Mortality rate from alcohol abuse (per 100,000)")

年齡對於酗酒死亡率的效果

# Mean of Alcohol within each `over21` group; printed for reference.
grp_means <- aggregate(Alcohol ~ over21, data = dta3, FUN = mean)
grp_means

##   over21  Alcohol
## 1     no 1.032118
## 2    yes 1.482557

# Pull the two group means out of the table so the plot below always agrees
# with the data instead of relying on hand-copied, rounded constants.
mean_under21 <- grp_means$Alcohol[grp_means$over21 == "no"]
mean_over21 <- grp_means$Alcohol[grp_means$over21 == "yes"]

# Scatter plot of Alcohol against Age with a horizontal line at each group's
# mean (ages 19-21 for "no", 21-23 for "yes").
# annotate() draws each segment exactly once; constants placed inside
# geom_segment(aes(...)) would be redrawn for every row of `dta3`.
ggplot(dta3, aes(Age, Alcohol)) +
  geom_point(aes(color = over21), na.rm = TRUE) +
  annotate("segment", x = 19, xend = 21,
           y = mean_under21, yend = mean_under21, color = "tomato") +
  annotate("segment", x = 21, xend = 23,
           y = mean_over21, yend = mean_over21, color = "turquoise") +
  theme(legend.position = "none") +
  labs(x = "Age (year)", y = "Mortality rate from alcohol abuse (per 100,000)")

比較不同年齡組的平均差異

5

畫圖:情緒與情境的關係

# Read coping.txt (whitespace-separated, first row holds the column names).
# `header = TRUE` is written out in full: the original `h = T` relied on
# partial argument matching and on the reassignable shorthand `T`.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
dta5 <- read.table("C:/Users/ILT/Documents/coping.txt", header = TRUE)

# Mean score with standard-error bars (mean_se) per situation, coloured by
# emotion. Columns 1-4 and 8 are stacked into long form (`emotion` = original
# column name, `e_score` = value); presumably these are the emotion ratings --
# TODO confirm against the file's column order.
# pivot_longer() is used in place of the superseded gather().
dta5 %>%
  pivot_longer(cols = c(1:4, 8), names_to = "emotion", values_to = "e_score") %>%
  ggplot(aes(situation, e_score, color = emotion)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")

策略與情境的關係

# Mean score with standard-error bars (mean_se) per situation, coloured by
# coping strategy. Columns 5-7 are stacked into long form (`coping` = original
# column name, `c_score` = value); presumably these are the coping-strategy
# ratings -- TODO confirm against the file's column order.
# pivot_longer() is used in place of the superseded gather().
dta5 %>%
  pivot_longer(cols = 5:7, names_to = "coping", values_to = "c_score") %>%
  ggplot(aes(situation, c_score, color = coping)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")

7

# Read beautyCourseEval.txt, then add factor versions of the 0/1 coded columns
# (and of courseID) under capitalised names for use as plot labels.
# NOTE(review): `Minority` labels code 0 as "Minority" and 1 as "Others";
# this looks possibly inverted -- confirm against the data's codebook.
dta7 <- read.table("C:/Users/ILT/Documents/beautyCourseEval.txt", header = TRUE)
dta7 <- mutate(
  dta7,
  CourseID = factor(courseID),
  Minority = factor(minority, levels = c(0, 1),
                    labels = c("Minority", "Others")),
  Tenure = factor(tenure, levels = c(0, 1),
                  labels = c("No", "Tenured")),
  Gender = factor(sex, levels = c(0, 1),
                  labels = c("Male", "Female"))
)

# Trellis plot of course evaluation against beauty score, one panel per
# course, with points and a regression line per Gender (type "p" and "r")
# on a grid ("g").
# index.cond orders the panels by the slope of the per-panel fit y ~ x.
# panel.error = "warning" turns a failing panel into a warning instead of
# aborting the whole plot.
# `groups` and `columns` are spelled in full; the original `group` and
# `column` relied on partial argument matching.
xyplot(eval ~ beauty | CourseID, groups = Gender,
       data = dta7, type = c("g", "r", "p"),
       index.cond = function(x, y) coef(lm(y ~ x))[2],
       xlab = "Beauty score", ylab = "Course evaluation score",
       lattice.options = list(panel.error = "warning"),
       auto.key = list(columns = 2))

8

# Attach the SAS reader and load sales.sas7bdat into a data frame.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
library(sas7bdat)
dta8 <- read.sas7bdat(
  file = "C:/Users/ILT/Documents/sales.sas7bdat",
  debug = FALSE
)

# Recode the integer-coded columns as labelled factors and clamp negative
# sales to zero, all in one pass.
dta8 <- mutate(
  dta8,
  region = factor(
    region,
    levels = 1:4,
    labels = c("Northern", "Southern", "Eastern", "Western")
  ),
  district = factor(
    district,
    levels = 1:5,
    labels = c("North East", "South East", "South West",
               "North West", "Central West")
  ),
  quarter = factor(
    quarter,
    levels = 1:4,
    labels = c("1st", "2nd", "3rd", "4th")
  ),
  # month.abb is base R's c("Jan", "Feb", ..., "Dec").
  month = factor(month, levels = 1:12, labels = month.abb),
  # Negative sales values are replaced by 0.
  sales = replace(sales, sales < 0, 0)
)

畫圖:月

# Sales by month: points joined by one line per product, one panel per year.
dta8 %>%
  ggplot(mapping = aes(x = month, y = sales, colour = product)) +
  geom_point() +
  geom_line(mapping = aes(group = product)) +
  labs(x = "Month", y = "sales") +
  facet_wrap(~ year) +
  theme_bw()

畫圖:季

# Mean sales with standard-error bars (mean_se) per quarter and product,
# dodged so products do not overlap; one panel per year.
dta8 %>%
  ggplot(mapping = aes(x = quarter, y = sales, colour = product)) +
  stat_summary(fun.data = mean_se, position = position_dodge(width = 0.3)) +
  labs(x = "quarter", y = "sales") +
  facet_wrap(~ year) +
  theme_bw()

507homework

TZUYING

2018年5月14日

2

3

5

7

8