Data Management HW W10 EX02-8

library(ggplot2)
library(lattice)
library(magrittr)
library(dplyr)
library(tidyverse)

EX02

# Load the EX02 data from nlsy86long.csv (the first line holds column names).
dta02 <- read.csv(file = "nlsy86long.csv", header = TRUE)

# Scatter plot of math against read, coloured by sex, with a fitted linear
# trend per sex and one panel column per race.
ggplot(data = dta02, mapping = aes(x = read, y = math, colour = sex)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(cols = vars(race))

閱讀和數學成績呈正相關（沒有明顯的性別或種族差異）。

# Mean (+/- standard error) score by year for each sex, one panel per subject.
# The two score columns are stacked into long form first so that `subject`
# can drive the facets. pivot_longer() replaces the superseded gather().
# NOTE(review): columns 8:9 are selected by position and are presumably
# `math` and `read` -- confirm against the CSV, then select them by name.
dta02 %>%
  pivot_longer(cols = 8:9, names_to = "subject", values_to = "score") %>%
  ggplot(aes(year, score, color = sex)) +
  stat_summary(fun.data = mean_se, geom = "pointrange",
               position = position_dodge(0.3)) +
  facet_grid(. ~ subject) +
  theme_light()

整體而言，男女生在數學和閱讀成績上未有明顯差異。

EX03

# Load the EX03 data and flag each row as at/above ("Yes") or below ("No")
# age 21.
dta03 <- read.csv("alcohol_age.csv", header = TRUE) %>%
  mutate(Over21 = ifelse(Age >= 21, "Yes", "No"))

# Figure 1: lattice scatter plot of Alcohol against Age with a separate
# regression line per Over21 group (type: "g" = grid, "r" = regression line,
# "p" = points). `groups` is spelled out in full rather than relying on
# partial argument matching of `group`.
xyplot(Alcohol ~ Age, groups = Over21,
       data = dta03, type = c("g", "r", "p"),
       xlab = "Age (year)",
       ylab = "Mortality rate from alcohol abuse (per 100,000)")

# Figure 2: points coloured by Over21, plus one horizontal reference segment
# on each side of age 21. The segments are fixed annotations, so they are
# drawn with annotate() instead of geom_segment(aes(<constants>)), which would
# draw one copy of each segment per row of dta03.
# NOTE(review): 1.032 and 1.483 are presumably the mean Alcohol values below
# and from age 21 -- confirm where these numbers come from.
ggplot(dta03, aes(Age, Alcohol, color = Over21)) +
  geom_point(na.rm = TRUE) +
  annotate("segment", x = 19, xend = 21, y = 1.032, yend = 1.032,
           color = "tomato") +
  annotate("segment", x = 21, xend = 23, y = 1.483, yend = 1.483,
           color = "turquoise") +
  labs(x = "Age(year)",
       y = "Mortality rate from alcohol abuse (per 100,000)")

圖二能清楚地表達是否達到法定年齡的死亡率差異，是更好的選擇。

EX04

# Read the EX04 table, skipping the first three lines of the file, and name
# the columns explicitly. check.names = FALSE keeps the age-band names such as
# "25-34" as given instead of converting them to syntactic names.
# The five age-band columns (everything except Country) are then stacked into
# long form; pivot_longer() replaces the superseded gather() and selects the
# columns by name rather than by position.
dta04 <- read.table("dta04.txt", skip = 3,
                    col.names = c("Country", "25-34", "35-44", "45-54",
                                  "55-64", "65-74"),
                    check.names = FALSE) %>%
  pivot_longer(cols = -Country, names_to = "Age", values_to = "Rate")

# Box plot of the rates within each age band.
ggplot(dta04, aes(Age, Rate)) +
  geom_boxplot() +
  labs(x = "Age", y = "Deaths per 100,000 from male suicides")

EX05

# Load the EX05 data (the first line of the file holds column names).
dta05 <- read.table("coping.txt", header = TRUE)

# Association between situation and emotion: mean (+/- standard error) score
# per situation, one colour per emotion, dodged so the groups do not overlap.
# pivot_longer() replaces the superseded gather().
# NOTE(review): columns 1:4 are selected by position and assumed to be the
# emotion ratings -- confirm against the file header, then select by name.
dta05 %>%
  pivot_longer(cols = 1:4, names_to = "emotion", values_to = "score") %>%
  ggplot(aes(situation, score, color = emotion)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")

不論哪種情境，annoy 的分數都最高。

# Association between coping strategy and situation: mean (+/- standard error)
# score per situation, one colour per strategy, dodged so the groups do not
# overlap. pivot_longer() replaces the superseded gather().
# NOTE(review): columns 5:8 are selected by position and assumed to be the
# coping-strategy ratings -- confirm against the file header, then select by
# name.
dta05 %>%
  pivot_longer(cols = 5:8, names_to = "coping", values_to = "score") %>%
  ggplot(aes(situation, score, color = coping)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")

EX06

EX07

# Load the EX07 data and derive the plotting factors: CourseID from courseID,
# and Gender from the 0/1 sex code (0 = "Male", 1 = "Female").
dta07 <- read.table("beautyCourseEval.txt", header = TRUE) %>%
  mutate(CourseID = factor(courseID),
         Gender = factor(sex, levels = c(0, 1), labels = c("Male", "Female")))

# Course evaluation against beauty score, one panel per course, with points
# and a regression line per gender. The documented argument names `groups`
# and `columns` are spelled out in full instead of the partial forms `group`
# and `column`.
xyplot(eval ~ beauty | CourseID, groups = Gender,
       data = dta07, type = c("g", "r", "p"),
       # Order the panels by the fitted slope of y on x within each panel.
       index.cond = function(x, y) coef(lm(y ~ x))[2],
       xlab = "Beauty score", ylab = "Course evaluation score",
       # Panel errors are reported as warnings rather than stopping the plot.
       lattice.options = list(panel.error = "warning"),
       auto.key = list(columns = 2))

EX08

library(sas7bdat)
# Import the raw EX08 table from the SAS data file.
dta08 <- sas7bdat::read.sas7bdat("sales.sas7bdat")

# Recode the integer-coded columns as labelled factors.
dta8 <- mutate(
  dta08,
  region = factor(
    region,
    levels = 1:4,
    labels = c("Northern", "Southern", "Eastern", "Western")
  ),
  district = factor(
    district,
    levels = 1:5,
    labels = c("North East", "South East", "South West", "North West",
               "Central West")
  ),
  quarter = factor(quarter, levels = 1:4, labels = c("1st", "2nd", "3rd", "4th")),
  # month.abb is the built-in vector "Jan", "Feb", ..., "Dec".
  month = factor(month, levels = 1:12, labels = month.abb)
)

# Negative sales values are set to zero.
dta8$sales[dta8$sales < 0] <- 0

# Sales by month, one line per product and one panel per year.
ggplot(data = dta8, mapping = aes(x = month, y = sales, colour = product)) +
  geom_point() +
  geom_line(mapping = aes(group = product)) +
  facet_wrap(vars(year)) +
  theme_bw() +
  labs(x = "Month", y = "sales")

Data Management HW W10 EX02-8

Yu-Jou Lin

EX02

EX03

EX04

EX05

EX06

EX07

EX08