WEEK10 HOMEWORK

Q2

dta2 <- read.table("C:/Users/Hsin/Documents/nlsy86long.csv", header=T,sep=",")

多數和少數總族的數學成績在第一次的測驗到第四次測驗的期間逐漸提升，多數的總族在第一次測驗的數學成績性別差異較大，少數種族則是在第四次測驗的數學成績性別差異較大。

dta2 %>% ggplot()+ geom_smooth(mapping=aes(math,time,color=sex))  +theme_light() + facet_grid(.~race)

## `geom_smooth()` using method = 'loess'

數學和閱讀的成績都隨著年級提升，數學成績的性別差異在四年級時最明顯，閱讀成績的性別差異不大。

dta2 %>% gather(subject,score,8:9)%>%
  ggplot(.,aes(grade,score,color=sex)) +
  stat_summary(fun.data = mean_se , geom = "pointrange",
               position = position_dodge(0.3)) + 
  facet_grid(.~subject)+
  theme_light()

Q3

第一張的酗酒死亡率隨年齡越來越高，但21歲前不合法，所以這樣解釋不合理。

dta3 <- read.csv("alcohol_age.csv", header = T,sep=",") %>%
mutate(grp = if_else(Age >= 21, "Yes", "No"))

# plot 1
xyplot(Alcohol ~ Age, group = grp,
       data = dta3, type = c("g", "r", "p"), auto.key = list(column = 2),
       xlab = "Age (year)", ylab = "Mortality rate from alcohol abuse (per 100,000)")

aggregate(Alcohol ~ grp, FUN = mean, data = dta3)

##   grp  Alcohol
## 1  No 1.032118
## 2 Yes 1.482557

第二張以合法飲酒年齡21為分界，比較合法前後的酗酒死亡率，理所當然合法後比較有酗酒可能。

ggplot(dta3, aes(Age, Alcohol))+
  geom_point(aes(color = grp), na.rm = TRUE)+
  geom_segment(aes(x = 19, xend = 21, y = 1.032, yend = 1.032), color = "tomato")+
  geom_segment(aes(x = 21, xend = 23, y = 1.483, yend = 1.483), color = "turquoise")+
  theme(legend.position = "none")+
  labs(x = "Age (year)", y = "Mortality rate from alcohol abuse (per 100,000)")

因此第二張圖表比較適合解釋這筆資料

Q4

隨著年齡增長，自殺死亡的比例有上升的趨勢。

dta4 <- read.table("C:/Users/Hsin/Documents/ex4dta.txt", skip = 3,
                   col.names = c("Country","25-34","35-44","45-54","55-64","65-74"),
                   check.names = FALSE) %>% gather(Age, Rate, 2:6)

ggplot(dta4, aes(Age, Rate))+
  geom_boxplot()+
  labs(x = "Age", y = "Deaths per 100,000 from male suicides")

Q5

dta5 <- read.table("coping.txt",header=T)

dta5 %>% gather(emo, value, 1:4) %>% 
  group_by(emo) %>% mutate(emo_m = mean(value)) %>% 
  ggplot(aes(reorder(sbj, value, mean), value, color = emo))+
  stat_summary(fun.data = mean_se, geom = "point")+
  geom_hline(aes(yintercept = emo_m, color = emo), linetype = "dashed")+
  coord_flip()+
  labs(x = "Subject ID", y = "Average emotions score", color = "emotion")+
  theme(legend.position = "top")

dta5 %>% gather(coping, value, 5:8) %>% 
  group_by(coping) %>% mutate(coping_m = mean(value)) %>% 
  ggplot(aes(reorder(sbj, value, mean), value, color = coping))+
  stat_summary(fun.data = mean_se, geom = "point")+
  geom_hline(aes(yintercept = coping_m, color = coping), linetype = "dashed")+
  coord_flip()+
  labs(x = "Subject ID", y = "Average coping score")+
  theme(legend.position = "top")

Q6略

Q7

dta7 <- read.table("beautyCourseEval.txt", header = TRUE) %>% 
  mutate(CourseID = factor(courseID), 
         Minority = factor(minority, levels = c(0, 1), labels = c("Minority", "Others")),
         Tenure = factor(tenure, levels = c(0, 1), labels = c("No", "Tenured")),
         Gender = factor(sex, levels = c(0, 1), labels = c("Male", "Female")))

#
xyplot(eval ~ beauty | CourseID, group = Gender,
       data = dta7, type = c("g", "r", "p"),
       index.cond = function(x, y) coef(lm(y ~ x))[2], 
       xlab = "Beauty score", ylab = "Course evaluation score",
       lattice.options = list(panel.error = "warning"), auto.key = list(column = 2))

xyplot(eval ~ beauty | Tenure, group = Gender,
       data = dta7, type = c("g", "r", "p"),
       index.cond = function(x, y) coef(lm(y ~ x))[2], 
       xlab = "Beauty score", ylab = "Course evaluation score",
       lattice.options = list(panel.error = "warning"), auto.key = list(column = 2))

xyplot(eval ~ beauty | Minority, group = Gender,
       data = dta7, type = c("g", "r", "p"),
       index.cond = function(x, y) coef(lm(y ~ x))[2], 
       xlab = "Beauty score", ylab = "Course evaluation score",
       lattice.options = list(panel.error = "warning"), auto.key = list(column = 2))

Q8

dta8 <- sas7bdat::read.sas7bdat("sales.sas7bdat") %>% 
  mutate(region = factor(region, 1:4, c("Nothern", "Southern", "Eastern", "Western")),
         district = factor(district, 1:5, c("North East", "South East", "South West", "North West", "Central West")),
         quarter = factor(quarter, 1:4, c("1st", "2nd", "3rd", "4th")),
         month = factor(month, 1:12, month.abb),
         sales = ifelse(sales < 0, 0, sales),
         market = factor(market))

dta8 %>% 
  ggplot(aes(month, sales, color = quarter))+
  stat_summary(fun.data = mean_cl_boot, geom = "pointrange", alpha = .5)+
  stat_summary(aes(group = 1), fun.y = mean, geom = "line")+
  stat_smooth(aes(group = 1), method = lm, alpha = .5)+
  ggthemes::theme_economist()+
  facet_wrap(~year)+
  labs(x = "Months", y = "Average Sales", title = "Product Sales", subtitle = "2001-2002")

## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function

## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function

dta8 %>% 
  group_by(product) %>% 
  mutate(m = mean(sales)) %>% 
  ggplot(aes(month, sales, group = product))+
  stat_smooth(method = lm, se = F)+
  stat_summary(fun.y = mean, geom = "point", alpha = .5)+
  stat_summary(aes(group = product), fun.y = mean, geom = "line")+
  geom_hline(aes(yintercept = m), linetype = "dashed", alpha = .5)+
  facet_wrap(year ~ product)+
  ggthemes::theme_economist()+
  ggrepel::geom_text_repel(aes(label = ifelse(sales == 0, as.character(month),"")),
                           color = "red")+
  labs(x = "Months", y = "Average Sales", title = "Product Sales", subtitle = "2001-2002")+
  theme(axis.text.x = element_text(size = rel(.75)))

dta8 %>% 
  ggplot(aes(region, sales, color = district))+
  stat_summary(fun.data = mean_se, geom = "pointrange")+
  facet_wrap(~product)+
  ggthemes::theme_economist()+
  theme(legend.position = "bottom")+
  labs(x = "Region", y = "Average Sales", title = "Product Sales", subtitle = "2001-2002")

WEEK10 HOMEWORK

WHH

20180513

Q2

Q3

Q4

Q5

Q6略

Q7

Q8

Q9略