# HW2 ----

pacman::p_load(tidyverse, lattice, magrittr, dplyr, ggplot2 )
# Read nlsy86long.csv and inspect its structure.
# `header = TRUE` is spelled out: `h = T` relied on partial argument matching
# and on `T`, which is an ordinary variable that can be reassigned.
dta2 <- read.csv("nlsy86long.csv", header = TRUE, sep = ",")
str(dta2)
## 'data.frame':    664 obs. of  9 variables:
##  $ id   : int  2390 2560 3740 4020 6350 7030 7200 7610 7680 7700 ...
##  $ sex  : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 2 2 2 1 2 ...
##  $ race : Factor w/ 2 levels "Majority","Minority": 1 1 1 1 1 1 1 1 1 1 ...
##  $ time : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ grade: int  0 0 0 0 1 0 0 0 0 0 ...
##  $ year : int  6 6 6 5 7 5 6 7 6 6 ...
##  $ month: int  67 66 67 60 78 62 66 79 76 67 ...
##  $ math : num  14.29 20.24 17.86 7.14 29.76 ...
##  $ read : num  19.05 21.43 21.43 7.14 30.95 ...
# Preview the first six rows of the data.
head(dta2, n = 6L)
##     id    sex     race time grade year month      math      read
## 1 2390 Female Majority    1     0    6    67 14.285714 19.047619
## 2 2560 Female Majority    1     0    6    66 20.238095 21.428571
## 3 3740 Female Majority    1     0    6    67 17.857143 21.428571
## 4 4020   Male Majority    1     0    5    60  7.142857  7.142857
## 5 6350   Male Majority    1     1    7    78 29.761905 30.952381
## 6 7030   Male Majority    1     0    5    62 14.285714 17.857143
# Smoothed reading-vs-math trend, coloured by race, one panel per sex.
# The aesthetics are declared once on the plot; the single layer inherits them.
ggplot(data = dta2, mapping = aes(x = math, y = read, color = race)) +
  geom_smooth() +
  theme_light() +
  facet_grid(. ~ sex)
## `geom_smooth()` using method = 'loess'

#男女在數學成績上差異不大
# Mean (+/- SE) math and reading scores at each grade, coloured by sex,
# one panel per subject.
# `pivot_longer()` replaces the superseded `gather()`, and the score columns
# are selected by name rather than by position (8:9), so the reshape keeps
# working if columns are added or reordered.
dta2 %>%
  pivot_longer(cols = c(math, read),
               names_to = "subject", values_to = "score") %>%
  ggplot(aes(grade, score, color = sex)) +
  stat_summary(fun.data = mean_se, geom = "pointrange",
               position = position_dodge(0.3)) +
  facet_grid(. ~ subject) +
  theme_light()

# Reading scores differ little by sex; in math, boys score slightly higher than girls as they get older.

# HW3 ----

# Read alcohol_age.csv and flag observations at or above age 21 in `grp`.
# `header = TRUE` replaces the partially matched, reassignable `h = T`.
dta3 <- read.csv("alcohol_age.csv", header = TRUE, sep = ",") %>%
  mutate(grp = if_else(Age >= 21, "Yes", "No"))

# Scatter plot with a separate regression line for each `grp`
# (type: "g" = grid, "r" = regression line, "p" = points).
# `groups` and `columns` are written in full instead of relying on partial
# matching of `group` / `column`.
xyplot(Alcohol ~ Age, groups = grp,
       data = dta3, type = c("g", "r", "p"), auto.key = list(columns = 2),
       xlab = "Age (year)", ylab = "Mortality rate from alcohol abuse (per 100,000)")

#年齡對於酗酒死亡率的效果
# Mean alcohol-related mortality rate within each `grp` level.
aggregate(Alcohol ~ grp, data = dta3, FUN = mean)
##   grp  Alcohol
## 1  No 1.032118
## 2 Yes 1.482557
# Scatter of mortality rate against age with one horizontal line per group at
# that group's mean rate.
# The means are computed from `dta3` rather than hand-copied from printed
# output, so the lines stay correct if the data changes.
alcohol_means <- aggregate(Alcohol ~ grp, data = dta3, FUN = mean)

# One row per group: horizontal extent and colour of its mean line.
# Supplying the segments as their own two-row data set draws each line once;
# constants inside aes() would redraw the same segment for every row of dta3.
mean_lines <- merge(
  data.frame(grp = c("No", "Yes"),
             x_from = c(19, 21),
             x_to = c(21, 23),
             line_col = c("tomato", "turquoise"),
             stringsAsFactors = FALSE),
  alcohol_means,
  by = "grp"
)

ggplot(dta3, aes(Age, Alcohol)) +
  geom_point(aes(color = grp), na.rm = TRUE) +
  geom_segment(data = mean_lines,
               aes(x = x_from, xend = x_to, y = Alcohol, yend = Alcohol),
               color = mean_lines$line_col) +
  theme(legend.position = "none") +
  labs(x = "Age (year)", y = "Mortality rate from alcohol abuse (per 100,000)")

#不同年齡在酗酒死亡率的差異

# HW4 ----

# Read dta4.txt (skipping its first three lines), name the columns, and
# reshape the five age-band columns into long form (Age, Rate).
# `check.names = FALSE` keeps the age-band names such as "25-34" as given.
# `pivot_longer()` replaces the superseded `gather()`; selecting every column
# except `Country` avoids the positional index 2:6.
dta4 <- read.table("dta4.txt", skip = 3,
                   col.names = c("Country", "25-34", "35-44", "45-54", "55-64", "65-74"),
                   check.names = FALSE) %>%
  pivot_longer(cols = -Country, names_to = "Age", values_to = "Rate")

# Distribution of rates within each age band.
ggplot(dta4, aes(Age, Rate)) +
  geom_boxplot() +
  labs(x = "Age", y = "Deaths per 100,000 from male suicides")

# HW5 ----

# Read the space-separated coping.txt and inspect its structure.
# `header = TRUE` replaces `h = T`, which relied on partial argument matching
# and on the reassignable `T`.
dta5 <- read.csv("coping.txt", header = TRUE, sep = " ")
str(dta5)
## 'data.frame':    84 obs. of  10 variables:
##  $ annoy    : int  4 4 2 4 4 4 3 3 3 4 ...
##  $ sad      : int  2 4 2 3 2 3 2 1 1 4 ...
##  $ afraid   : int  2 4 2 4 1 1 2 1 1 2 ...
##  $ angry    : int  2 2 2 4 1 4 2 2 2 1 ...
##  $ approach : num  1 4 2.67 4 1 2.33 2 1.33 1 1.67 ...
##  $ avoid    : num  2 3 3 1.5 2.75 2.5 1 4 1 4 ...
##  $ support  : num  1 1.25 1 3.25 1.25 1 1.5 2.75 1.33 3.5 ...
##  $ agressive: num  2.5 1.5 2.33 1 1.5 3.67 1 2 1.67 2.5 ...
##  $ situation: Factor w/ 6 levels "Bully","Fail",..: 2 4 5 1 6 3 2 4 5 1 ...
##  $ sbj      : Factor w/ 14 levels "S135","S137",..: 6 6 6 6 6 6 4 4 4 4 ...
# Preview the first six rows of the data.
head(dta5, n = 6L)
##   annoy sad afraid angry approach avoid support agressive situation sbj
## 1     4   2      2     2     1.00  2.00    1.00      2.50      Fail  S2
## 2     4   4      4     2     4.00  3.00    1.25      1.50    NoPart  S2
## 3     2   2      2     2     2.67  3.00    1.00      2.33    TeacNo  S2
## 4     4   3      4     4     4.00  1.50    3.25      1.00     Bully  S2
## 5     4   2      1     1     1.00  2.75    1.25      1.50      Work  S2
## 6     4   3      1     4     2.33  2.50    1.00      3.67     MomNo  S2
# Mean (+/- SE) emotion rating in each situation, one colour per emotion.
# Only the four emotion columns (annoy, sad, afraid, angry) are reshaped.
# NOTE(review): `agressive` (column 8) used to be included here. It is numeric
# like approach/avoid/support and sits next to them, so it appears to be a
# coping score and is now plotted with the coping strategies below. Confirm
# against the data codebook.
dta5 %>%
  pivot_longer(cols = annoy:angry,
               names_to = "emotion", values_to = "e_score") %>%
  ggplot(aes(situation, e_score, color = emotion)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")

# Emotion and situation
# Mean (+/- SE) coping score in each situation, one colour per strategy
# (approach, avoid, support, agressive).
dta5 %>%
  pivot_longer(cols = approach:agressive,
               names_to = "coping", values_to = "c_score") %>%
  ggplot(aes(situation, c_score, color = coping)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")

#策略與情境

# HW7 ----

# Read beautyCourseEval.txt and derive factor versions of the course id and
# of sex (codes 0/1 labelled "Male"/"Female").
dta7 <- read.table("beautyCourseEval.txt", header = TRUE) %>%
  mutate(CourseID = factor(courseID),
         Gender = factor(sex, levels = c(0, 1), labels = c("Male", "Female")))

# Evaluation score against beauty score, one panel per course, with points
# and a regression line per gender.
# `index.cond` orders the panels by the slope of lm(y ~ x) in each panel.
# `panel.error = "warning"` downgrades a failing panel to a warning so the
# remaining panels are still drawn.
# `groups` and `columns` are written in full instead of relying on partial
# matching of `group` / `column`.
xyplot(eval ~ beauty | CourseID, groups = Gender,
       data = dta7, type = c("g", "r", "p"),
       index.cond = function(x, y) coef(lm(y ~ x))[2],
       xlab = "Beauty score", ylab = "Course evaluation score",
       lattice.options = list(panel.error = "warning"),
       auto.key = list(columns = 2))

# HW8 ----

library(sas7bdat)
# Import the SAS sales table; the raw import is kept as `dta08`.
dta08 <- sas7bdat::read.sas7bdat("sales.sas7bdat")

# Recode the integer-coded columns as labelled factors.
dta8 <- mutate(
  dta08,
  region = factor(
    region,
    levels = 1:4,
    labels = c("Northern", "Southern", "Eastern", "Western")
  ),
  district = factor(
    district,
    levels = 1:5,
    labels = c("North East", "South East", "South West", "North West", "Central West")
  ),
  quarter = factor(
    quarter,
    levels = 1:4,
    labels = c("1st", "2nd", "3rd", "4th")
  ),
  month = factor(
    month,
    levels = 1:12,
    labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
  )
)

# Set negative sales figures to zero.
dta8$sales[dta8$sales < 0] <- 0
# Sales by month, coloured by product, one panel per year; the points of each
# product are joined by a line.
dta8 %>%
  ggplot(mapping = aes(x = month, y = sales, color = product)) +
  geom_point() +
  geom_line(mapping = aes(group = product)) +
  facet_wrap(~ year) +
  theme_bw() +
  labs(x = "Month", y = "sales")

# Mean (+/- SE) sales per quarter, coloured by product, one panel per year.
dta8 %>%
  ggplot(mapping = aes(x = quarter, y = sales, color = product)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  facet_wrap(~ year) +
  labs(x = "quarter", y = "sales") +
  theme_bw()