# HW2 ----
pacman::p_load(tidyverse, lattice, magrittr, dplyr, ggplot2 )
# Read nlsy86long.csv into `dta2` and inspect its structure and first rows.
# `header = TRUE` is written out in full: the abbreviated `h = T` depended on
# partial argument matching and on `T`, which is an ordinary variable that can
# be reassigned.
dta2 <- read.csv("nlsy86long.csv", header = TRUE, sep = ",")

# Recorded output of str(): 664 rows, 9 columns.
str(dta2)
## 'data.frame': 664 obs. of 9 variables:
## $ id : int 2390 2560 3740 4020 6350 7030 7200 7610 7680 7700 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 2 2 2 1 2 ...
## $ race : Factor w/ 2 levels "Majority","Minority": 1 1 1 1 1 1 1 1 1 1 ...
## $ time : int 1 1 1 1 1 1 1 1 1 1 ...
## $ grade: int 0 0 0 0 1 0 0 0 0 0 ...
## $ year : int 6 6 6 5 7 5 6 7 6 6 ...
## $ month: int 67 66 67 60 78 62 66 79 76 67 ...
## $ math : num 14.29 20.24 17.86 7.14 29.76 ...
## $ read : num 19.05 21.43 21.43 7.14 30.95 ...

# Recorded output of head(): the first six rows.
head(dta2)
## id sex race time grade year month math read
## 1 2390 Female Majority 1 0 6 67 14.285714 19.047619
## 2 2560 Female Majority 1 0 6 66 20.238095 21.428571
## 3 3740 Female Majority 1 0 6 67 17.857143 21.428571
## 4 4020 Male Majority 1 0 5 60 7.142857 7.142857
## 5 6350 Male Majority 1 1 7 78 29.761905 30.952381
## 6 7030 Male Majority 1 0 5 62 14.285714 17.857143
# Smoothed trend of reading score against math score: one curve per race,
# with a separate panel for each sex.
ggplot(data = dta2, mapping = aes(x = math, y = read, color = race)) +
  geom_smooth() +
  facet_grid(. ~ sex) +
  theme_light()
## `geom_smooth()` using method = 'loess'

# Author's observation: males and females differ little in math scores.
# Mean score with standard-error bars at each grade, coloured by sex, with
# one panel per subject. The math and read columns (columns 8 and 9 of dta2)
# are stacked into a subject/score pair before plotting.
ggplot(
  data = gather(dta2, key = subject, value = score, math, read),
  mapping = aes(x = grade, y = score, color = sex)
) +
  stat_summary(
    geom = "pointrange",
    fun.data = mean_se,
    position = position_dodge(width = 0.3)
  ) +
  theme_light() +
  facet_grid(. ~ subject)

# Author's observation: reading scores differ little between the sexes; in
# math, boys score slightly higher than girls as they get older.
# HW3 ----
# Read alcohol_age.csv and add `grp`, which is "Yes" when Age is 21 or more
# and "No" otherwise.
# Argument names are written out in full (`header = TRUE`, `groups`,
# `columns`); the abbreviated forms only worked through partial matching.
dta3 <- read.csv("alcohol_age.csv", header = TRUE, sep = ",") %>%
  mutate(grp = if_else(Age >= 21, "Yes", "No"))

# Scatter plot of mortality rate against age with a separate regression line
# for each group: type "g" draws the grid, "r" the fitted line, "p" the points.
xyplot(Alcohol ~ Age,
  groups = grp,
  data = dta3,
  type = c("g", "r", "p"),
  auto.key = list(columns = 2),
  xlab = "Age (year)",
  ylab = "Mortality rate from alcohol abuse (per 100,000)"
)

# Author's note: the effect of age on the alcohol-abuse mortality rate.
# Mean mortality rate within each group (Age below 21 vs. 21 and above).
# The result is stored so the plot below can use the computed means instead
# of values copied by hand from the printed output.
grp_means <- aggregate(Alcohol ~ grp, FUN = mean, data = dta3)
grp_means
## grp Alcohol
## 1 No 1.032118
## 2 Yes 1.482557

# Look each mean up by its group label rather than by row position.
mean_under_21 <- grp_means$Alcohol[grp_means$grp == "No"]
mean_21_plus <- grp_means$Alcohol[grp_means$grp == "Yes"]

# Scatter plot of the rates with one horizontal segment per group at that
# group's mean. The segments are drawn with annotate() so each is drawn once;
# constants inside aes() would be recycled and redrawn for every data row.
ggplot(dta3, aes(Age, Alcohol)) +
  geom_point(aes(color = grp), na.rm = TRUE) +
  annotate("segment",
    x = 19, xend = 21,
    y = mean_under_21, yend = mean_under_21,
    colour = "tomato"
  ) +
  annotate("segment",
    x = 21, xend = 23,
    y = mean_21_plus, yend = mean_21_plus,
    colour = "turquoise"
  ) +
  theme(legend.position = "none") +
  labs(x = "Age (year)", y = "Mortality rate from alcohol abuse (per 100,000)")

# Author's note: difference in alcohol-abuse mortality rate across ages.
# HW4 ----
# Read dta4.txt, skipping its first three lines, and name the six columns
# explicitly. check.names = FALSE keeps the age-band labels (e.g. "25-34")
# as they are. The five age-band columns are then stacked into an Age/Rate
# pair, giving one row per country and age band.
dta4 <- gather(
  read.table(
    "dta4.txt",
    skip = 3,
    check.names = FALSE,
    col.names = c("Country", "25-34", "35-44", "45-54", "55-64", "65-74")
  ),
  key = Age,
  value = Rate,
  -Country
)

# Box plot of the rates within each age band.
ggplot(data = dta4, mapping = aes(x = Age, y = Rate)) +
  geom_boxplot() +
  xlab("Age") +
  ylab("Deaths per 100,000 from male suicides")

# HW5 ----
# Read the space-separated file coping.txt into `dta5` and inspect it.
# `header = TRUE` is written out in full: the abbreviated `h = T` depended on
# partial argument matching and on the reassignable variable `T`.
dta5 <- read.csv("coping.txt", header = TRUE, sep = " ")

# Recorded output of str(): 84 rows, 10 columns.
str(dta5)
## 'data.frame': 84 obs. of 10 variables:
## $ annoy : int 4 4 2 4 4 4 3 3 3 4 ...
## $ sad : int 2 4 2 3 2 3 2 1 1 4 ...
## $ afraid : int 2 4 2 4 1 1 2 1 1 2 ...
## $ angry : int 2 2 2 4 1 4 2 2 2 1 ...
## $ approach : num 1 4 2.67 4 1 2.33 2 1.33 1 1.67 ...
## $ avoid : num 2 3 3 1.5 2.75 2.5 1 4 1 4 ...
## $ support : num 1 1.25 1 3.25 1.25 1 1.5 2.75 1.33 3.5 ...
## $ agressive: num 2.5 1.5 2.33 1 1.5 3.67 1 2 1.67 2.5 ...
## $ situation: Factor w/ 6 levels "Bully","Fail",..: 2 4 5 1 6 3 2 4 5 1 ...
## $ sbj : Factor w/ 14 levels "S135","S137",..: 6 6 6 6 6 6 4 4 4 4 ...

# Recorded output of head(): the first six rows.
head(dta5)
## annoy sad afraid angry approach avoid support agressive situation sbj
## 1 4 2 2 2 1.00 2.00 1.00 2.50 Fail S2
## 2 4 4 4 2 4.00 3.00 1.25 1.50 NoPart S2
## 3 2 2 2 2 2.67 3.00 1.00 2.33 TeacNo S2
## 4 4 3 4 4 4.00 1.50 3.25 1.00 Bully S2
## 5 4 2 1 1 1.00 2.75 1.25 1.50 Work S2
## 6 4 3 1 4 2.33 2.50 1.00 3.67 MomNo S2
# Mean emotion rating with standard-error bars in each situation.
# Columns are selected by name so the plot does not depend on column order.
# NOTE(review): `agressive` (spelled as in the data) is treated as a coping
# score, not an emotion: it is numeric and sits next to approach/avoid/support,
# whereas the four emotion items are integer ratings. Confirm against the
# data's codebook.
dta5 %>%
  gather(emotion, e_score, annoy, sad, afraid, angry) %>%
  ggplot(aes(situation, e_score, color = emotion)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")

# Author's note: emotions by situation.

# Mean coping-strategy score with standard-error bars in each situation,
# covering all four strategy columns (approach, avoid, support, agressive).
dta5 %>%
  gather(coping, c_score, approach, avoid, support, agressive) %>%
  ggplot(aes(situation, c_score, color = coping)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")

# Author's note: coping strategies by situation.
# HW7 ----
# Read beautyCourseEval.txt and add two factors: CourseID (from courseID) and
# Gender (from sex, with 0 labelled "Male" and 1 labelled "Female").
dta7 <- read.table("beautyCourseEval.txt", header = TRUE) %>%
  mutate(
    CourseID = factor(courseID),
    Gender = factor(sex, levels = c(0, 1), labels = c("Male", "Female"))
  )

# Evaluation score against beauty score, one panel per course, with points
# and a regression line for each gender.
# `groups` and `columns` are written out in full; the abbreviated `group` and
# `column` only worked through partial argument matching.
xyplot(eval ~ beauty | CourseID,
  groups = Gender,
  data = dta7,
  type = c("g", "r", "p"),
  # Order the panels by the slope of eval on beauty within each course.
  index.cond = function(x, y) coef(lm(y ~ x))[2],
  xlab = "Beauty score",
  ylab = "Course evaluation score",
  # NOTE(review): presumably set so that a panel which fails to draw raises a
  # warning instead of lattice's default error handling -- confirm against the
  # lattice documentation.
  lattice.options = list(panel.error = "warning"),
  auto.key = list(columns = 2)
)

# HW8 ----
# Read the SAS data set sales.sas7bdat; the raw import is kept in `dta08`.
library(sas7bdat)
dta08 <- read.sas7bdat("sales.sas7bdat")

# Build the analysis copy `dta8`: the integer codes for region, district,
# quarter and month become labelled factors, and negative sales values are
# replaced by 0.
dta8 <- mutate(
  dta08,
  region = factor(
    region,
    levels = 1:4,
    labels = c("Northern", "Southern", "Eastern", "Western")
  ),
  district = factor(
    district,
    levels = 1:5,
    labels = c(
      "North East", "South East", "South West", "North West", "Central West"
    )
  ),
  quarter = factor(
    quarter,
    levels = 1:4,
    labels = c("1st", "2nd", "3rd", "4th")
  ),
  month = factor(
    month,
    levels = 1:12,
    labels = c(
      "Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    )
  ),
  sales = replace(sales, sales < 0, 0)
)
# Sales by month, one coloured line with points per product and one panel
# per year.
ggplot(data = dta8, mapping = aes(x = month, y = sales, color = product)) +
  geom_point() +
  geom_line(mapping = aes(group = product)) +
  facet_wrap(~year) +
  theme_bw() +
  xlab("Month") +
  ylab("sales")

# Mean sales with standard-error bars in each quarter, coloured by product
# and dodged so the products do not overlap, with one panel per year.
ggplot(data = dta8, mapping = aes(x = quarter, y = sales, color = product)) +
  stat_summary(
    fun.data = mean_se,
    position = position_dodge(width = 0.3)
  ) +
  facet_wrap(~year) +
  xlab("quarter") +
  ylab("sales") +
  theme_bw()
