pacman::p_load(tidyverse, lattice, magrittr, dplyr, ggplot2 )
# Read the comma-separated file nlsy86long.csv (first row holds column names)
# into `dta2`, the data frame used by the two plots that follow.
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine -- adjust it before running elsewhere.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
dta2 <- read.table("C:/Users/ILT/Documents/nlsy86long.csv",
                   header = TRUE, sep = ",")
# 畫圖:性別與種族在閱讀跟數學兩項成績分布 (Plot: distribution of reading and math scores by sex and race)
# Smoothed trend of reading score against math score: one curve per race,
# one panel (column) per sex.
ggplot(data = dta2, mapping = aes(x = math, y = read, colour = race)) +
  geom_smooth() +
  facet_grid(cols = vars(sex)) +
  theme_light()
## `geom_smooth()` using method = 'loess'
# 看起來沒有太大差異 (There does not appear to be much of a difference.)
# 畫圖:總成績與年級、性別關係 (Plot: relationship of the scores with grade and sex)
# Mean score (+/- 1 standard error) at each grade, coloured by sex, with one
# panel per subject.
# Columns 8 and 9 are stacked into a long `subject` / `score` pair; they are
# selected by position, so this presumably targets the two subject-score
# columns -- TODO confirm against the file's column order.
# `pivot_longer()` replaces the superseded `gather()`.
dta2 %>%
  pivot_longer(cols = 8:9, names_to = "subject", values_to = "score") %>%
  ggplot(aes(grade, score, color = sex)) +
  stat_summary(
    fun.data = mean_se, geom = "pointrange",
    # Dodge the two sexes sideways so their point-ranges do not overlap.
    position = position_dodge(0.3)
  ) +
  facet_grid(. ~ subject) +
  theme_light()
# 年級上升男生的數學與女生的閱讀能力好像漸漸有差異 (As grade increases, boys' math and girls' reading ability seem to gradually diverge.)
# Read alcohol_age.csv and add `over21`, a "yes"/"no" flag marking rows whose
# Age is at least 21.
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine.
# `header = TRUE` replaces the reassignable alias `T`; `sep = ","` is already
# the read.csv() default, so it is not repeated.
dta3 <- read.csv("C:/Users/ILT/Documents/alcohol_age.csv", header = TRUE) %>%
  mutate(over21 = ifelse(Age >= 21, "yes", "no"))
# Scatter plot of Alcohol against Age with a grid ("g"), a separate regression
# line ("r") and points ("p") for each `over21` group.
# `groups` is spelled out in full instead of relying on partial matching of
# `group`, and the data frame is passed directly rather than piped into
# `data = .`.
xyplot(Alcohol ~ Age,
       groups = over21,
       data = dta3,
       type = c("g", "r", "p"),
       xlab = "Age (year)",
       ylab = "Mortality rate from alcohol abuse (per 100,000)")
# 年齡對於酗酒死亡率的效果 (Effect of age on the mortality rate from alcohol abuse)
# Mean of Alcohol within each `over21` group; printed, then reused below so
# the plotted mean lines cannot drift away from the data.
alcohol_means <- aggregate(Alcohol ~ over21, data = dta3, FUN = mean)
alcohol_means
## over21 Alcohol
## 1 no 1.032118
## 2 yes 1.482557

# Named lookup (group label -> mean). `[[` fails loudly if a group is missing.
group_means <- setNames(alcohol_means$Alcohol, alcohol_means$over21)

# Points coloured by group, plus one horizontal segment per group at its mean:
# ages 19-21 for the "no" group and 21-23 for the "yes" group.
# annotate() draws each segment once; geom_segment() with constants inside
# aes() would redraw the same segment for every row of `dta3`.
ggplot(dta3, aes(Age, Alcohol)) +
  geom_point(aes(color = over21), na.rm = TRUE) +
  annotate("segment",
           x = 19, xend = 21,
           y = group_means[["no"]], yend = group_means[["no"]],
           color = "tomato") +
  annotate("segment",
           x = 21, xend = 23,
           y = group_means[["yes"]], yend = group_means[["yes"]],
           color = "turquoise") +
  theme(legend.position = "none") +
  labs(x = "Age (year)", y = "Mortality rate from alcohol abuse (per 100,000)")
# 比較不同年齡組的平均差異 (Compare the difference in means between the age groups)
# 畫圖:情緒與情境的關係 (Plot: relationship between emotion and situation)
# Read coping.txt (whitespace-separated, first row holds column names).
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine.
# `header = TRUE` replaces `h = T`, which relied on both partial argument
# matching and the reassignable alias `T`.
dta5 <- read.table("C:/Users/ILT/Documents/coping.txt", header = TRUE)

# Mean score (+/- 1 standard error) per situation, one colour per emotion.
# Columns 1-4 and 8 are stacked into a long `emotion` / `e_score` pair; they
# are selected by position, presumably the emotion ratings -- TODO confirm
# against the file's column order.
# `pivot_longer()` replaces the superseded `gather()`.
dta5 %>%
  pivot_longer(cols = c(1:4, 8), names_to = "emotion", values_to = "e_score") %>%
  ggplot(aes(situation, e_score, color = emotion)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")
# 策略與情境的關係 (Relationship between coping strategy and situation)
# Mean score (+/- 1 standard error) per situation, one colour per coping
# column.
# Columns 5-7 are stacked into a long `coping` / `c_score` pair; they are
# selected by position, presumably the coping-strategy ratings -- TODO confirm
# against the file's column order.
# `pivot_longer()` replaces the superseded `gather()`.
dta5 %>%
  pivot_longer(cols = 5:7, names_to = "coping", values_to = "c_score") %>%
  ggplot(aes(situation, c_score, color = coping)) +
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")
# Read beautyCourseEval.txt (first row holds column names) and append labelled
# factor versions of the course id and of the three 0/1 indicator columns.
# The original lower-case columns are kept alongside the new capitalised ones.
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine.
dta7 <- mutate(
  read.table("C:/Users/ILT/Documents/beautyCourseEval.txt", header = TRUE),
  CourseID = factor(courseID),
  # NOTE(review): code 0 is labelled "Minority" and code 1 "Others" -- confirm
  # against the data codebook that this coding is not inverted.
  Minority = factor(minority,
                    levels = c(0, 1),
                    labels = c("Minority", "Others")),
  Tenure = factor(tenure,
                  levels = c(0, 1),
                  labels = c("No", "Tenured")),
  Gender = factor(sex,
                  levels = c(0, 1),
                  labels = c("Male", "Female"))
)
# Course evaluation against beauty score, one panel per course, with a grid,
# a regression line and points for each Gender group.
# `groups` and `columns` are spelled out in full instead of relying on partial
# matching of `group` and `column`.
xyplot(eval ~ beauty | CourseID,
       groups = Gender,
       data = dta7,
       type = c("g", "r", "p"),
       # Order the panels by the slope of a per-panel linear fit of y on x.
       index.cond = function(x, y) coef(lm(y ~ x))[2],
       xlab = "Beauty score",
       ylab = "Course evaluation score",
       # Report a failing panel as a warning instead of stopping the plot.
       lattice.options = list(panel.error = "warning"),
       # Legend laid out in two columns.
       auto.key = list(columns = 2))
library(sas7bdat)

# Read the SAS data set sales.sas7bdat and, in a single pipeline:
#   * turn the integer codes of region, district, quarter and month into
#     labelled factors;
#   * replace every negative `sales` value with 0.
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine.
dta8 <- read.sas7bdat("C:/Users/ILT/Documents/sales.sas7bdat", debug = FALSE) %>%
  mutate(
    region = factor(
      region,
      levels = 1:4,
      labels = c("Northern", "Southern", "Eastern", "Western")
    ),
    district = factor(
      district,
      levels = 1:5,
      labels = c("North East", "South East", "South West", "North West", "Central West")
    ),
    quarter = factor(
      quarter,
      levels = 1:4,
      labels = c("1st", "2nd", "3rd", "4th")
    ),
    month = factor(
      month,
      levels = 1:12,
      labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    ),
    sales = replace(sales, sales < 0, 0)
  )
# 畫圖:月 (Plot: by month)
# Sales by month: points joined by one line per product, one panel per year.
ggplot(data = dta8, mapping = aes(x = month, y = sales, colour = product)) +
  geom_point() +
  # `month` is a factor, so an explicit group is needed to connect the points.
  geom_line(mapping = aes(group = product)) +
  facet_wrap(vars(year)) +
  theme_bw() +
  labs(x = "Month", y = "sales")
# 畫圖:季 (Plot: by quarter)
# Mean sales (+/- 1 standard error) per quarter, one colour per product,
# one panel per year.
ggplot(data = dta8, mapping = aes(x = quarter, y = sales, colour = product)) +
  stat_summary(
    fun.data = mean_se,
    geom = "pointrange",
    # Dodge the products sideways so their point-ranges do not overlap.
    position = position_dodge(width = 0.3)
  ) +
  facet_wrap(vars(year)) +
  labs(x = "quarter", y = "sales") +
  theme_bw()