隨著時間變化,性別在不同種族的數學成績沒有差異。
# Load the long-format NLSY86 data and print its structure.
# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 as well.
dta2 <- read.csv("nlsy86long.csv", header = TRUE, stringsAsFactors = TRUE)
str(dta2)
'data.frame': 664 obs. of 9 variables:
$ id : int 2390 2560 3740 4020 6350 7030 7200 7610 7680 7700 ...
$ sex : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 2 2 2 1 2 ...
$ race : Factor w/ 2 levels "Majority","Minority": 1 1 1 1 1 1 1 1 1 1 ...
$ time : int 1 1 1 1 1 1 1 1 1 1 ...
$ grade: int 0 0 0 0 1 0 0 0 0 0 ...
$ year : int 6 6 6 5 7 5 6 7 6 6 ...
$ month: int 67 66 67 60 78 62 66 79 76 67 ...
$ math : num 14.29 20.24 17.86 7.14 29.76 ...
$ read : num 19.05 21.43 21.43 7.14 30.95 ...
# Treat the measurement occasion as a categorical x-axis.
dta2 <- dta2 %>% mutate(time = factor(time))
# Math score by occasion, one panel per race: dotted gray lines connect the
# observations of each id; coloured point-ranges and lines summarise each sex.
ggplot(dta2, aes(time, math, color = sex)) +
  facet_wrap(~race) +
  stat_summary(fun.data = mean_se, geom = "pointrange") +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(aes(group = sex), fun = mean, geom = "line") +
  geom_line(
    aes(group = id),
    color = "gray50", alpha = .8, linetype = "dotted"
  ) +
  theme_bw()
隨著時間變化,性別在不同種族的閱讀成績沒有差異。
# Reading score by occasion, one panel per race: dotted gray lines connect the
# observations of each id; coloured point-ranges and lines summarise each sex.
# NOTE(review): expects dta2$time to already be a factor — confirm it is
# converted before this plot runs.
ggplot(dta2, aes(time, read, color = sex)) +
  facet_wrap(~race) +
  stat_summary(fun.data = mean_se, geom = "pointrange") +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(aes(group = sex), fun = mean, geom = "line") +
  geom_line(
    aes(group = id),
    color = "gray50", alpha = .8, linetype = "dotted"
  ) +
  theme_bw()
圖一是將人分為兩組,觀察組裡隨年齡增加和死亡率的關係。
# Load the alcohol/age data and drop every row that contains a missing value.
dta3 <- read.csv("alcohol_age.csv", header = TRUE) %>% na.omit()
# Label the first 24 remaining rows "No" and the next 24 "Yes".
# NOTE(review): assumes exactly 48 complete rows, ordered so that the first
# half is the not-yet-legal group — confirm against the data file.
dta3 <- dta3 %>%
  mutate(Legal = factor(rep(c("No", "Yes"), each = 24)))
# plot 1: Alcohol against Age with a separate linear fit per Legal group.
ggplot(dta3, aes(Age, Alcohol, color = Legal)) +
  geom_point() +
  stat_smooth(aes(group = Legal), method = "lm", se = FALSE) +
  theme_bw()
圖二是看組裡飲酒和死亡率的關係
# Mean of Alcohol within each level of Legal.
aggregate(Alcohol ~ Legal, data = dta3, FUN = mean)
Legal Alcohol
1 No 1.032118
2 Yes 1.482557
# plot 2: Alcohol against Age, with one horizontal segment per Legal group
# drawn at that group's mean (1.032 for "No", 1.483 for "Yes").
ggplot(dta3, aes(Age, Alcohol)) +
  geom_point(aes(color = Legal), na.rm = TRUE) +
  # annotate() draws each segment once; geom_segment() with constant aes()
  # would redraw it for every row of dta3.
  annotate(
    "segment",
    x = 19, xend = 21, y = 1.032, yend = 1.032, color = "tomato"
  ) +
  annotate(
    "segment",
    x = 21, xend = 23, y = 1.483, yend = 1.483, color = "turquoise"
  ) +
  labs(
    x = "Age (year)",
    y = "Mortality rate from alcohol abuse (per 100,000)"
  ) +
  # theme_bw() is a complete theme and resets earlier theme() settings, so the
  # legend must be switched off after it, not before.
  theme_bw() +
  theme(legend.position = "none")
隨著年齡增加,男性自殺率有增加的趨勢。
# Load the male suicide rates: one row per country, one column per age band.
dta4 <- read.table("suicide.txt", header = FALSE)
colnames(dta4) <- c("Country", "25-34", "35-44", "45-54", "55-64", "65-74")
# Stack the five age-band columns into long format (Age, Rate).
dta4 <- dta4 %>% gather(key = "Age", value = "Rate", -Country)
# Distribution of rates across countries within each age band.
ggplot(dta4, aes(Age, Rate)) +
  geom_boxplot() +
  labs(x = "Age", y = "Deaths per 100,000 from male suicides") +
  theme_bw()
情緒與情境的關係,在大部分的情境中,annoy的分數均高。
# Load the coping data (first line of the file holds the column names).
dta5 <- read.table("coping.txt", header = TRUE)
# Stack columns 1-4 and 8 into long format (emotion, e_score), then plot the
# mean and standard error of each emotion score per situation.
# NOTE(review): presumably columns 1-4 and 8 are the emotion ratings — confirm
# against the file's column order.
dta5 %>%
  gather(emotion, e_score, c(1:4, 8)) %>%
  ggplot(aes(situation, e_score, color = emotion)) +
  # Dodge so the emotions do not overplot within one situation.
  stat_summary(fun.data = mean_se, position = position_dodge(0.3)) +
  theme_bw() +
  labs(x = "Situation", y = "Score")
採取策略與情境的關係。
# Stack columns 5-7 of dta5 into long format (coping, c_score) and plot the
# mean and standard error of each coping score per situation.
ggplot(
  gather(dta5, coping, c_score, 5:7),
  aes(x = situation, y = c_score, colour = coping)
) +
  stat_summary(
    fun.data = mean_se,
    geom = "pointrange",
    position = position_dodge(0.3)
  ) +
  labs(x = "Situation", y = "Score") +
  theme_bw()
不知道怎麼把這段程式寫得更簡潔。
# Read the space-separated files from the Murd62 folder.
# fill = TRUE pads rows that have fewer fields than the widest row.
dta6_1 <- read.table("Murd62/fr10-2.txt", header = FALSE, sep = " ", fill = TRUE)
dta6_2 <- read.table("Murd62/fr15-2.txt", header = FALSE, sep = " ", fill = TRUE)
dta6_3 <- read.table("Murd62/fr20-1.txt", header = FALSE, sep = " ", fill = TRUE)
#dta6_4 <- read.table("Murd62/fr20-2.txt", h = F, sep = " ", fill = T)
dta6_5 <- read.table("Murd62/fr30-1.txt", header = FALSE, sep = " ", fill = TRUE)
dta6_6 <- read.table("Murd62/fr40-1.txt", header = FALSE, sep = " ", fill = TRUE)
# Count how often each item number 1..x occurs anywhere in `data`.
#
# x:    number of item values to tally (the items are the integers 1 to x).
# data: data frame or matrix of values; NA cells are ignored.
# Returns a numeric vector of length x whose k-th element is the number of
# cells equal to k (an empty vector when x is 0).
Count <- function(x, data) {
  # seq_len() gives an empty sequence for x = 0, whereas 1:x would give c(1, 0).
  vapply(
    seq_len(x),
    function(item) sum(data == item, na.rm = TRUE),
    numeric(1)
  )
}
# Build a data frame with one row per item number 1..x.
#
# x:    number of item values (items are the integers 1 to x).
# data: table passed on to Count(); its row count is the denominator.
# Returns a data frame with columns `item` (1..x), `prob` (Count() result
# divided by the number of rows in `data`) and `grp` (x repeated on every row).
Frame <- function(x, data) {
  recall_rate <- Count(x, data) / nrow(data)
  as.data.frame(cbind(item = 1:x, prob = recall_rate, grp = x))
}
# Apply Frame() to each (list length, data set) pair and stack the results
# into one data frame.
Recall <- do.call(
  rbind,
  Map(
    Frame,
    c(10, 15, 20, 30, 40),
    list(dta6_1, dta6_2, dta6_3, dta6_5, dta6_6)
  )
)
# One curve per list length: `prob` against `item`.
ggplot(Recall, aes(x = item, y = prob)) +
  geom_point() +
  geom_line(aes(group = grp)) +
  theme_bw() +
  labs(x = "Serial position", y = "Probability of recall")
# Load the beauty / course-evaluation data (first line holds column names).
dta7 <- read.table("beautyCourseEval.txt", header = TRUE)
# Relabel sex: the lower-sorting code becomes "Female", the higher "Male".
# NOTE(review): assumes sex has exactly two codes with female sorting first —
# confirm against the data file.
dta7 <- dta7 %>%
  mutate(sex = factor(sex, labels = c("Female", "Male")))
# Evaluation score against beauty score, one panel per course, with a
# separate linear fit per sex.
ggplot(dta7, aes(beauty, eval, color = sex)) +
  geom_point(shape = 1) +
  facet_wrap(~courseID) +
  stat_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Beauty judgment score",
    y = "Average course evaluation score"
  ) +
  theme_bw()
library(sas7bdat)
# Load the SAS sales data set.
dta8 <- read.sas7bdat("sales.sas7bdat", debug = FALSE)
# Turn the integer-coded columns into labelled factors.
dta8 <- mutate(
  dta8,
  region = factor(
    region,
    levels = 1:4,
    labels = c("Northern", "Southern", "Eastern", "Western")
  ),
  district = factor(
    district,
    levels = 1:5,
    labels = c(
      "North East", "South East", "South West", "North West", "Central West"
    )
  ),
  quarter = factor(
    quarter,
    levels = 1:4,
    labels = c("1st", "2nd", "3rd", "4th")
  ),
  # month.abb is base R's constant "Jan", "Feb", ..., "Dec".
  month = factor(month, levels = 1:12, labels = month.abb)
)
# Set negative sales figures to zero.
dta8$sales[dta8$sales < 0] <- 0
以月來看各產品的營銷。
# Sales by month, one panel per year, with points and a connecting line for
# each product.
ggplot(
  data = dta8,
  mapping = aes(x = month, y = sales, colour = product)
) +
  geom_point() +
  geom_line(mapping = aes(group = product)) +
  facet_wrap(~year) +
  theme_bw() +
  labs(x = "Month", y = "sales")
以季來看各產品的營銷。
# Mean and standard error of sales per quarter for each product, one panel
# per year; products are dodged so they do not overlap.
ggplot(
  data = dta8,
  mapping = aes(x = quarter, y = sales, colour = product)
) +
  stat_summary(
    fun.data = mean_se,
    geom = "pointrange",
    position = position_dodge(0.3)
  ) +
  facet_wrap(~year) +
  labs(x = "quarter", y = "sales") +
  theme_bw()
北區外的店只有一年中特定月開,沒有月的變化,不討論地區比較。
長條圖不需要標示數字、過度使用顏色、標籤中英混雜、標籤標示不清…
(資料還載不下來)