The distribution of personal disposable income in Taiwan in 2015 has a story to tell. Revise the following plot to enhance that message.
dta <- read.csv("income_tw.csv",header = T)
dta$Income <- factor(dta$Income, levels = dta$Income)
# sum(dta$Count)/4;sum(dta$Count)/4*2;sum(dta$Count)/4*3;sum(dta$Count)
# 3613068;7226135;10839202;14452270
unlist(lapply(c(1:length(dta$Count)), function(x) sum(dta$Count[1:x])))## [1] 807160 1108810 1422802 1752092 2121675 2574346 3069733
## [8] 3587512 4145298 4729795 5320902 5893073 6459367 6965848
## [15] 7427684 7916647 8326534 8717097 9104407 9458729 9796099
## [22] 10116893 10395532 10652834 10882033 11097913 11400515 11665964
## [29] 11907472 12208045 12467310 12763106 13044702 13266002 13491119
## [36] 13686310 13881342 14069585 14284495 14392450 14452270
# 8 15 25 41
dta <- dta %>% mutate(Q = c(rep("Q1",8),rep("Q2",7),rep("Q3",10),rep("Q4",16)),
Income1 = Income) %>%
separate(.,Income1,c("from","ig","to")," ") %>%
select(.,-ig)
dta$to[1] = "160,000";dta$to[41] = "2,500,000"
dta$from <- as.numeric(gsub(",","",dta$from))
dta$to <- as.numeric(gsub(",","",dta$to))
dta$med = round((dta$from+dta$to)/2,0)
dtaggplot(aes(Count/10000, Income, col = Q), data = dta) +
geom_point() +
geom_segment(aes(xend=mean(Count/10000), yend=Income)) +
labs(x="Number of persons(x10,000)",
title = "Distribution of disposable personal income in Taiwan in 2015") +
theme_minimal() +
theme(axis.text.y = element_text(color = c(rep("#F8766D",8),rep("#6BB100",7),rep("#00BCD8",10),rep("#B983FF",16))))## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
ggplot(aes(Count*med/100000000000, Income, col = Q), data = dta) +
geom_point() +
geom_segment(aes(xend=mean(Count*med/100000000000), yend=Income)) +
labs(x="Total assets in each income class(x10^11)",
title = "Distribution of total assets in each income class") +
theme_minimal() +
theme(axis.text.y = element_text(color = c(rep("#F8766D",8),rep("#6BB100",7),rep("#00BCD8",10),rep("#B983FF",16))))## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
Comment on how the graphs presented in this link violate the principles for effective graphics and how would you revise them.
## Loading required package: viridisLite
library(ggthemes) #配色package
data <- read.csv("traffic.csv",fileEncoding="big5",stringsAsFactors = F)
type <- data %>%
filter(year == 104) %>%
group_by(type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
vtype <- data %>%
filter(year == 104) %>%
group_by(vehicle_type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
method <- data %>%
filter(year == 104) %>%
group_by(method) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
# write.csv(type,file = "type.csv",row.names = F,fileEncoding="utf-8")
# write.csv(vtype,file = "vtype.csv",row.names = F,fileEncoding="utf-8")
# write.csv(method,file = "method.csv",row.names = F,fileEncoding="utf-8")
type_month <- data %>%
filter(year == 104) %>%
group_by(month,type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
vtype_month <- data %>%
filter(year == 104) %>%
group_by(month,vehicle_type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
method_month <- data %>%
filter(year == 104) %>%
group_by(month,method) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
# write.csv(type_month,file = "type_month.csv",row.names = F,fileEncoding="utf-8")
# write.csv(vtype_month,file = "vtype_month.csv",row.names = F,fileEncoding="utf-8")
# write.csv(method_month,file = "method_month.csv",row.names = F,fileEncoding="utf-8")ggplot(type[1:5,],aes(x=type,y =count,fill = type))+
geom_bar(stat="identity",width=.5,alpha = 0.8) +
geom_text(aes(label = count),size = 3.5) +
ggtitle("104年交通違規舉發件數") +
theme(text=element_text(family="黑體-繁 中黑"))顏色沒有幫助,以二維圖形呈現一維資料,legend與x軸資訊重疊
qplot(count, reorder(type, count), data = type[1:5,]) +
geom_segment(aes(xend = 0, yend = reorder(type, count))) +
geom_text(aes(label = count), vjust = -0.8, hjust = 1) +
ggtitle("104年交通違規舉發件數") +
labs(y="") +
theme(text=element_text(family="黑體-繁 中黑"))ggplot(vtype,aes(x=vehicle_type,y =count,fill = vehicle_type))+
geom_bar(stat="identity",width=.5,alpha = 0.8) +
geom_text(aes(label = count),size = 3.5) +
ggtitle("104年交通違規舉發車種別件數") +
theme(text=element_text(family="黑體-繁 中黑"))問題同上
qplot(count, reorder(vehicle_type, count), data = vtype) +
geom_segment(aes(xend = 0, yend = reorder(vehicle_type, count))) +
geom_text(aes(label = count), vjust = -0.8, hjust = 0.6) +
ggtitle("104年交通違規舉發車種別件數") +
labs(y="") +
theme(text=element_text(family="黑體-繁 中黑"))ggplot(method,aes(x=method,y =count,fill = method))+
geom_bar(stat="identity",width=.5,alpha = 0.8) +
geom_text(aes(label = count),size = 3.5) +
ggtitle("104年交通違規舉發方式別件數") +
theme(text=element_text(family="黑體-繁 中黑"))問題同上
qplot(count, reorder(method, count), data = method) +
geom_segment(aes(xend = 0, yend = reorder(method, count))) +
geom_text(aes(label = count), vjust = -0.8, hjust = 0.6) +
ggtitle("104年交通違規舉發方式別件數") +
labs(y="") +
theme(text=element_text(family="黑體-繁 中黑"))top5type <- type[1:5,1]#取出數量前五的種類
top5 <- c()#產生空的框架
for(i in 1:5){
X <- type_month %>%
filter(type == as.character(top5type[i,]))
top5 <- rbind(top5,X)
}#重新取出數量前五種類的資料
ggplot(top5,aes(x=type,y =count,fill = type))+
geom_boxplot(alpha = 0.7) +
ggtitle("104年交通違規盒鬚圖") +
theme(text=element_text(family="黑體-繁 中黑"))問題同上
# qplot(type, count, data=top5, geom=c("boxplot","jitter")) +
# ggtitle("104年交通違規盒鬚圖") +
# theme(text=element_text(family="黑體-繁 中黑"))
ggplot(top5,aes(x=reorder(type, count),y=count))+
geom_boxplot(alpha = 0.7) +
geom_jitter(color = "gray",pch=21) +
ggtitle("104年交通違規盒鬚圖") +
labs(x="") +
theme(text=element_text(family="黑體-繁 中黑"))bar <- list()#產生空的list
for(i in 1:12){
X <- ungroup(top5) %>%
filter(month == i)
bar[[i]]<- ggplot(X,aes(x=type,y =count,fill = type))+
geom_bar(stat="identity",width=.5,alpha = 0.8) +
geom_text(aes(label = count),size = 3.5) +
ggtitle(paste("104年",i,"月交通違規舉發")) +
theme(text=element_text(family="黑體-繁 中黑"))
print(bar[[i]] )
}#產生1至12月的barsbar <- list()#產生空的list
for(i in 1:12){
X <- ungroup(top5) %>%
filter(month == i)
bar[[i]]<- ggplot(X,aes(x=type,y =count,fill = type))+
geom_bar(stat="identity",width=.5,alpha = 0.8) +
geom_text(aes(label = count),size = 3.5) +
ggtitle(paste("104年",i,"月交通違規舉發")) +
theme(text=element_text(family="黑體-繁 中黑"))
print(bar[[i]] )
}#產生1至12月的bars畫太多張圖,每月無法比較,其餘問題同上
ggplot(aes(count, reorder(type, count)), data = top5) +
geom_segment(aes(xend = 0, yend = reorder(type, count))) +
facet_wrap(. ~ month, nrow = 4) +
ggtitle("104年各月交通違規舉發件數") +
labs(y="") +
theme(text=element_text(family="黑體-繁 中黑"),
axis.text.x = element_text(angle = 90, hjust = 1, size = 8),
axis.text.y = element_text(size = 7))trendline <- list()
for(i in 1:5){
X <- ungroup(top5) %>%
filter(type == as.character(top5type[i,]))
trendline[[i]] <- ggplot(X,aes(x=month,y =count))+
geom_text(aes(label = count),size = 3.5) +
geom_line(colour = "darkred",size = 1,alpha=0.8) +
scale_x_continuous(breaks = 1:12) +
theme_bw() +
ggtitle(paste("104年",as.character(top5type[i,]),"趨勢線")) +
theme(text=element_text(family="黑體-繁 中黑"))
print(trendline[[i]] )
}#產生前5種類的trendllinestrendline <- list()
for(i in 1:5){
X <- ungroup(top5) %>%
filter(type == as.character(top5type[i,]))
trendline[[i]] <- ggplot(X,aes(x=month,y =count))+
geom_text(aes(label = count),size = 3.5) +
geom_line(colour = "darkred",size = 1,alpha=0.8) +
scale_x_continuous(breaks = 1:12) +
theme_bw() +
ggtitle(paste("104年",as.character(top5type[i,]),"趨勢線")) +
theme(text=element_text(family="黑體-繁 中黑"))
print(trendline[[i]] )
}#產生前5種類的trendllines畫太多張圖,每種類型無法比較,y軸沒有統一,其餘問題同上
ggplot(aes(count, month), data = top5) +
geom_segment(aes(xend = 0, yend = month)) +
facet_wrap(. ~ type) +
ggtitle("104年各類型交通違規各月舉發件數") +
ylim('1','2','3','4','5','6','7','8','9','10','11','12') +
theme(text=element_text(family="黑體-繁 中黑"),
axis.text.x = element_text(angle = 90, hjust = 1, size = 5),
axis.text.y = element_text(size = 5))month <- data %>%
filter(year == 104) %>%
group_by(month) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
ggplot(month,aes(x=month,y =count))+
geom_text(aes(label = count),size = 3.5) +
geom_line(colour = "darkred",size = 1,alpha=0.8) +
scale_x_continuous(breaks = 1:12) +
theme_bw() +
ggtitle(paste("104年交通違規總數趨勢線")) +
theme(text=element_text(family="黑體-繁 中黑"))折線沒有意義,文字與線重疊
qplot(month, count, data=month, geom=c("point", "smooth"), se=F) +
geom_segment(aes(xend = month, yend = 50000)) +
geom_text(aes(label = count), vjust = -1, size = 2) +
xlim('1','2','3','4','5','6','7','8','9','10','11','12') +
ggtitle("104年交通違規總數趨勢線") +
theme(text=element_text(family="黑體-繁 中黑"))## Warning: Ignoring unknown parameters: se
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(vtype_month,aes(x=month,y =count,colour=vehicle_type))+
geom_text(aes(label = count),size = 3.5) +
geom_line(size = 1,alpha=0.8) +
scale_x_continuous(breaks = 1:12) +
theme_bw() +
ggtitle(paste("104年交通違規車種趨勢線")) +
theme(text=element_text(family="黑體-繁 中黑"))車種數量差距大,違規次數差太多,重型機車在此圖看不出趨勢,文字與線重疊導致視覺上混亂,各車種違規趨勢好像也沒有比較的必要,因此傾向分開作圖
vtype_month %>% filter(vehicle_type == "機車") %>%
qplot(month, count, data=., geom=c("point", "smooth"), se=F) +
geom_segment(aes(xend = month, yend = 25000)) +
geom_text(aes(label = count), vjust = -1, size = 2) +
xlim('1','2','3','4','5','6','7','8','9','10','11','12') +
ggtitle("104年交通違規機車趨勢線") +
theme(text=element_text(family="黑體-繁 中黑"))## Warning: Ignoring unknown parameters: se
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
vtype_month %>% filter(vehicle_type == "汽車") %>%
qplot(month, count, data=., geom=c("point", "smooth"), se=F) +
geom_segment(aes(xend = month, yend = 25000)) +
geom_text(aes(label = count), vjust = -1, size = 2) +
xlim('1','2','3','4','5','6','7','8','9','10','11','12') +
ggtitle("104年交通違規汽車趨勢線") +
theme(text=element_text(family="黑體-繁 中黑"))## Warning: Ignoring unknown parameters: se
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
vtype_month %>% filter(vehicle_type == "重型機車(250CC以上)") %>%
qplot(month, count, data=., geom=c("point", "smooth"), se=F) +
geom_segment(aes(xend = month, yend = 200)) +
geom_text(aes(label = count), vjust = -1, size = 2) +
xlim('1','2','3','4','5','6','7','8','9','10','11','12') +
ggtitle("104年交通違規重型機車(250CC以上)趨勢線") +
theme(text=element_text(family="黑體-繁 中黑"))## Warning: Ignoring unknown parameters: se
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(method_month,aes(x=month,y =count,colour=method))+
geom_text(aes(label = count),size = 3.5) +
geom_line(size = 1,alpha=0.8) +
scale_x_continuous(breaks = 1:12) +
theme_bw() +
ggtitle(paste("104年交通違規舉發方式趨勢線")) +
theme(text=element_text(family="黑體-繁 中黑"))加上總數
rb <-method_month %>% group_by(month) %>% summarise(count=sum(count)) %>% mutate(method = "總數")
method_month <- bind_rows(method_month,rb)
method_month$jit = 0
method_month[method_month$method == "攔停",]$jit = 0.1
method_month[method_month$method == "逕舉",]$jit = -0.1
qplot(month+jit, count, data=method_month,
colour=method, geom=c("point", "smooth"), se=F) +
geom_segment(aes(xend = month+jit, yend = 10000)) +
geom_text(aes(label = count), vjust = -1, size = 2) +
xlim('1','2','3','4','5','6','7','8','9','10','11','12') +
labs(x="month") +
ggtitle("104年交通違規重型機車(250CC以上)趨勢線") +
theme(text=element_text(family="黑體-繁 中黑"))## Warning: Ignoring unknown parameters: se
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#熱度圖
ggplot(ungroup(top5),aes(x=factor(month), y=factor(type), fill=count)) +
geom_tile(color="white", size=0.1) +
scale_fill_viridis(name="件數")+ #使用viridis
coord_equal()+
labs(x=NULL, y=NULL, title="104年交通違規熱度圖")+
theme_tufte(base_family="Helvetica")+ #使用ggthemes
theme(text=element_text(family="黑體-繁 中黑"),
plot.title=element_text(hjust=0),
axis.ticks=element_blank(),
axis.text=element_text(size=7),
legend.title=element_text(size=8),
legend.text=element_text(size=6))人類視覺對亮度較為敏感,因此改為灰階
#熱度圖
ggplot(ungroup(top5),aes(x=factor(month), y=factor(type), fill=count)) +
geom_tile(color="white", size=0.1) +
scale_fill_gradient(name = "count",
low = "#FFFFFF",
high = "#012345") +
coord_equal()+
labs(x=NULL, y=NULL, title="104年交通違規熱度圖")+
theme_bw()+
theme(text=element_text(family="黑體-繁 中黑"),
plot.title=element_text(hjust=0),
axis.ticks=element_blank(),
axis.text=element_text(size=7),
legend.title=element_text(size=8),
legend.text=element_text(size=6))Sarah Leo at the Economist magazine published a data set to accompany the story about how scientific publishing is dominated by men. The plot on the left panel below is the orignal graph that appeared in the article. Help her find a better plot.
dta <- read.csv("dtaVisQ3data.csv",header = T)
names(dta)<-c("Country","Health Sciences","Physical Sciences","Engineering","CS and Math","Women Inventores")
dta <- gather(dta, subject, percentage, 2:6)
head(dta)ggplot(aes(percentage, reorder(Country,percentage)), data = dta) +
geom_point()+
geom_segment(aes(xend = 0.2, yend = reorder(Country,percentage))) +
facet_wrap(. ~ reorder(subject,percentage), nrow = 1) +
labs(y="Country") +
ggtitle("Still a man's world") 各國比例皆接近0.2 (Health Sciences除外)