library(ggplot2)
library(lattice)
library(tidyverse)
library(dplyr)The distribution of personal disposable income in Taiwan in 2015 has a story to tell. Revise the following plot to enhance that message.
dta<-read.csv("D://income_tw.csv", head=T)
#看資料
summary(dta) Income Count
Length:41 Min. : 59820
Class :character 1st Qu.:241508
Mode :character Median :313992
Mean :352494
3rd Qu.:461836
Max. :807160
str(dta)'data.frame': 41 obs. of 2 variables:
$ Income: chr "160,000 and under" "160,000 to 179,999" "180,000 to 199,999" "200,000 to 219,999" ...
$ Count : int 807160 301650 313992 329290 369583 452671 495387 517779 557786 584497 ...
dta1<-dta |>
mutate(Count1 = Count / 10000) |>
#增加Count1=Count/10000這個column
mutate(order=seq(1:n()))
#增加order這個column,從1開始編到n
name=c("Income","Count", "Count1","order")
#給column新的命名
str(dta1)'data.frame': 41 obs. of 4 variables:
$ Income: chr "160,000 and under" "160,000 to 179,999" "180,000 to 199,999" "200,000 to 219,999" ...
$ Count : int 807160 301650 313992 329290 369583 452671 495387 517779 557786 584497 ...
$ Count1: num 80.7 30.2 31.4 32.9 37 ...
$ order : int 1 2 3 4 5 6 7 8 9 10 ...
p1<-ggplot(data=dta1,
aes(x=Count1,
y=reorder(Income,-order)))#y軸排序依據order倒著排
p1p2<-p1+
geom_point()
p2p3<-p2+
labs(x='Number of persons',
y='',
title="Distribution of personal disposable income in Taiwan (2015)",
)
p3p1<-ggplot(data=dta1,
aes(x=Count1,
y=reorder(Income,Count1)))+ #y軸排據count1排序
geom_point()+
labs(x='Number of persons',
y='',
title="Distribution of personal disposable income in Taiwan (2015)",
)
p1#這樣其實也只能看出來最多人的是<160000元
#最少人的是2500000元Comment on how the graphs presented in this link violate the principles for effective graphics and how would you revise them.
data <- read.csv("D://test.csv",fileEncoding="big5", head=T)
head(data) law month c_type count X_id
1 12條第1項第1.3.4.5.6.7.8款 1 汽車 0 1
2 12條第1項第1.3.4.5.6.7.8款 1 汽車 149 2
3 12條第1項第1.3.4.5.6.7.8款 1 250cc以上重型機車 0 3
4 12條第1項第1.3.4.5.6.7.8款 1 250cc以上重型機車 0 4
5 12條第1項第1.3.4.5.6.7.8款 1 250cc以下機車 0 5
6 12條第1項第1.3.4.5.6.7.8款 1 250cc以下機車 40 6
type X.year
1 未領用或未懸掛牌照等 104
2 未領用或未懸掛牌照等 104
3 未領用或未懸掛牌照等 104
4 未領用或未懸掛牌照等 104
5 未領用或未懸掛牌照等 104
6 未領用或未懸掛牌照等 104
names(data)<-c("year", "month", "type", "law", "vehicle_type", "method", "count")
str(data)'data.frame': 100 obs. of 7 variables:
$ year : chr "12條第1項第1.3.4.5.6.7.8款" "12條第1項第1.3.4.5.6.7.8款" "12條第1項第1.3.4.5.6.7.8款" "12條第1項第1.3.4.5.6.7.8款" ...
$ month : int 1 1 1 1 1 1 1 1 1 1 ...
$ type : chr "汽車" "汽車" "250cc以上重型機車" "250cc以上重型機車" ...
$ law : int 0 149 0 0 0 40 0 0 0 0 ...
$ vehicle_type: int 1 2 3 4 5 6 7 8 9 10 ...
$ method : chr "未領用或未懸掛牌照等" "未領用或未懸掛牌照等" "未領用或未懸掛牌照等" "未領用或未懸掛牌照等" ...
$ count : int 104 104 104 104 104 104 104 104 104 104 ...
type <- data %>%
filter(year == 104) %>%
group_by(type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
vtype <- data %>%
filter(year == 104) %>%
group_by(vehicle_type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))
method <- data %>%
filter(year == 104) %>%
group_by(method) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))type_month <- data %>%
filter(year == 104) %>%
group_by(month,type) %>%
summarise(count=sum(count)) %>%
arrange(desc(count))vtype_month <- data %>%
filter(year == 104) %>%
group_by(month,vehicle_type) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))method_month <- data %>%
filter(year == 104) %>%
group_by(month,method) %>%
summarise(count = sum(count)) %>%
arrange(desc(count))# add font
windowsFonts(A=windowsFont("Microsoft JhengHei UI")) Use the free recall data to improve on the figure reported in Murdock, B. B. (1962). The serial position effect of free recall. Journal of Experimental Psychology, 64, 482-488.
list.files("D:/Murd62", pattern = "fr")[1] "fr10-2.txt" "fr15-2.txt" "fr20-1.txt" "fr20-2.txt" "fr30-1.txt"
[6] "fr40-1.txt"
# 載入資料(有6個txt檔)
fL<-"D:/Murd62/fr10-2.txt"
# read.table(fL, sep="",header=FALSE)會出現錯誤碼如下,後來發現txt檔有空值
# Error in scan(file = file...: line 3 did not have 7 elements)
# 加上fill=T,帶入空值(NA)
dta10_2 <- read.table(fL, sep="",header=FALSE, fill=T)
#'data.frame': 1607 obs. of 7 variables
str(dta10_2)'data.frame': 1607 obs. of 7 variables:
$ V1: int 6 10 10 10 10 6 8 9 8 10 ...
$ V2: int 1 9 7 88 3 8 9 10 9 8 ...
$ V3: int 4 6 9 7 7 10 10 4 10 9 ...
$ V4: int 7 8 8 2 2 5 7 6 7 2 ...
$ V5: int 10 2 1 1 1 3 88 88 2 3 ...
$ V6: int 2 1 NA 5 9 88 4 8 NA NA ...
$ V7: int 8 88 NA NA NA NA NA 1 NA NA ...
#改column name
colnames(dta10_2)=c(1:7)
#新增group
dta10_2$Group<-as.factor("10-2")
str(dta10_2)'data.frame': 1607 obs. of 8 variables:
$ 1 : int 6 10 10 10 10 6 8 9 8 10 ...
$ 2 : int 1 9 7 88 3 8 9 10 9 8 ...
$ 3 : int 4 6 9 7 7 10 10 4 10 9 ...
$ 4 : int 7 8 8 2 2 5 7 6 7 2 ...
$ 5 : int 10 2 1 1 1 3 88 88 2 3 ...
$ 6 : int 2 1 NA 5 9 88 4 8 NA NA ...
$ 7 : int 8 88 NA NA NA NA NA 1 NA NA ...
$ Group: Factor w/ 1 level "10-2": 1 1 1 1 1 1 1 1 1 1 ...
#匯入其他資料fr15-2、fr20-1、fr20-2、fr30-1、fr40-1
fL<-"D:/Murd62/fr15-2.txt"
dta15_2 <- read.table(fL, sep="",header=FALSE, fill=T)
str(dta15_2)'data.frame': 2017 obs. of 7 variables:
$ V1: int 15 15 14 15 15 3 13 14 14 9 ...
$ V2: int 12 14 15 13 14 13 15 15 15 88 ...
$ V3: int 14 3 12 12 12 14 10 12 5 NA ...
$ V4: int 10 4 10 2 13 12 1 10 13 NA ...
$ V5: int 11 2 5 10 1 88 2 3 2 NA ...
$ V6: int NA 1 NA 6 88 2 3 NA 1 NA ...
$ V7: int NA NA NA 8 NA 5 4 NA 4 NA ...
colnames(dta15_2)=c(1:7)
dta15_2$Group<-as.factor("15-2")fL<-"D:/Murd62/fr20-1.txt"
dta20_1 <- read.table(fL, sep="",header=FALSE, fill=T)
str(dta20_1)'data.frame': 1293 obs. of 10 variables:
$ V1 : int 20 20 17 18 18 18 19 20 19 20 ...
$ V2 : int 19 18 18 19 19 19 20 19 20 18 ...
$ V3 : int 13 16 8 20 20 20 18 17 88 12 ...
$ V4 : int 18 17 20 15 17 17 11 11 5 19 ...
$ V5 : int 1 3 15 16 12 1 17 15 17 NA ...
$ V6 : int 9 88 14 8 11 10 1 10 1 NA ...
$ V7 : int 2 14 1 9 13 7 2 1 NA NA ...
$ V8 : int 17 NA 2 1 88 NA NA NA NA NA ...
$ V9 : int 16 NA 19 2 NA NA NA NA NA NA ...
$ V10: int 88 NA NA NA NA NA NA NA NA NA ...
colnames(dta20_1)=c(1:10)
dta20_1$Group<-as.factor("20-1")fL<-"D:/Murd62/fr20-2.txt"
dta20_2 <- read.table(fL, sep="",header=FALSE, fill=T)
colnames(dta20_2)=c(1:8)
dta20_2$Group<-as.factor("20-2")fL<-"D:/Murd62/fr30-1.txt"
dta30_1 <- read.table(fL, sep="",header=FALSE, fill=T)
colnames(dta30_1)=c(1:9)
dta30_1$Group<-as.factor("30-1")fL<-"D:/Murd62/fr40-1.txt"
dta40_1 <- read.table(fL, sep="",header=FALSE, fill=T)
colnames(dta40_1)=c(1:10)
dta40_1$Group<-as.factor("40-1")Sarah Leo at the Economist magazine published a data set to accompany the story about how scientific publishing is dominated by men. The plot on the left panel below is the orignal graph that appeared in the article. Help her find a better plot.
dta<-read.csv("D://Economist_women-research.csv", head=T, skip= 1)
#跳開第一列
#看資料
summary(dta) Country Health.sciences Physical.sciences Engineering
Length:18 Length:18 Min. :0.1100 Min. :0.1100
Class :character Class :character 1st Qu.:0.2100 1st Qu.:0.2200
Mode :character Mode :character Median :0.2300 Median :0.2400
Mean :0.2375 Mean :0.2425
3rd Qu.:0.2500 3rd Qu.:0.2525
Max. :0.3700 Max. :0.3600
NA's :6 NA's :6
Computer.science..maths X..of.women.inventores
Min. :0.1100 Min. :0.0800
1st Qu.:0.2025 1st Qu.:0.1200
Median :0.2200 Median :0.1350
Mean :0.2092 Mean :0.1525
3rd Qu.:0.2250 3rd Qu.:0.1825
Max. :0.2700 Max. :0.2600
NA's :6 NA's :6
str(dta)'data.frame': 18 obs. of 6 variables:
$ Country : chr "Japan" "Chile" "United Kingdom" "United States" ...
$ Health.sciences : chr "0.24" "0.43" "0.45" "0.46" ...
$ Physical.sciences : num 0.11 0.23 0.21 0.2 0.25 0.22 0.25 0.24 0.21 0.23 ...
$ Engineering : num 0.11 0.22 0.22 0.22 0.26 0.23 0.25 0.25 0.22 0.25 ...
$ Computer.science..maths: num 0.11 0.16 0.21 0.22 0.22 0.18 0.22 0.22 0.22 0.24 ...
$ X..of.women.inventores : num 0.08 0.19 0.12 0.14 0.18 0.13 0.12 0.17 0.13 0.12 ...
dta <- dta[complete.cases(dta), ]
#移除遺漏值(excel後面的備註)
names(dta)<-c("Country",
"Health",
"Physical",
"Engineering",
"Computer",
"Inventors")
#column重新命名
tail(dta) Country Health Physical Engineering Computer Inventors
7 EU28 0.48 0.25 0.25 0.22 0.12
8 France 0.48 0.24 0.25 0.22 0.17
9 Canada 0.49 0.21 0.22 0.22 0.13
10 Australia 0.5 0.23 0.25 0.24 0.12
11 Brazil 0.57 0.33 0.32 0.24 0.19
12 Portugal 0.57 0.37 0.36 0.27 0.26
#檢查最後幾項,看起來沒問題