1 Exercise 1

Task: The distribution of personal disposable income in Taiwan in 2015 has a story to tell. Revise the following plot to enhance that message.

source: https://www.stat.gov.tw/ct.asp?xItem=40875&ctNode=511&mp=4

mean personal disposable income = 311256
median personal disposable income = 266490

1.1 Data Input

pacman::p_load(ggplot2, dplyr, readr, tidyr)

dat <- read.csv("income_tw.csv", header = T)

dat <- dat |>
  mutate(Income = factor(Income, levels = Income),
         proportionCumulativeCount = rev(cumsum(rev(Count))) / sum(Count),
         proportionCumulativeCount_group = cut(proportionCumulativeCount, 
                                               breaks = c(0, .01, .05, .1, .25, .5, 1),
                                               labels = c("Top 1%", 
                                                          "Top 5%", 
                                                          "Top 10%", 
                                                          "Top 25%", 
                                                          "Top 50%", 
                                                          "Below 50%"),
                                               include.lowest = T))

1.2 Plot bw

ggplot(dat, aes(x = Income, y = Count/10^4,
                fill = proportionCumulativeCount_group,
                shape = proportionCumulativeCount_group)) +
  geom_hline(yintercept = median(dat$Count/10^4), color = "gray90", lwd = .5, lty = 2) +
  geom_vline(xintercept = "260,000 to 279,999",color = "gray80", lwd = .8, lty = 1) +
  geom_point(aes(x = Income, y = Count/10^4), size=2) +
  geom_segment(aes(xend = Income, yend = 0)) +
  scale_fill_manual(name = "% of population", 
                      values = c("gray80",
                                 "gray70", 
                                 "gray60",
                                 "gray50",
                                 "gray40",
                                 "black"))+
  scale_shape_manual(name = "% of population", values = c(24, 24, 24, 24, 24, 25))+
  #scale_linetype_manual(name = "% of population", values = c(1:6))+
  labs(x = "Personal disposable income", 
       y = "Number of persons(x 10,000)",
       title = "Distribution of personal disposable income in Taiwan (2015)")+
  annotate("text", x = "340,000 to 359,999", y = 78, 
           label ="Median DPI=266,490", color="gray40",
           size=3)+
  theme_minimal()+
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, margin = margin(t = 6), hjust = 1, size = 5),
        panel.grid.major.x = element_blank(),
        legend.position = "top")+
  #  coord_flip()+
  guides(fill=guide_legend(ncol = 6, reverse = TRUE),
         shape=guide_legend(ncol = 6, reverse = TRUE))

1.3 Plot color

ggplot(dat, aes(x = Income, y = Count/10^4,
                color = proportionCumulativeCount_group,
                shape = proportionCumulativeCount_group)) +
  geom_hline(yintercept = median(dat$Count/10^4), color = "gray90", lwd = .5, lty = 2) +
  geom_vline(xintercept = "260,000 to 279,999",color = "gray80", lwd = .8, lty = 1) +
  geom_point(aes(x = Income, y = Count/10^4), size=2) +
  geom_segment(aes(xend = Income, yend = 0)) +
  scale_color_manual(name = "% of population", 
                      values = c("royalblue4",
                                 "royalblue3", 
                                 "blue",
                                 "royalblue2",
                                 "royalblue1",
                                 "tomato"))+
  scale_shape_manual(name = "% of population", values = c(17, 17, 17, 17, 17, 25))+
  #scale_linetype_manual(name = "% of population", values = c(1:6))+
  labs(x = "Personal disposable income", 
       y = "Number of persons(x 10,000)",
       title = "Distribution of personal disposable income in Taiwan (2015)")+
  annotate("text", x = "340,000 to 359,999", y = 78, 
           label ="Median DPI=266,490", color="gray40",
           size=3)+
  theme_minimal()+
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, margin = margin(t = 6), hjust = 1, size = 5),
        panel.grid.major.x = element_blank(),
        legend.position = "top")+
  #  coord_flip()+
  guides(color=guide_legend(ncol = 6, reverse = TRUE),
         shape=guide_legend(ncol = 6, reverse = TRUE))

2 Exercise 2

Task: Comment on how the graphs presented in this link violate the principles for effective graphics and how would you revise them.

2.1 Data Input

pacman::p_load(dplyr, ggplot2, viridis, ggthemes)

dat <- read.csv("traffic.csv",fileEncoding="big5",stringsAsFactors = F)

type <- dat %>%
  filter(year == 104) %>%
  group_by(type) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))

vtype <- dat %>%
  filter(year == 104) %>%
  group_by(vehicle_type) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))

method <- dat %>%
  filter(year == 104) %>%
  group_by(method) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))

type_month <- dat %>%
  filter(year == 104) %>%
  group_by(month,type) %>%
  summarise(count=sum(count)) %>%
  arrange(desc(count))

## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.

vtype_month <- dat %>%
  filter(year == 104) %>%
  group_by(month,vehicle_type) %>%
  summarise(count=sum(count)) %>%
  arrange(desc(count))

## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.

method_month <- dat %>%
  filter(year == 104) %>%
  group_by(month,method) %>%
  summarise(count=sum(count)) %>%
  arrange(desc(count))

## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.

2.2 Plot 1 revice

原本圖的缺點:
- 種類無排序
- 顏色無意義
- 使用長條圖data-ink ratio 太高
- 缺乏整合比較

# add font
windowsFonts(A=windowsFont("Microsoft JhengHei UI"))

type[1:5,] %>%
  arrange(count) %>% 
  mutate(type=factor(type, levels=type)) %>%  
  ggplot(., aes(x=count/1000, y=type, color=type))+
  geom_point()+
  geom_segment(aes(xend=0, yend=type)) +
  scale_color_manual(values=c("gray50", "gray50", "gray50", "gray50", "tomato")) +
  geom_text(aes(label=count/1000), size=3, nudge_x=12)+
  ggtitle("104年桃園市交通違規取締前五名舉發件數之種類")+
  labs(x = "舉發件數 (x 1000)", 
       y = "舉發種類")+
  theme_minimal()+
  theme(legend.position = "",
        text = element_text(family = "A"))

2.3 Plot 2 revice

vtype %>%
  arrange(count) %>% 
  mutate(vehicle_type=factor(vehicle_type, levels=vehicle_type)) %>%
  ggplot(., aes(x=count/1000, y=vehicle_type, color=vehicle_type))+
  geom_point()+
  geom_segment(aes(xend=0, yend=vehicle_type)) +
  scale_color_manual(values=c("gray50", "gray50", "tomato")) +
  geom_text(aes(label=count/1000), size=3.5, nudge_x=28) +
  ggtitle("104年交通違規舉發車種別件數")+
  labs(x = "舉發件數 (x 1000)", 
       y = "車種別")+
  theme_minimal()+
  theme(legend.position = "",
        text = element_text(family = "A"))

2.4 Plot 3 revice

method %>%
  arrange(count) %>% 
  mutate(method=factor(method, levels=method)) %>%
  ggplot(., aes(x=count/1000, y=method, color=method))+
  geom_point()+
  geom_segment(aes(xend=0, yend=method)) +
  scale_color_manual(values=c("gray50", "tomato")) +
  geom_text(aes(label=count/1000), size=3.5, nudge_x=40) +
  ggtitle("104年交通違規舉發方式別件數")+
  labs(x = "舉發件數 (x 1000)", 
       y = "舉發方式")+
  theme_minimal()+
  theme(legend.position = "",
        text = element_text(family = "A"))

2.5 Plot mix

dat %>%
  filter(year == 104) %>%
  filter(type %in% c("違規停車", 
                     "行車速度超速60公里以下", 
                     "違規停車拖吊", 
                     "闖紅燈直行左轉", 
                     "不依規定轉彎"))%>%
  group_by(type, method, vehicle_type) %>%
  summarise(count=sum(count)) %>%
  mutate(type=factor(type, levels=c("不依規定轉彎",
                                    "闖紅燈直行左轉",
                                    "違規停車拖吊",
                                    "行車速度超速60公里以下",
                                    "違規停車")),
         vehicle_type=factor(vehicle_type, levels=c("機車",
                                                    "汽車",
                                                    "重型機車(250CC以上)")),
         method=factor(method, levels=c("逕舉","攔停")))%>%
  ggplot(., aes(x=count/1000, y=type, color=vehicle_type))+
  geom_point()+
  facet_wrap(method ~ vehicle_type)+
  labs(x = "舉發件數 (x 1000)", 
       y = "舉發種類",
       title = "104年桃園市交通違規取締",
       subtitle = "前五名舉發種類、舉發方式及車種別")+
  theme_minimal()+
  theme(legend.position = "top",
        text = element_text(family = "A"))+
  guides(color=guide_legend(title="車種別", ncol = 3, reverse = F))

dat %>%
  filter(year == 104) %>%
  filter(type %in% c("違規停車", 
                     "行車速度超速60公里以下", 
                     "違規停車拖吊", 
                     "闖紅燈直行左轉", 
                     "不依規定轉彎"))%>%
  group_by(type, method, vehicle_type) %>%
#  summarise(count=sum(count)) %>%
  mutate(type=factor(type, levels=c("不依規定轉彎",
                                    "闖紅燈直行左轉",
                                    "違規停車拖吊",
                                    "行車速度超速60公里以下",
                                    "違規停車")),
         vehicle_type=factor(vehicle_type, levels=c("機車",
                                                    "汽車",
                                                    "重型機車(250CC以上)")),
         method=factor(method, levels=c("逕舉","攔停")))%>%
  ggplot(., aes(x=count/1000, y=type, color=vehicle_type))+
  geom_boxplot()+
  facet_wrap(. ~ method)+
  labs(x = "舉發件數 (x 1000)", 
       y = "舉發種類",
       title = "104年桃園市交通違規盒鬚圖",
       subtitle = "前五名舉發種類、舉發方式及車種別")+
  theme_minimal()+
  theme(legend.position = "top",
        text = element_text(family = "A"))+
  guides(color=guide_legend(title="車種別", ncol = 3, reverse = T))

dat %>%
  filter(year == 104) %>%
  filter(type %in% c("違規停車", 
                     "行車速度超速60公里以下", 
                     "違規停車拖吊", 
                     "闖紅燈直行左轉", 
                     "不依規定轉彎"))%>%
  group_by(month, type, method, vehicle_type) %>%
  summarise(count=sum(count)) %>%
  mutate(type=factor(type, levels=c("違規停車",
                                    "行車速度超速60公里以下",
                                    "違規停車拖吊",
                                    "闖紅燈直行左轉",
                                    "不依規定轉彎")),
         vehicle_type=factor(vehicle_type, levels=c("機車",
                                                    "汽車",
                                                    "重型機車(250CC以上)")),
         method=factor(method, levels=c("逕舉","攔停")))%>%
  ggplot(., aes(x=count/1000, y=month, fill=vehicle_type))+
  geom_bar(position="stack", stat="identity", orientation="y")+
  scale_y_reverse(breaks = 1:12) +
  facet_wrap(type ~ method, ncol=2, nrow=5)+
  scale_fill_viridis(discrete = T) +
  labs(x = "舉發件數 (x 1000)", 
       y = "月份",
       title = "104年各月份桃園市交通違規累積件數",
       subtitle = "前五名舉發種類、舉發方式及車種別")+
  theme_minimal()+
  theme(legend.position = "top",
        text = element_text(family = "A"))+
  guides(fill=guide_legend(title="車種別", ncol = 3, reverse = T))

## `summarise()` has grouped output by 'month', 'type', 'method'. You can override using the `.groups` argument.

dat %>%
  filter(year == 104) %>%
  filter(type %in% c("違規停車", 
                     "行車速度超速60公里以下", 
                     "違規停車拖吊", 
                     "闖紅燈直行左轉", 
                     "不依規定轉彎"))%>%
  group_by(month, type, method, vehicle_type) %>%
  summarise(count=sum(count)) %>%
  mutate(type=factor(type, levels=c("違規停車",
                                    "行車速度超速60公里以下",
                                    "違規停車拖吊",
                                    "闖紅燈直行左轉",
                                    "不依規定轉彎")),
         vehicle_type=factor(vehicle_type, levels=c("機車",
                                                    "汽車",
                                                    "重型機車(250CC以上)")),
         method=factor(method, levels=c("逕舉","攔停")))%>%
  ggplot(., aes(x=count/1000, y=month, fill=vehicle_type))+
  geom_bar(position="fill", stat="identity", orientation="y")+
#  geom_point()+
#  geom_line()+
  scale_y_reverse(breaks = 1:12) +
  facet_wrap(type ~ method, ncol=2, nrow=5)+
  scale_fill_viridis(discrete = T) +
  labs(x = "舉發件數 (x 1000)", 
       y = "月份",
       title = "104年各月份桃園市交通違規 比例",
       subtitle = "前五名舉發種類、舉發方式及車種別")+
  theme_minimal()+
  theme(legend.position = "top",
        text = element_text(family = "A"))+
  guides(fill=guide_legend(title="車種別", ncol = 3, reverse = T))

## `summarise()` has grouped output by 'month', 'type', 'method'. You can override using the `.groups` argument.

## Warning: Removed 54 rows containing missing values (geom_bar).

top5type <- type[1:5,1]#取出數量前五的種類
top5 <- data.frame()#產生空的框架
for(i in 1:5){
  X <- type_month %>%
    filter(type == as.character(top5type[i,]))
  top5 <- rbind(top5,X)
}#重新取出數量前五種類的資料

bar <- list()#產生空的list
for(i in 1:12){
  X <- ungroup(top5) %>%
    filter(month == i)
  bar[[i]]<- ggplot(X,aes(x=type,y =count,fill = type))+
    geom_bar(stat="identity",width=.5,alpha = 0.8) +
    geom_text(aes(label = count),size = 3.5) +
    ggtitle(paste("104年",i,"月交通違規舉發")) +
    theme(text = element_text(family = "A"))
  print(bar[[i]] )
  
}#產生1至12月的bars

trendline <- list() 
for(i in 1:5){
   X <- ungroup(top5) %>%
    filter(type == as.character(top5type[i,]))
  trendline[[i]] <- ggplot(X,aes(x=month,y =count))+
    geom_text(aes(label = count),size = 3.5) +
    geom_line(colour = "darkred",size = 1,alpha=0.8) +
    scale_x_continuous(breaks = 1:12) +
    theme_bw() +
    ggtitle(paste("104年",as.character(top5type[i,]),"趨勢線")) +
    theme(text = element_text(family = "A"))
  print(trendline[[i]] )

}
#產生前5種類的trendllines

# 交通違規總數的月份趨勢圖：
month <- dat %>%
  filter(year == 104) %>%
  group_by(month) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))
ggplot(month,aes(x=month,y =count))+
  geom_text(aes(label = count),size = 3.5) +
  geom_line(colour = "darkred",size = 1,alpha=0.8) +
  scale_x_continuous(breaks = 1:12) +
  theme_bw() +
  ggtitle(paste("104年交通違規總數趨勢線"))  +
  theme(text = element_text(family = "A"))

# 針對車種的各月份趨勢圖：
ggplot(vtype_month,aes(x=month,y =count,colour=vehicle_type))+
  geom_text(aes(label = count),size = 3.5) +
  geom_line(size = 1,alpha=0.8) +
  scale_x_continuous(breaks = 1:12) +
  theme_bw() +
  ggtitle(paste("104年交通違規車種趨勢線")) +
  theme(text = element_text(family = "A"))

ggplot(method_month,aes(x=month,y =count,colour=method))+
  geom_text(aes(label = count),size = 3.5) +
  geom_line(size = 1,alpha=0.8) +
  scale_x_continuous(breaks = 1:12) +
  theme_bw() +
  ggtitle(paste("104年交通違規舉發方式趨勢線")) +
    theme(text = element_text(family = "A"))

month <- dat %>%
  filter(year == 104) %>%
  group_by(month) %>%
  summarise(count = sum(count)) %>%
  arrange(desc(count))

# 最後，我們針對前五種類的舉發種類與各月份做了熱度圖：
ggplot(ungroup(top5),aes(x=factor(month), y=factor(type), fill=count)) +
 geom_tile(color="white", size=0.1) +
 scale_fill_viridis(name="件數")+ #使用viridis
 coord_equal()+
 labs(x=NULL, y=NULL, title="104年交通違規熱度圖")+
 theme_tufte(base_family="Helvetica")+ #使用ggthemes
 theme(plot.title=element_text(hjust=0))+
 theme(axis.ticks=element_blank())+
 theme(axis.text=element_text(size=7))+
 theme(legend.title=element_text(size=8))+
 theme(legend.text=element_text(size=6)) +
  theme(text = element_text(family = "A"))

3 Exercise 3

Task: Use the free recall data to improve on the figure reported in Murdock, B. B. (1962). The serial position effect of free recall. Journal of Experimental Psychology, 64, 482-488.

3.1 Data Input

untar("Murd62.data.tgz")

dat10_2 <- read.table("Murd62/fr10-2.txt", sep = "", col.names = c(1:10), fill = T, nrows = 1200)
dat10_2["Group"] = "10-2"

dat15_2 <- read.table("Murd62/fr15-2.txt", sep = "", col.names = c(1:15), fill = T)
dat15_2["Group"] = "15-2"

dat20_1 <- read.table("Murd62/fr20-1.txt", sep = "", col.names = c(1:20), fill = T)
dat20_1["Group"] = "20-1"

dat20_2 <- read.table("Murd62/fr20-2.txt", sep = "", col.names = c(1:20), fill = T)
dat20_2["Group"] = "20-2"

dat30_1 <- read.table("Murd62/fr30-1.txt", sep = "", col.names = c(1:30), fill = T)
dat30_1["Group"] = "30-1"

dat40_1 <- read.table("Murd62/fr40-1.txt", sep = "", col.names = c(1:40), fill = T)
dat40_1["Group"] = "40-1"

dat_recall <- list(dat10_2, dat15_2, dat20_1, dat20_2, dat30_1, dat40_1)

dat3 <- lapply(dat_recall, 
               function(x) {y <- x |> 
                            tidyr::pivot_longer(cols = starts_with("X"),
                                                names_to = "Serial",
                                                values_to = "Item")
                            y <- as.data.frame(table(y$Item)/1200) |> 
                            dplyr::mutate(Group = x$Group[1])
                            } ) |>
                            dplyr::bind_rows()

dat3$Group <- factor(dat3$Group)

# remove "88"
dat3_re <- dat3[!dat3$Var1 == "88", ]
dat3_re <- dat3_re[!(dat3_re$Var1 == "16" & dat3_re$Group == "15-2"), ]
dat3_re <- dat3_re[!(dat3_re$Var1 == "31" & dat3_re$Group == "30-1"), ]
dat3_re <- dat3_re[!(dat3_re$Var1 == "41" & dat3_re$Group == "40-1"), ]
dat3_re <- dat3_re[!(dat3_re$Var1 == "50" & dat3_re$Group == "40-1"), ]

dat3_re<-dat3_re|>
  mutate(Var1 = as.numeric(Var1),
         effectgroup=ifelse(Freq>=0.5 & Var1>5,"Recency effect", 
                            ifelse(Freq>=0.5 & Var1<5, "Primacy effect", "none")))
dat3_re<-dat3_re|>
  mutate(effectgroup=factor(effectgroup, levels=c("Primacy effect",
                                                  "none",
                                                  "Recency effect")))

3.2 Plot (group)

ggplot(dat3_re, aes(x=Var1, y= Freq, group=Group, color=Group))+
  geom_point()+
  geom_line()+
  ggrepel::geom_text_repel(data=dat3_re %>% filter(Freq>0.9), # Filter data first
                           aes(label=Group)) +
#  geom_label(data=dat3_re %>% filter(Freq>0.9), # Filter data first
#    aes(label=Group))+
  scale_x_continuous(breaks=c(0, 10, 15, 20, 30, 40))+
  scale_color_manual(name = "", values = c("gray10", 
                                           "gray20", 
                                           "gray30", 
                                           "gray40", 
                                           "gray50", 
                                           "gray60"))+
  labs(x = "Serial position", y = "Proportion of recall")+
  theme_minimal()+
  theme(legend.position = "")

3.3 Plot (effect)

color by effect

ggplot(dat3_re, aes(x=Var1, y= Freq, group=Group, color=effectgroup))+
  geom_point()+
  geom_line()+
  ggrepel::geom_text_repel(data=dat3_re %>% filter(Freq>0.9), # Filter data first
                           aes(label=Group),
                           color="black") +
#  geom_label(data=dat3_re %>% filter(Freq>0.9), # Filter data first
#    aes(label=Group))+
  scale_x_continuous(breaks=c(0, 10, 15, 20, 30, 40))+
  scale_color_manual(name = "Effect type", 
                     values = c("pink","gray80", "tomato"))+
  labs(x = "Serial position", y = "Proportion of recall")+
  theme_minimal()+
  theme(legend.position = "bottom")

4 Exercise 4

Sarah Leo at the Economist magazine published a data set to accompany the story about how scientific publishing is dominated by men.

Task: The plot on the left panel below is the orignal graph that appeared in the article. Help her find a better plot.

4.1 Data Input

dat <- read.csv("Economist_women-research.csv", header = T)
# remove the extra information
dat <- dat[-c(1, 14:19),]

# variable rename
colnames(dat) <- c("Country", "Health", "Physical", "Engineering", "ComputerMaths", "Inventor")

# reset row name
rownames(dat) <- NULL

knitr::kable(dat)

Country	Health	Physical	Engineering	ComputerMaths	Inventor
Japan	0.24	0.11	0.11	0.11	0.08
Chile	0.43	0.23	0.22	0.16	0.19
United Kingdom	0.45	0.21	0.22	0.21	0.12
United States	0.46	0.2	0.22	0.22	0.14
Mexico	0.46	0.25	0.26	0.22	0.18
Denmark	0.47	0.22	0.23	0.18	0.13
EU28	0.48	0.25	0.25	0.22	0.12
France	0.48	0.24	0.25	0.22	0.17
Canada	0.49	0.21	0.22	0.22	0.13
Australia	0.5	0.23	0.25	0.24	0.12
Brazil	0.57	0.33	0.32	0.24	0.19
Portugal	0.57	0.37	0.36	0.27	0.26

dat_l <- dat|>
  pivot_longer(cols = c(2:6), # wide to long 
                      names_to = "Field", 
                      values_to = "Female") |>
  mutate(Female=as.numeric(Female),
         Male=1-Female) |>
  pivot_longer(cols = c(3:4), # wide to long 
                      names_to = "Gender", 
                      values_to = "Proportion")|>
  mutate(Field=factor(Field, levels=c("Health",
                                      "Physical",
                                      "Engineering",
                                      "ComputerMaths",
                                      "Inventor"),
                      labels = c("Health sciences", 
                                 "Physical sciences", 
                                 "Engineering",
                                 "Computer science and maths", 
                                 "Inventores (patent applications)")),
         Gender=factor(Gender, levels=c("Male",
                                        "Female"),
                       labels = c("Men","Women")),
         Country=factor(Country, levels=c("Portugal",
                                          "Brazil",
                                          "Australia",
                                          "Canada",
                                          "EU28", 
                                          "France",
                                          "Denmark",
                                          "Mexico",
                                          "United States",
                                          "United Kingdom",
                                          "Chile",
                                          "Japan")))

4.2 Plot

windowsFonts("Arial Narrow"=windowsFont("Arial Narrow"))

ggplot(dat_l, aes(x = Proportion, y = forcats::fct_rev(Country), fill = Gender, width=.5)) +
  geom_col(position = "fill")+
  scale_fill_manual(name = "Gender", 
                    values = c("tomato", "pink"))+
  geom_vline(xintercept = 0.5,color = "firebrick", lwd = 0.5, lty = 1) +
  geom_vline(xintercept = 0.25,color = "firebrick4", lwd = 0.5, lty = 3) +
  facet_wrap(.~Field, nrow=1)+
  labs(x="Proportion (%)", 
       y="", 
       title=expression(paste(bold("Still a man's world"))),
       subtitle = "Women among researchers with papers published* (index in Scopus) in 2011-2015",
       caption = expression(paste(' Source: "Gender in the Global Research Landscape" by Elsevier;',
                                  italic("The Economist"))))+
  hrbrthemes::theme_ipsum()+
  theme(legend.position = "top",
        axis.text.x = element_text(size = 6),
        strip.text.x = element_text(size = 7.5))+
  guides(fill=guide_legend(ncol = 2, reverse = TRUE))

4.3 Odds

m1<-glm(Gender~Country+Field+Proportion, data=dat_l, family = binomial(link = "logit"))

dat_l <- dat_l|>
  mutate(odd=Proportion/(1-Proportion),
         fit=m1$fitted.values, 
         resid=m1$residuals)

ggplot(subset(dat_l, Gender=="Women"), aes(x = odd, y = forcats::fct_rev(Country), color=Field)) +
  geom_vline(xintercept = 1,color = "firebrick", lwd = 0.5, lty = 1) +
  geom_point(size=1.5)+
  labs(x="Odds(Women to Men)", 
       y="Country", 
       title=expression(paste(bold("Still a man's world"))),
       subtitle = "Women among researchers with papers published* (index in Scopus) in 2011-2015",
       caption = expression(paste(' Source: "Gender in the Global Research Landscape" by Elsevier;',
                                  italic("The Economist"))))+
  hrbrthemes::theme_ipsum()+
  theme(legend.position = "top")+
  guides(color=guide_legend(ncol = 5, reverse = T))

20211129 Exercises

Chang-Chun Chen

2021-12-05