Project: Global Technology Survey
1.Background
项目工作主要为问卷数据清洗及后续的可视化,问卷分为两份,分别针对Occupier和Investor发放
2.Dorling cartogram:全球分布
> library(tidyverse)
> library(readxl)
> Oc_Global <- read_xlsx('/Users/hufeiran/Desktop/Data\ Analysis\ protfolio/Global\ Technology\ Survey/JLL\ Technology\ Survey\ 2023\ -\ final\ raw\ dataset.xlsx', sheet = 'Occupiers_full')
> head(Oc_Global)
# A tibble: 6 × 314
`Data source` RecipientLastName RecipientFirstName RecipientEmail
<chr> <lgl> <lgl> <lgl>
1 MW-sourced NA NA NA
2 MW-sourced NA NA NA
3 MW-sourced NA NA NA
4 MW-sourced NA NA NA
5 MW-sourced NA NA NA
6 MW-sourced NA NA NA
# ℹ 310 more variables: RecordedDate <dttm>, UserLanguage <chr>, Q1A <chr>,
# Q1A_14_TEXT <lgl>, Country <chr>, Employees <chr>, Function <chr>,
# Role <chr>, Q4_10_TEXT <chr>, Q4_11_TEXT <lgl>, Q5_5 <chr>, Q5_18 <chr>,
# Q5_19 <chr>, Q5_20 <chr>, Q5_21 <chr>, Q5_6 <lgl>, `Total employees` <chr>,
# `Total footprint` <chr>, `Overall CRE budget` <chr>,
# `CRE technology budget` <chr>, Q7_1_1 <chr>, Q7_1_5 <chr>, Q7_1_6 <chr>,
# Q7_1_7 <chr>, Q7_1_8 <chr>, Q7_1_9 <chr>, Q7_1_10 <chr>, Q7_1_11 <chr>, …2.1 Occupier全球分布
> library(janitor)
> Oc_country <- Oc_Global %>%
+ tabyl(Country) %>%
+ adorn_totals('row') %>%
+ adorn_pct_formatting()
>
> Oc_country <- Oc_country %>%
+ mutate(iso_a3 = case_when(
+ Country == 'Albania' ~ 'ALB',
+ Country == 'Australia' ~ 'AUS',
+ Country == 'Austria' ~ 'AUT',
+ Country == 'Brazil' ~ 'BRA',
+ Country == 'Canada' ~ 'CAN',
+ Country == 'China' ~ 'CHN',
+ Country == 'Finland' ~ 'FIN',
+ Country == 'France' ~ 'FRA',
+ Country == 'Germany' ~ 'DEU',
+ Country == 'India' ~ 'IND',
+ Country == 'Japan' ~ 'JPN',
+ Country == 'Netherlands' ~ 'NLD',
+ Country == 'Singapore' ~ 'SGP',
+ Country == 'United Arab Emirates' ~ 'ARE',
+ Country == 'United Kingdom of Great Britain and Northern Ireland' ~ 'GBR',
+ Country == 'United States of America' ~ 'USA',
+ )) %>%
+ select(Oc_count = n, iso_a3, Country) %>%
+ filter(!(Country == 'Total'))
> Oc_country
Oc_count iso_a3 Country
1 ALB Albania
42 AUS Australia
1 AUT Austria
1 BRA Brazil
48 CAN Canada
50 CHN China
1 FIN Finland
58 FRA France
60 DEU Germany
36 IND India
49 JPN Japan
1 NLD Netherlands
24 SGP Singapore
1 ARE United Arab Emirates
61 GBR United Kingdom of Great Britain and Northern Ireland
166 USA United States of America2.3 将简单地图替换为dorling地图
朵林卡图法(Dorling cartogram)是一种世界地图制图方法,通过面积变形来可视化统计数据分布。其主要特点:将国家或地区描绘为大小不等的圆形。圆形面积代表某项统计数据的大小,如人口、GDP等。圆形位置代表地理分布,但不完全遵循地理比例。相邻圆形会稍微重叠,代表相邻关系。颜色也可表示分类或聚类信息。
> library(cartogram)
> dorl_Oc <- cartogram_dorling(
+ map_Oc, weight = 'Oc_count', k = 5,
+ m_weight = 1, itermax = 100
+ )> # 颜色设置
> col_world <- "#9CB4BF"
> col_back <- "#1D201F"
>
> # 主题设置
> theme_custom <- theme_void()+
+ theme(plot.background = element_rect(fill = col_back,color = NA))
>
> ggplot()+
+ # 世界底图
+ geom_sf(
+ world, mapping = aes(geometry = geometry),
+ fill = col_world, color = alpha("dimgrey", 0.25)
+ )+
+ # Dorling cartogram
+ geom_sf(
+ dorl_Oc, mapping=aes(geometry = geometry),
+ fill = alpha("#D0355D",0.75),color=alpha("white",0.2)
+ )+
+ theme_custom> # 计算图中每个圆环的面积和半径
> dorl_Oc <- dorl_Oc %>%
+ mutate(
+ # Compute area
+ ar=as.numeric(st_area(dorl_Oc)),
+ # Compute radius based on area
+ rad=as.numeric(sqrt(ar/pi))
+ )
>
> # 提取每个圆的质心
> centr <- dorl_Oc %>%
+ st_centroid() %>%
+ st_coordinates()
>
> # 组合数据
> dorl2 <- tibble(dorl_Oc, X=centr[,1], Y=centr[,2]) %>%
+ arrange(-Oc_count)> ggplot()+
+ # 世界底图
+ geom_sf(
+ world,mapping=aes(geometry=geometry),
+ fill=col_world,color=alpha("dimgrey",0.25)
+ )+
+ # 使用geom_circle()绘制dorling图
+ ggforce::geom_circle(
+ data = dorl2, aes(x0 = X, y0 = Y, r = rad),
+ fill=alpha("#D0355D",0.75), color=alpha("#D0355D",0.2)
+ )+
+ theme_custom+
+ geom_text(
+ data = dorl2,
+ aes(x=X,y=Y, label=paste0(Country,'(',Oc_count,')'), size=Oc_count),
+ color="white"
+ )+
+ scale_size(range=c(2,4))+
+ guides(size='none')3.哑铃图可视化
Question:过去12个月中,领先的公司在实现以下领域的科技项目目标方面有多成功(Occupier & Investor)
3.1原始数据清洗
> # 选项字符串获取
> Oc_Q23_Text <- read_xlsx('/Users/hufeiran/Desktop/Data\ Analysis\ protfolio/Global\ Technology\ Survey/JLL\ Technology\ Survey\ 2023\ -\ final\ raw\ dataset.xlsx', sheet = 'occupier_keybook') %>%
+ select(c('Data source','Text')) %>%
+ filter(grepl('Q23_[[:digit:]]', `Data source`)) %>%
+ rowwise() %>%
+ mutate(
+ options = unlist(strsplit(`Text`, ' - '))[2]
+ )
> Oc_Q23_Text
# A tibble: 10 × 3
# Rowwise:
`Data source` Text options
<chr> <chr> <chr>
1 Q23_1 "How successful have you been in meeting your goals or… Direct…
2 Q23_2 "How successful have you been in meeting your goals or… Increa…
3 Q23_3 "How successful have you been in meeting your goals or… Custom…
4 Q23_4 "How successful have you been in meeting your goals or… Incide…
5 Q23_5 "How successful have you been in meeting your goals or… Improv…
6 Q23_6 "How successful have you been in meeting your goals or… Enhanc…
7 Q23_7 "How successful have you been in meeting your goals or… Improv…
8 Q23_8 "How successful have you been in meeting your goals or… Develo…
9 Q23_9 "How successful have you been in meeting your goals or… User a…
10 Q23_10 "How successful have you been in meeting your goals or… [QID14…> # Global‘Highly successful’计算
> library(janitor)
> # 筛选出以‘Q23’开头的列
> Oc_Global_Q23 <- Oc_Global %>%
+ select(starts_with('Q23'))
> # 计算成功率列联表
> Global_success <- map_df(Oc_Global_Q23, tabyl)
> # 赋值
> Oc_Global_Q23 <- Oc_Global_Q23 %>%
+ slice(1:6)
> f <- sort(rep(c(1:10),6))
> for (i in 1:10){
+ Oc_Global_Q23[paste0('Q23_',i)] = split(Global_success$percent,f)[i]
+ }
> row.names(Oc_Global_Q23) = c("Don't know",'Highly successful','Not very successful',
+ 'Somewhat successful','Successful','NA')
> colnames(Oc_Global_Q23) <- Oc_Q23_Text$options
> # 筛选出‘Highly successful’的数据并进行转置
> Oc_Global_Q23 <- Oc_Global_Q23 %>%
+ select(1:9) %>%
+ slice(2) %>%
+ pivot_longer(cols = 1:9,names_to = 'options',
+ values_to = 'Highly successful Global')
> Oc_Global_Q23
# A tibble: 9 × 2
options Highly successful Gl…¹
<chr> <dbl>
1 Direct monetary benefits (cost savings or increased re… 0.318
2 Increased employee productivity 0.3
3 Customer / employee engagement & satisfaction 0.325
4 Incident response time 0.217
5 Improved energy efficiency or carbon emissions perform… 0.275
6 Enhanced data reporting & analytics capabilities 0.307
7 Improved portfolio utilization & efficiency 0.255
8 Development of new external or internal tools or produ… 0.233
9 User adoption & retention rates per product 0.265
# ℹ abbreviated name: ¹`Highly successful Global`> # China‘Highly successful’计算
> library(janitor)
> # 筛选出以‘Q23’开头的列
> Oc_China_Q23 <- Oc_Global %>%
+ filter(Country == 'China') %>%
+ select(starts_with('Q23'))
> # 计算成功率列联表
> China_success <- map_df(Oc_China_Q23, tabyl) %>%
+ filter(`.x[[i]]` == 'Highly successful')
> # 赋值
> for (k in 1:10){
+ Oc_China_Q23 <- Oc_China_Q23 %>%
+ slice(c(1))
+ Oc_China_Q23[paste0('Q23_',k)] = China_success$percent[k]
+ }
> colnames(Oc_China_Q23) <- Oc_Q23_Text$options
> Oc_China_Q23 <- Oc_China_Q23 %>%
+ select(1:9) %>%
+ pivot_longer(cols = 1:9, names_to = 'options', values_to = 'Highly successful China')
> Oc_China_Q23
# A tibble: 9 × 2
options Highly successful Ch…¹
<chr> <dbl>
1 Direct monetary benefits (cost savings or increased re… 0.24
2 Increased employee productivity 0.3
3 Customer / employee engagement & satisfaction 0.3
4 Incident response time 0.22
5 Improved energy efficiency or carbon emissions perform… 0.24
6 Enhanced data reporting & analytics capabilities 0.18
7 Improved portfolio utilization & efficiency 0.28
8 Development of new external or internal tools or produ… 0.18
9 User adoption & retention rates per product 0.32
# ℹ abbreviated name: ¹`Highly successful China`> # Global 与 China数据合并
> Oc_success <- Oc_Global_Q23 %>%
+ mutate(`Highly successful China` = Oc_China_Q23$`Highly successful China`)
> Oc_success
# A tibble: 9 × 3
options Highly successful Gl…¹ Highly successful Ch…²
<chr> <dbl> <dbl>
1 Direct monetary benefits (cost … 0.318 0.24
2 Increased employee productivity 0.3 0.3
3 Customer / employee engagement … 0.325 0.3
4 Incident response time 0.217 0.22
5 Improved energy efficiency or c… 0.275 0.24
6 Enhanced data reporting & analy… 0.307 0.18
7 Improved portfolio utilization … 0.255 0.28
8 Development of new external or … 0.233 0.18
9 User adoption & retention rates… 0.265 0.32
# ℹ abbreviated names: ¹`Highly successful Global`, ²`Highly successful China`> # 绘图
> # 前置参数
> breaks1 <- 1:length(unique(Oc_success$options))
> labels1 <- Oc_success %>% pull(options) %>% unique()
> par1 <- "#BCDEE6"
> par2 <- "#003E51"> # 字体
> library(showtext)
> showtext_auto(enable = TRUE)
> options(encoding = 'UTF-8')
> # ggplot绘图
> Oc_success %>%
+ mutate(options = options %>% as_factor(),
+ y = options %>% as.numeric(),
+ `Highly successful Global` = `Highly successful Global` *100,
+ `Highly successful China` = `Highly successful China` *100
+ ) %>%
+ ggplot(aes(y = y))+
+ geom_vline(xintercept = 0, size = 2, alpha = .5, color = "grey50")+ # x = 0零值线
+ geom_segment(aes(x = `Highly successful China`, xend = `Highly successful Global`, yend = y))+ # 点之间连线
+ geom_point(aes(x = `Highly successful China`), shape = 16, size = 4, fill = par1)+
+ geom_point(aes(x = `Highly successful Global`), shape = 21, size = 4, fill = 'white')+
+ scale_color_manual()+
+ scale_y_continuous(breaks = breaks1, labels = labels1, expand = c(.01, .01))+
+ theme_minimal(base_family = 'wqy-microhei', base_size = 16)+ # 字体格式
+ theme(legend.position = "none",
+ panel.grid.minor.y = element_blank(),
+ panel.grid.major.y = element_line(size = 4, color = "grey95"),
+ axis.text.y = element_text(vjust = .3, size = 12))+
+ labs(x = "Change of successful rate between Global and China, %", y = NULL)+
+ # 添加其他注释
+ annotate("rect", xmin = 50, xmax = 60, ymin = 1, ymax = 3,
+ color = "grey50", fill = "white")+
+ annotate("text", x = 55, y = 2.5, label = "LEGEND",
+ size = 4, hjust = .5, family = 'wqy-microhei', color = "grey20")+
+ annotate("text", x = 55, y = 2, label = "Successful rate between Global and China",
+ size = 2, hjust = .5, family = 'wqy-microhei', color = "grey20")+
+ annotate("point", x = c(53, 56), y = 1.5,
+ pch = c(16, 21), size = 2, color = 1)+
+ annotate("text", x = c(54, 57), y = 1.5,
+ label = c("China", "Global"),
+ size = 2, hjust = 0, family = 'wqy-microhei', color = "grey20")