Project: Global Technology Survey

1.Background

项目工作主要为问卷数据清洗及后续的可视化，问卷分为两份，分别针对Occupier和Investor发放

2.Dorling cartogram：全球分布

> library(tidyverse)
> library(readxl)
> Oc_Global <- read_xlsx('/Users/hufeiran/Desktop/Data\ Analysis\ protfolio/Global\ Technology\ Survey/JLL\ Technology\ Survey\ 2023\ -\ final\ raw\ dataset.xlsx', sheet = 'Occupiers_full')
> head(Oc_Global)
# A tibble: 6 × 314
  `Data source` RecipientLastName RecipientFirstName RecipientEmail
  <chr>         <lgl>             <lgl>              <lgl>         
1 MW-sourced    NA                NA                 NA            
2 MW-sourced    NA                NA                 NA            
3 MW-sourced    NA                NA                 NA            
4 MW-sourced    NA                NA                 NA            
5 MW-sourced    NA                NA                 NA            
6 MW-sourced    NA                NA                 NA            
# ℹ 310 more variables: RecordedDate <dttm>, UserLanguage <chr>, Q1A <chr>,
#   Q1A_14_TEXT <lgl>, Country <chr>, Employees <chr>, Function <chr>,
#   Role <chr>, Q4_10_TEXT <chr>, Q4_11_TEXT <lgl>, Q5_5 <chr>, Q5_18 <chr>,
#   Q5_19 <chr>, Q5_20 <chr>, Q5_21 <chr>, Q5_6 <lgl>, `Total employees` <chr>,
#   `Total footprint` <chr>, `Overall CRE budget` <chr>,
#   `CRE technology budget` <chr>, Q7_1_1 <chr>, Q7_1_5 <chr>, Q7_1_6 <chr>,
#   Q7_1_7 <chr>, Q7_1_8 <chr>, Q7_1_9 <chr>, Q7_1_10 <chr>, Q7_1_11 <chr>, …

2.1 Occupier全球分布

> library(janitor)
> Oc_country <- Oc_Global %>%
+                 tabyl(Country) %>%
+                 adorn_totals('row') %>%
+                 adorn_pct_formatting()
> 
> Oc_country <- Oc_country %>%
+   mutate(iso_a3 = case_when(
+     Country == 'Albania' ~ 'ALB',
+     Country == 'Australia' ~ 'AUS',
+     Country == 'Austria' ~ 'AUT',
+     Country == 'Brazil' ~ 'BRA',
+     Country == 'Canada' ~ 'CAN',
+     Country == 'China' ~ 'CHN',
+     Country == 'Finland' ~ 'FIN',
+     Country == 'France' ~ 'FRA',
+     Country == 'Germany' ~ 'DEU',
+     Country == 'India' ~ 'IND',
+     Country == 'Japan' ~ 'JPN',
+     Country == 'Netherlands' ~ 'NLD',
+     Country == 'Singapore' ~ 'SGP',
+     Country == 'United Arab Emirates' ~ 'ARE',
+     Country == 'United Kingdom of Great Britain and Northern Ireland' ~ 'GBR',
+     Country == 'United States of America' ~ 'USA',
+   )) %>%
+   select(Oc_count = n, iso_a3, Country) %>%
+   filter(!(Country == 'Total'))
> Oc_country
 Oc_count iso_a3                                              Country
        1    ALB                                              Albania
       42    AUS                                            Australia
        1    AUT                                              Austria
        1    BRA                                               Brazil
       48    CAN                                               Canada
       50    CHN                                                China
        1    FIN                                              Finland
       58    FRA                                               France
       60    DEU                                              Germany
       36    IND                                                India
       49    JPN                                                Japan
        1    NLD                                          Netherlands
       24    SGP                                            Singapore
        1    ARE                                 United Arab Emirates
       61    GBR United Kingdom of Great Britain and Northern Ireland
      166    USA                             United States of America

> library(rnaturalearth)
> library(sf)
> library(cartogram)
> library(ggforce)
> # 准备绘图数据集
> world <- ne_countries(scale = 110, type = "countries", returnclass = "sf")%>%
+   # Convert WGS84 to projected crs (here Robinson)
+   sf::st_transform(world_ne, crs="ESRI:54030")

> map_Oc <- world %>%
+   left_join(Oc_country) %>%
+   drop_na(Oc_count)

2.2 制作简单地图，显示Occupier的地区分布

> ggplot(map_Oc, aes(fill=Oc_count))+
+   geom_sf()

2.3 将简单地图替换为dorling地图

朵林卡图法(Dorling cartogram)是一种世界地图制图方法,通过面积变形来可视化统计数据分布。其主要特点:将国家或地区描绘为大小不等的圆形。圆形面积代表某项统计数据的大小,如人口、GDP等。圆形位置代表地理分布,但不完全遵循地理比例。相邻圆形会稍微重叠,代表相邻关系。颜色也可表示分类或聚类信息。

> library(cartogram)
> dorl_Oc <- cartogram_dorling(
+   map_Oc, weight = 'Oc_count', k = 5,
+   m_weight = 1, itermax = 100
+ )

> # 颜色设置
> col_world <- "#9CB4BF"
> col_back <- "#1D201F"
> 
> # 主题设置
> theme_custom <- theme_void()+
+   theme(plot.background = element_rect(fill = col_back,color = NA))
> 
> ggplot()+
+   # 世界底图
+   geom_sf(
+     world, mapping = aes(geometry = geometry),
+     fill = col_world, color = alpha("dimgrey", 0.25)
+   )+
+   # Dorling cartogram
+   geom_sf(
+     dorl_Oc, mapping=aes(geometry = geometry),
+     fill = alpha("#D0355D",0.75),color=alpha("white",0.2)
+   )+
+   theme_custom

> # 计算图中每个圆环的面积和半径
> dorl_Oc <- dorl_Oc %>%
+   mutate(
+     # Compute area
+     ar=as.numeric(st_area(dorl_Oc)),
+     # Compute radius based on area
+     rad=as.numeric(sqrt(ar/pi))
+   )
> 
> # 提取每个圆的质心
> centr <- dorl_Oc %>%
+   st_centroid() %>%
+   st_coordinates()
> 
> # 组合数据
> dorl2 <- tibble(dorl_Oc, X=centr[,1], Y=centr[,2]) %>%
+   arrange(-Oc_count)

> ggplot()+
+   # 世界底图
+   geom_sf(
+     world,mapping=aes(geometry=geometry),
+     fill=col_world,color=alpha("dimgrey",0.25)
+   )+
+   # 使用geom_circle()绘制dorling图
+   ggforce::geom_circle(
+     data = dorl2, aes(x0 = X, y0 = Y, r = rad),
+     fill=alpha("#D0355D",0.75), color=alpha("#D0355D",0.2)
+   )+
+   theme_custom+
+   geom_text(
+     data = dorl2,
+     aes(x=X,y=Y, label=paste0(Country,'(',Oc_count,')'), size=Oc_count),
+     color="white"
+   )+
+   scale_size(range=c(2,4))+
+   guides(size='none')

3.哑铃图可视化

Question:过去12个月中，领先的公司在实现以下领域的科技项目目标方面有多成功（Occupier & Investor）

3.1原始数据清洗

> # 选项字符串获取
> Oc_Q23_Text <- read_xlsx('/Users/hufeiran/Desktop/Data\ Analysis\ protfolio/Global\ Technology\ Survey/JLL\ Technology\ Survey\ 2023\ -\ final\ raw\ dataset.xlsx', sheet = 'occupier_keybook') %>%
+   select(c('Data source','Text')) %>%
+   filter(grepl('Q23_[[:digit:]]', `Data source`)) %>%
+   rowwise() %>%
+   mutate(
+     options = unlist(strsplit(`Text`, ' - '))[2]
+   )
> Oc_Q23_Text
# A tibble: 10 × 3
# Rowwise: 
   `Data source` Text                                                    options
   <chr>         <chr>                                                   <chr>  
 1 Q23_1         "How successful have you been in meeting your goals or… Direct…
 2 Q23_2         "How successful have you been in meeting your goals or… Increa…
 3 Q23_3         "How successful have you been in meeting your goals or… Custom…
 4 Q23_4         "How successful have you been in meeting your goals or… Incide…
 5 Q23_5         "How successful have you been in meeting your goals or… Improv…
 6 Q23_6         "How successful have you been in meeting your goals or… Enhanc…
 7 Q23_7         "How successful have you been in meeting your goals or… Improv…
 8 Q23_8         "How successful have you been in meeting your goals or… Develo…
 9 Q23_9         "How successful have you been in meeting your goals or… User a…
10 Q23_10        "How successful have you been in meeting your goals or… [QID14…

> # Global‘Highly successful’计算
> library(janitor)
> # 筛选出以‘Q23’开头的列
> Oc_Global_Q23 <- Oc_Global %>%
+   select(starts_with('Q23'))
> # 计算成功率列联表
> Global_success <- map_df(Oc_Global_Q23, tabyl)
> # 赋值
> Oc_Global_Q23 <- Oc_Global_Q23 %>%
+   slice(1:6)
> f <- sort(rep(c(1:10),6))
> for (i in 1:10){
+   Oc_Global_Q23[paste0('Q23_',i)] = split(Global_success$percent,f)[i]
+ }
> row.names(Oc_Global_Q23) = c("Don't know",'Highly successful','Not very successful',
+                    'Somewhat successful','Successful','NA')
> colnames(Oc_Global_Q23) <- Oc_Q23_Text$options
> # 筛选出‘Highly successful’的数据并进行转置
> Oc_Global_Q23 <- Oc_Global_Q23 %>%
+   select(1:9) %>%
+   slice(2) %>%
+   pivot_longer(cols = 1:9,names_to = 'options',
+             values_to = 'Highly successful Global')
> Oc_Global_Q23
# A tibble: 9 × 2
  options                                                 Highly successful Gl…¹
  <chr>                                                                    <dbl>
1 Direct monetary benefits (cost savings or increased re…                  0.318
2 Increased employee productivity                                          0.3  
3 Customer / employee engagement & satisfaction                            0.325
4 Incident response time                                                   0.217
5 Improved energy efficiency or carbon emissions perform…                  0.275
6 Enhanced data reporting & analytics capabilities                         0.307
7 Improved portfolio utilization & efficiency                              0.255
8 Development of new external or internal tools or produ…                  0.233
9 User adoption & retention rates per product                              0.265
# ℹ abbreviated name: ¹`Highly successful Global`

> # China‘Highly successful’计算
> library(janitor)
> # 筛选出以‘Q23’开头的列
> Oc_China_Q23 <- Oc_Global %>%
+   filter(Country == 'China') %>%
+   select(starts_with('Q23'))
> # 计算成功率列联表
> China_success <- map_df(Oc_China_Q23, tabyl) %>%
+   filter(`.x[[i]]` == 'Highly successful')
> # 赋值
> for (k in 1:10){
+   Oc_China_Q23 <- Oc_China_Q23 %>%
+     slice(c(1)) 
+   Oc_China_Q23[paste0('Q23_',k)] = China_success$percent[k]
+ }
> colnames(Oc_China_Q23) <- Oc_Q23_Text$options
> Oc_China_Q23 <- Oc_China_Q23 %>%
+   select(1:9) %>%
+   pivot_longer(cols = 1:9, names_to = 'options', values_to = 'Highly successful China')
> Oc_China_Q23
# A tibble: 9 × 2
  options                                                 Highly successful Ch…¹
  <chr>                                                                    <dbl>
1 Direct monetary benefits (cost savings or increased re…                   0.24
2 Increased employee productivity                                           0.3 
3 Customer / employee engagement & satisfaction                             0.3 
4 Incident response time                                                    0.22
5 Improved energy efficiency or carbon emissions perform…                   0.24
6 Enhanced data reporting & analytics capabilities                          0.18
7 Improved portfolio utilization & efficiency                               0.28
8 Development of new external or internal tools or produ…                   0.18
9 User adoption & retention rates per product                               0.32
# ℹ abbreviated name: ¹`Highly successful China`

> # Global 与 China数据合并
> Oc_success <- Oc_Global_Q23 %>%
+   mutate(`Highly successful China` = Oc_China_Q23$`Highly successful China`)
> Oc_success
# A tibble: 9 × 3
  options                          Highly successful Gl…¹ Highly successful Ch…²
  <chr>                                             <dbl>                  <dbl>
1 Direct monetary benefits (cost …                  0.318                   0.24
2 Increased employee productivity                   0.3                     0.3 
3 Customer / employee engagement …                  0.325                   0.3 
4 Incident response time                            0.217                   0.22
5 Improved energy efficiency or c…                  0.275                   0.24
6 Enhanced data reporting & analy…                  0.307                   0.18
7 Improved portfolio utilization …                  0.255                   0.28
8 Development of new external or …                  0.233                   0.18
9 User adoption & retention rates…                  0.265                   0.32
# ℹ abbreviated names: ¹`Highly successful Global`, ²`Highly successful China`

> # 绘图
> # 前置参数
> breaks1 <- 1:length(unique(Oc_success$options))
> labels1 <- Oc_success %>% pull(options) %>% unique()
> par1 <- "#BCDEE6"
> par2 <- "#003E51"

> # 字体
> library(showtext)
> showtext_auto(enable = TRUE)
> options(encoding = 'UTF-8')
> # ggplot绘图
> Oc_success %>%
+   mutate(options = options %>% as_factor(),
+          y = options %>% as.numeric(),
+          `Highly successful Global` = `Highly successful Global` *100,
+          `Highly successful China` = `Highly successful China` *100
+          ) %>%
+ ggplot(aes(y = y))+
+    geom_vline(xintercept = 0, size = 2, alpha = .5, color = "grey50")+ # x = 0零值线
+    geom_segment(aes(x = `Highly successful China`, xend = `Highly successful Global`, yend = y))+ # 点之间连线
+    geom_point(aes(x = `Highly successful China`), shape = 16, size = 4, fill = par1)+
+    geom_point(aes(x = `Highly successful Global`), shape = 21, size = 4, fill = 'white')+
+    scale_color_manual()+
+    scale_y_continuous(breaks = breaks1, labels = labels1, expand = c(.01, .01))+
+    theme_minimal(base_family = 'wqy-microhei', base_size = 16)+ # 字体格式
+    theme(legend.position = "none", 
+         panel.grid.minor.y = element_blank(),
+         panel.grid.major.y = element_line(size = 4, color = "grey95"),
+         axis.text.y = element_text(vjust = .3, size = 12))+
+    labs(x = "Change of successful rate between Global and China, %", y = NULL)+
+ # 添加其他注释
+   annotate("rect", xmin = 50, xmax = 60, ymin = 1, ymax = 3,
+            color = "grey50", fill = "white")+
+   annotate("text", x = 55, y = 2.5, label = "LEGEND", 
+            size = 4, hjust = .5, family = 'wqy-microhei', color = "grey20")+
+   annotate("text", x = 55, y = 2, label = "Successful rate between Global and China", 
+            size = 2, hjust = .5, family = 'wqy-microhei', color = "grey20")+
+   annotate("point", x = c(53, 56), y = 1.5, 
+            pch = c(16, 21), size = 2, color = 1)+
+   annotate("text", x = c(54, 57), y = 1.5, 
+            label = c("China", "Global"), 
+            size = 2, hjust = 0, family = 'wqy-microhei', color = "grey20")