library(pacman)
p_load(tidyverse, metaverse, eviatlas, citationchaser, PRISMA2020, topictagger, litsearchr, robvis, metaDigitise, synthesisr, bibliometrix, litsearchr, gmodels, ggpubr, kableExtra)
# Load the raw dataset. The file is semicolon-delimited and uses "," as the
# decimal mark and "." as the grouping mark.
data <- read_delim(
  "data.csv",
  delim = ";",
  escape_double = FALSE,
  locale = locale(decimal_mark = ",", grouping_mark = "."),
  trim_ws = TRUE
)
#str(data)
# Build the hypothesis-1 table: one row per metro with a two-level size
# factor and the number of warehouses per million inhabitants (rounded up)
# at t0 and t1.
h1 <- data %>%
  select(
    metro, size,
    number_ware_t0, population_t0,
    number_ware_t1, population_t1
  ) %>%
  mutate(
    # "Medium" and "Large" are merged into one level; any other value of
    # size (including NA) becomes NA.
    size = as.factor(case_when(
      size == "Small" ~ "Small",
      size %in% c("Medium", "Large") ~ "Medium/Large"
    )),
    ware_pop_t0 = ceiling(number_ware_t0 / (population_t0 / 1000000)),
    ware_pop_t1 = ceiling(number_ware_t1 / (population_t1 / 1000000))
  )
# Mean warehouses per million inhabitants for each size group at t0 and t1,
# computed only over metros whose t0 value is not missing.
data_h1 <- h1 %>%
  drop_na(ware_pop_t0) %>%
  group_by(size) %>%
  summarise(
    t0 = mean(ware_pop_t0),
    t1 = mean(ware_pop_t1)
  )
| Variable Name | Description |
|---|---|
| metro | The name of the metropolitan area. |
| size | The size of the metropolitan area (small, medium, or large). |
| population_t0 | The population of the metropolitan area at the start of the period covered by the dataset. |
| number_ware_t0 | The number of warehouses in the metropolitan area at the start of the period covered by the dataset. |
| population_t1 | The population of the metropolitan area at the end of the period covered by the dataset. |
| number_ware_t1 | The number of warehouses in the metropolitan area at the end of the period covered by the dataset. |
| ware_pop_t0 | Number of warehouses per million inhabitants for t0. |
| ware_pop_t1 | Number of warehouses per million inhabitants for t1. |
# Render the per-metro table inside a fixed-size scrollable box.
h1 %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(
    width = "900px",
    height = "600px"
  )
| metro | size | number_ware_t0 | population_t0 | number_ware_t1 | population_t1 | ware_pop_t0 | ware_pop_t1 |
|---|---|---|---|---|---|---|---|
| atlanta | Medium/Large | 132.0 | 2621089.0 | 401.00 | 3603409 | 51 | 112 |
| belo horizonte | Medium/Large | 43.7 | 781333.3 | 156.39 | 5000000 | 56 | 32 |
| berlin | Medium/Large | 18.0 | 3413084.6 | 22.00 | 4341000 | 6 | 6 |
| bogota | Medium/Large | 347.0 | 8106481.0 | 475.00 | 8779734 | 43 | 55 |
| bordeaux | Small | 11.0 | 583760.0 | 22.00 | 721744 | 19 | 31 |
| brussels | Medium/Large | NA | 1662000.0 | 10553.00 | 2500000 | NA | 4222 |
| calgary | Medium/Large | 21.0 | 1021060.0 | 59.00 | 1310000 | 21 | 46 |
| cali | Medium/Large | NA | 2083171.0 | 27.00 | 2120000 | NA | 13 |
| chicago | Medium/Large | 217.0 | 2676215.0 | 415.00 | 3202509 | 82 | 130 |
| chongqing | Medium/Large | 401.0 | 17801658.0 | 3490.00 | 30000000 | 23 | 117 |
| flevoland | Small | 60.0 | 371572.0 | 59.00 | 396879 | 162 | 149 |
| gothenburg mea | Small | 132.0 | 810000.0 | 207.00 | 973000 | 163 | 213 |
| gothenburg vgc | Medium/Large | 261.0 | 1495000.0 | 390.00 | 1615000 | 175 | 242 |
| halifax | Small | 6.0 | 359183.0 | 9.00 | 390328 | 17 | 24 |
| los angeles | Medium/Large | 220.0 | 12365597.0 | 515.00 | 13234696 | 18 | 39 |
| montreal | Medium/Large | 79.0 | 2605738.0 | 70.00 | 2849318 | 31 | 25 |
| noord holland | Medium/Large | 318.0 | 2614302.0 | 278.00 | 2700000 | 122 | 103 |
| paris all | Medium/Large | 713.0 | 11356295.0 | 955.00 | 11900000 | 63 | 81 |
| paris parcels | Medium/Large | 93.0 | 9485564.0 | 93.00 | 11771621 | 10 | 8 |
| phoenix | Medium/Large | 41.0 | 3251884.0 | 183.00 | 4578519 | 13 | 40 |
| sao paulo | Medium/Large | 228.0 | 15082000.0 | 2066.00 | 21600000 | 16 | 96 |
| seattle | Medium/Large | 85.0 | 622023.0 | 212.00 | 789074 | 137 | 269 |
| shenzhen | Medium/Large | 1430.0 | 9582772.0 | 1660.00 | 12000000 | 150 | 139 |
| randstad | Medium/Large | 589.0 | 7629594.0 | 583.00 | 7100000 | 78 | 83 |
| tokio | Medium/Large | 420.0 | 27106000.0 | 209.00 | 36000000 | 16 | 6 |
| toronto ggh | Medium/Large | 217.0 | 7566300.0 | 350.00 | 8463688 | 29 | 42 |
| toronto gta | Medium/Large | 165.0 | 5081826.0 | 228.00 | 6054191 | 33 | 38 |
| utrecht | Medium/Large | 43.0 | 1222262.0 | 61.00 | 1200000 | 36 | 51 |
| vancouver | Medium/Large | 135.0 | 2224515.0 | 134.00 | 2590921 | 61 | 52 |
| winnipeg | Small | 26.0 | 621457.0 | 41.00 | 666832 | 42 | 62 |
| zuid holland | Medium/Large | 168.0 | 3421459.0 | 185.00 | 3600000 | 50 | 52 |
| new york | Medium/Large | 938.0 | 14983782.0 | 914.00 | 16118232 | 63 | 57 |
| washington dc | Medium/Large | 285.0 | 4709434.0 | 318.00 | 5720217 | 61 | 56 |
| san francisco | Medium/Large | 305.0 | 4123734.0 | 349.00 | 4647924 | 74 | 76 |
| boston | Medium/Large | 290.0 | 2634378.0 | 294.00 | 2872310 | 111 | 103 |
| philadelphia | Medium/Large | 288.0 | 4264068.0 | 340.00 | 4564258 | 68 | 75 |
| dallas | Medium/Large | 338.0 | 2846428.0 | 402.00 | 4321973 | 119 | 94 |
| miami | Medium/Large | 193.0 | 5007956.0 | 235.00 | 5969135 | 39 | 40 |
| detroit | Medium/Large | 196.0 | 2201458.0 | 210.00 | 2350511 | 90 | 90 |
| houston | Medium/Large | 221.0 | 1193312.0 | 298.00 | 1762483 | 186 | 170 |
| cleveland | Medium/Large | 148.0 | 1978890.0 | 150.00 | 1898436 | 75 | 80 |
| san diego | Medium/Large | 84.0 | 2813839.0 | 86.00 | 3280850 | 30 | 27 |
| st louis | Medium/Large | 148.0 | 2148575.0 | 144.00 | 2311690 | 69 | 63 |
| pittsburgh | Medium/Large | 92.0 | 1787955.0 | 98.00 | 1711755 | 52 | 58 |
| denver | Medium/Large | 118.0 | 2316068.0 | 147.00 | 2908463 | 51 | 51 |
| portland | Medium/Large | 160.0 | 1161090.0 | 163.00 | 1365871 | 138 | 120 |
| tampa | Medium/Large | 63.0 | 2396038.0 | 79.00 | 2983928 | 27 | 27 |
| orlando | Medium/Large | 75.0 | 3442581.0 | 91.00 | 3985594 | 22 | 23 |
| kansas city | Medium/Large | 159.0 | 729993.0 | 153.00 | 796646 | 218 | 193 |
| columbus | Medium/Large | 208.0 | 1625491.0 | 195.00 | 1785971 | 128 | 110 |
| cincinnati | Medium/Large | 112.0 | 642221.0 | 122.00 | 683709 | 175 | 179 |
| indianapolis | Medium/Large | 121.0 | 824209.0 | 171.00 | 1033843 | 147 | 166 |
| milwaukee | Medium/Large | 101.0 | 1401336.0 | 98.00 | 1459422 | 73 | 68 |
| charlotte | Medium/Large | 124.0 | 2091897.0 | 145.00 | 2712974 | 60 | 54 |
| salt lake city | Medium/Large | 88.0 | 939169.0 | 117.00 | 1164912 | 94 | 101 |
| san antonio | Medium/Large | 47.0 | 1747863.0 | 67.00 | 2405335 | 27 | 28 |
| virginia beach | Medium/Large | 90.0 | 1931738.0 | 98.00 | 2106945 | 47 | 47 |
| las vegas | Medium/Large | 51.0 | 23541.0 | 80.00 | 22513 | 2167 | 3554 |
| new orleans | Medium/Large | 77.0 | 1337740.0 | 83.00 | 1260281 | 58 | 66 |
| nashville | Medium/Large | 116.0 | 512532.0 | 121.00 | 559425 | 227 | 217 |
| raleigh | Medium/Large | 76.0 | 781161.0 | 77.00 | 1238938 | 98 | 63 |
| greensboro | Medium/Large | 88.0 | 720580.0 | 88.00 | 842338 | 123 | 105 |
| louisville | Medium/Large | 81.0 | 1135588.0 | 89.00 | 1244745 | 72 | 72 |
| grand rapids | Medium/Large | 62.0 | 250981.0 | 72.00 | 301251 | 248 | 240 |
| buffalo | Medium/Large | 57.0 | 1170022.0 | 57.00 | 1133002 | 49 | 51 |
| austin | Medium/Large | 38.0 | 1041753.0 | 50.00 | 1534758 | 37 | 33 |
| birmingham | Medium/Large | 47.0 | 1051340.0 | 51.00 | 1144683 | 45 | 45 |
| greenville | Medium/Large | 101.0 | 453531.0 | 97.00 | 566978 | 223 | 172 |
| rochester | Medium/Large | 45.0 | 259409.0 | 48.00 | 263632 | 174 | 183 |
| albany | Small | 54.0 | 825920.0 | 48.00 | 879085 | 66 | 55 |
| dayton | Small | 54.0 | 269526.0 | 49.00 | 271791 | 201 | 181 |
| richmond | Medium/Large | 58.0 | 990282.0 | 87.00 | 1186339 | 59 | 74 |
| tulsa | Medium/Large | 39.0 | 822560.0 | 37.00 | 939783 | 48 | 40 |
| tucson | Medium/Large | 33.0 | 843702.0 | 55.00 | 1009103 | 40 | 55 |
| cape town | Medium/Large | 3899.0 | 4260700.0 | 4349.00 | 4592195 | 916 | 948 |
| eThekwini | Medium/Large | 2673.0 | 3477000.0 | 2733.00 | 3702000 | 769 | 739 |
| gauteng | Medium/Large | 8401.0 | 11190000.0 | 8766.00 | 12910000 | 751 | 680 |
| seoul | Medium/Large | 984.0 | 10553000.0 | 3340.00 | 9963000 | 94 | 336 |
We performed a chi-square test to analyse the first hypothesis. To do so, we first categorized the cities into two groups based on their size (small or medium/large), and then calculated the number of warehouses per million inhabitants for the cities in each of these groups.
Next, we created a contingency table that shows the average number of warehouses per million inhabitants in each size group (Table 1).
# Round both group means up to the next whole number.
data_h1[["t0"]] <- ceiling(data_h1[["t0"]])
data_h1[["t1"]] <- ceiling(data_h1[["t1"]])
# Render the size-group summary inside a small scrollable box.
data_h1 %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(
    width = "300px",
    height = "100px"
  )
| size | t0 | t1 |
|---|---|---|
| Medium/Large | 142 | 170 |
| Small | 96 | 103 |
After that, we performed the chi-square test to determine whether there is a significant difference in warehouses per million inhabitants between the size groups. The null hypothesis for this test is that there is no significant difference in the distribution of the number of warehouses per million inhabitants between small and medium/large cities. If the p-value of the chi-square test is less than the significance level (usually 0.05), we can reject the null hypothesis and conclude that the distribution of warehouses per million inhabitants differs significantly between city sizes.
# Pearson chi-squared test of the t0 warehouse rate against the size group.
# NOTE(review): ware_pop_t0 is a numeric rate, so passing it to chisq.test()
# appears to treat every distinct value as its own category; the averaged
# table in data_h1 is not used by this call. Confirm this is the intended
# test for the hypothesis.
chisq.test(h1$ware_pop_t0, h1$size)
##
## Pearson's Chi-squared test
##
## data: h1$ware_pop_t0 and h1$size
## X-squared = 76, df = 68, p-value = 0.2365
# Pearson chi-squared test of the t1 warehouse rate against the size group.
# NOTE(review): ware_pop_t1 is a numeric rate, so passing it to chisq.test()
# appears to treat every distinct value as its own category; the averaged
# table in data_h1 is not used by this call. Confirm this is the intended
# test for the hypothesis.
chisq.test(h1$ware_pop_t1, h1$size)
##
## Pearson's Chi-squared test
##
## data: h1$ware_pop_t1 and h1$size
## X-squared = 69.839, df = 66, p-value = 0.3499
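To test whether ware_pop_t0 is significantly different from ware_pop_t1, we can use a paired t-test, because the two samples (ware_pop_t0 and ware_pop_t1) are related: they come from the same cities at different times. A paired t-test is equivalent to a one-sample t-test on the per-city differences, which is why the output below is labelled "One Sample t-test".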
# Per-metro change in warehouses per million inhabitants, computed as
# t0 minus t1 (a negative value means the rate was higher at t1).
diff <- with(h1, ware_pop_t0 - ware_pop_t1)
# One-sample t-test on the paired differences against a mean of zero.
t.test(diff)
##
## One Sample t-test
##
## data: diff
## t = -1.3891, df = 75, p-value = 0.1689
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -63.28553 11.28553
## sample estimates:
## mean of x
## -26
# Flag outliers with the 1.5 * IQR rule, separately for t0 and t1.
# A value is flagged when it lies outside [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR];
# a missing value yields an NA flag.

# --- t0 ---
Q1_t0 <- quantile(h1[["ware_pop_t0"]], probs = 0.25, na.rm = TRUE)
Q3_t0 <- quantile(h1[["ware_pop_t0"]], probs = 0.75, na.rm = TRUE)
IQR_t0 <- Q3_t0 - Q1_t0
lower_t0 <- Q1_t0 - 1.5 * IQR_t0
upper_t0 <- Q3_t0 + 1.5 * IQR_t0
h1[["ware_pop_t0_is_outlier"]] <-
  !(h1[["ware_pop_t0"]] >= lower_t0 & h1[["ware_pop_t0"]] <= upper_t0)

# --- t1 ---
Q1_t1 <- quantile(h1[["ware_pop_t1"]], probs = 0.25, na.rm = TRUE)
Q3_t1 <- quantile(h1[["ware_pop_t1"]], probs = 0.75, na.rm = TRUE)
IQR_t1 <- Q3_t1 - Q1_t1
lower_t1 <- Q1_t1 - 1.5 * IQR_t1
upper_t1 <- Q3_t1 + 1.5 * IQR_t1
h1[["ware_pop_t1_is_outlier"]] <-
  !(h1[["ware_pop_t1"]] >= lower_t1 & h1[["ware_pop_t1"]] <= upper_t1)
# Box plots of warehouses per million inhabitants by size group and timeframe.
#
# Keep only metros that are not flagged as outliers at either time point
# (rows whose flag is NA are dropped by filter() as well), then reshape to
# long format: one row per metro and timeframe, with the label in
# `timeframe` ("t0" / "t1") and the rate in `ware_pop`.
h1_plot <- h1 %>%
  filter(!ware_pop_t0_is_outlier & !ware_pop_t1_is_outlier) %>%
  select(size, ware_pop_t0, ware_pop_t1) %>%
  pivot_longer(
    cols = c(ware_pop_t0, ware_pop_t1),
    names_to = "timeframe",
    names_prefix = "ware_pop_", # strips the prefix, leaving "t0" / "t1"
    values_to = "ware_pop"
  )

# One panel per size group, one box per timeframe.
ggplot(h1_plot) +
  geom_boxplot(aes(y = ware_pop, fill = timeframe)) +
  facet_wrap(~size) +
  labs(fill = "Timeframe", y = "Warehouse/pop")