# Read the measurement layer from the data source path held in params$file.
base_data <- st_read(dsn = params$file)
## Reading layer `run2-handlebar' from data source `C:\websites\fivegbp_locatienet_com\R\geodata\run2-handlebar.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2370 features and 12 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 4.4774 ymin: 51.0312 xmax: 4.485282 ymax: 51.04226
## Geodetic CRS: WGS 84
# Keep only rows with speed above 0.5 (units are not shown in this section).
# which() drops rows whose speed is NA; plain logical indexing would instead
# turn every NA comparison into an all-NA row.
base_data <- base_data[which(base_data$speed > 0.5), ]
# Later steps filter `data`, leaving the speed-filtered base_data untouched.
data <- base_data
# Split by network label; which() likewise guards against NA labels.
pop1 <- data[which(data$network == "4G"), ]
pop2 <- data[which(data$network == "5G"), ]
## [1] 1000
## pop1.latency pop1.rsrp pop1.arrived
## Min. : 115.0 Min. :-97.00 Min. :36.00
## 1st Qu.: 231.8 1st Qu.:-91.25 1st Qu.:45.00
## Median : 357.5 Median :-89.00 Median :45.00
## Mean : 371.9 Mean :-86.69 Mean :44.95
## 3rd Qu.: 437.5 3rd Qu.:-82.00 3rd Qu.:45.00
## Max. :1681.0 Max. :-60.00 Max. :54.00
## [1] 760
## pop2.latency pop2.rsrp pop2.arrived
## Min. : -1.0 Min. :-95.00 Min. : 0.00
## 1st Qu.: 119.0 1st Qu.:-85.00 1st Qu.:45.00
## Median : 152.5 Median :-80.00 Median :45.00
## Mean : 264.3 Mean :-80.13 Mean :44.09
## 3rd Qu.: 271.0 3rd Qu.:-75.00 3rd Qu.:45.00
## Max. :1956.0 Max. :-64.00 Max. :63.00
## Warning: 'scattergl' trace types don't currently render in RStudio on Windows. Open in another web browser (IE, Chrome, Firefox, etc).
Remove outliers and bad measurements, and create equal-size samples
# Discard invalid or extreme measurements: latency must lie strictly between
# 0 and 1000, and `arrived` must equal 45.
# NOTE(review): 45 is presumably the expected value for a complete
# measurement -- confirm against the data definition.
# which() keeps NA comparisons from producing all-NA rows.
data <- data[
  which(data$latency > 0 & data$latency < 1000 & data$arrived == 45),
]

pop1 <- data[which(data$network == "4G"), ]
pop2 <- data[which(data$network == "5G"), ]

# Balance the two groups: shuffle each one and keep the first n rows, where n
# is the size of the smaller group.
# NOTE(review): no set.seed() is visible in this section, so the subsample
# differs between runs unless a seed is set elsewhere.
n <- min(nrow(pop1), nrow(pop2))
# seq_len() is safe for an empty group, where 1:nrow() would yield c(1, 0).
pop1 <- head(pop1[sample(seq_len(nrow(pop1))), ], n)
pop2 <- head(pop2[sample(seq_len(nrow(pop2))), ], n)
data <- rbind(pop1, pop2)
## [1] 676
## pop1.latency pop1.rsrp pop1.arrived
## Min. :115.0 Min. :-97.00 Min. :45
## 1st Qu.:231.8 1st Qu.:-91.00 1st Qu.:45
## Median :352.0 Median :-89.00 Median :45
## Mean :352.6 Mean :-86.44 Mean :45
## 3rd Qu.:426.5 3rd Qu.:-82.00 3rd Qu.:45
## Max. :977.0 Max. :-60.00 Max. :45
## [1] 676
## pop2.latency pop2.rsrp pop2.arrived
## Min. : 89.0 Min. :-95.00 Min. :45
## 1st Qu.:118.0 1st Qu.:-85.00 1st Qu.:45
## Median :142.0 Median :-80.00 Median :45
## Mean :204.2 Mean :-80.34 Mean :45
## 3rd Qu.:213.0 3rd Qu.:-75.00 3rd Qu.:45
## Max. :929.0 Max. :-64.00 Max. :45
## Warning: 'scattergl' trace types don't currently render in RStudio on Windows. Open in another web browser (IE, Chrome, Firefox, etc).
## [1] 159 564
Because of the lack of overlap, the samples are assumed not to be normally distributed.
If the distribution of latency values is not normal, non-parametric tests such as the Wilcoxon rank-sum test or the Kruskal-Wallis test can be used to determine whether the differences between the groups are significant. These tests do not assume that the data follow a normal distribution and are therefore useful when analyzing non-normally distributed data. As the resulting p-value is less than the significance level of 0.05, we can conclude that there are significant differences between the populations.
Dunn’s test, also known as Dunn’s post-hoc test or the Dunn-Bonferroni test, is a pairwise comparison test that is often used as a follow-up analysis after a Kruskal-Wallis test. Its purpose is to identify which specific groups differ significantly from each other when the Kruskal-Wallis test gives a significant result.
# Kruskal-Wallis rank sum test of latency across the network groups.
kruskal.test(latency ~ network, data = data)
##
## Kruskal-Wallis rank sum test
##
## data: latency by network
## Kruskal-Wallis chi-squared = 414.92, df = 1, p-value < 2.2e-16
# Pairwise Wilcoxon rank sum tests of latency between network groups, with
# Bonferroni-adjusted p-values.
pairwise.wilcox.test(data$latency, data$network, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: data$latency and data$network
##
## 4G
## 5G <2e-16
##
## P value adjustment method: bonferroni
# Dunn's test of latency by network with Bonferroni-adjusted p-values; the
# object is converted to a plain data frame before being piped into rstatix.
as.data.frame(data) %>%
  rstatix::dunn_test(
    formula = latency ~ network,
    p.adjust.method = "bonferroni",
    detailed = FALSE
  )
In a Dunn’s test:
In Dunn’s test, the Z-value is used to determine the significance of each pairwise comparison between groups. It represents the standardized difference in mean ranks between the two groups being compared.
Here’s how to interpret the Z-value in Dunn’s test:
Remember that Dunn’s test is a non-parametric test and does not make assumptions about the distribution of the data. The Z-values in Dunn’s test are based on the ranks rather than the raw data values.
```