library(readr)
library(stats)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ stringr 1.5.0
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loaded glmnet 4.1-8
# Load the signal-metrics CSV into a data frame.
# NOTE: the path is absolute and specific to one machine.
data <- read.csv(
  file = "C:/Users/prase/OneDrive/Documents/STATISTICS/signal_metrics.csv"
)
# Preview the first six rows.
head(data, n = 6L)
## Timestamp Locality Latitude Longitude SignalStrength DataThroughput
## 1 51:30.7 Danapur 25.42617 85.09443 -76.72446 1.105452
## 2 23:56.4 Bankipore 25.59105 85.25081 -77.52335 2.476287
## 3 24:39.7 Ashok Rajpath 25.48233 85.14868 -78.55790 1.031408
## 4 02:26.4 Rajendra Nagar 25.46116 85.23826 -78.77064 1.461008
## 5 32:12.7 Ashok Rajpath 25.61583 85.10455 -77.27129 1.792531
## 6 58:31.2 Rajendra Nagar 25.56698 85.12149 -75.67285 2.572450
## Latency NetworkType BB60C srsRAN BladeRFxA9
## 1 138.9383 LTE -72.50342 -84.97208 -75.12779
## 2 137.6606 LTE -73.45848 -84.77590 -77.94294
## 3 165.4447 LTE -73.88210 -84.76128 -77.21692
## 4 101.6800 LTE -74.04047 -87.27312 -77.86791
## 5 177.4726 LTE -74.08004 -85.93112 -75.57369
## 6 131.5178 LTE -74.66450 -85.16332 -74.51283
# Print the structure of the data frame: dimensions, column names, column
# types, and the leading values of each column.
str(data)
## 'data.frame': 12621 obs. of 11 variables:
## $ Timestamp : chr "51:30.7" "23:56.4" "24:39.7" "02:26.4" ...
## $ Locality : chr "Danapur" "Bankipore" "Ashok Rajpath" "Rajendra Nagar" ...
## $ Latitude : num 25.4 25.6 25.5 25.5 25.6 ...
## $ Longitude : num 85.1 85.3 85.1 85.2 85.1 ...
## $ SignalStrength: num -76.7 -77.5 -78.6 -78.8 -77.3 ...
## $ DataThroughput: num 1.11 2.48 1.03 1.46 1.79 ...
## $ Latency : num 139 138 165 102 177 ...
## $ NetworkType : chr "LTE" "LTE" "LTE" "LTE" ...
## $ BB60C : num -72.5 -73.5 -73.9 -74 -74.1 ...
## $ srsRAN : num -85 -84.8 -84.8 -87.3 -85.9 ...
## $ BladeRFxA9 : num -75.1 -77.9 -77.2 -77.9 -75.6 ...
# Per-column summary: min/quartiles/mean/max for numeric columns and
# length/class/mode for character columns.
summary(data)
## Timestamp Locality Latitude Longitude
## Length:12621 Length:12621 Min. :25.41 Min. :84.96
## Class :character Class :character 1st Qu.:25.52 1st Qu.:85.07
## Mode :character Mode :character Median :25.59 Median :85.14
## Mean :25.59 Mean :85.14
## 3rd Qu.:25.67 3rd Qu.:85.21
## Max. :25.77 Max. :85.32
## SignalStrength DataThroughput Latency NetworkType
## Min. :-116.94 Min. : 1.001 Min. : 10.02 Length:12621
## 1st Qu.: -94.88 1st Qu.: 2.492 1st Qu.: 39.96 Class :character
## Median : -91.41 Median : 6.463 Median : 75.21 Mode :character
## Mean : -91.76 Mean :20.909 Mean : 85.28
## 3rd Qu.: -88.34 3rd Qu.:31.504 3rd Qu.:125.96
## Max. : -74.64 Max. :99.986 Max. :199.99
## BB60C srsRAN BladeRFxA9
## Min. :-115.67 Min. :-124.65 Min. :-119.21
## 1st Qu.: -95.49 1st Qu.:-102.55 1st Qu.: -95.17
## Median : -91.60 Median : -98.96 Median : -91.46
## Mean : -91.77 Mean : -99.26 Mean : -91.77
## 3rd Qu.: -87.79 3rd Qu.: -95.67 3rd Qu.: -88.15
## Max. : -72.50 Max. : -81.32 Max. : -74.51
I created a binary column “GoodSignal” from the “SignalStrength” column by applying a threshold value: observations with a SignalStrength strictly above −91 are coded 1, and all others are coded 0.
# Min, quartiles, mean and max of SignalStrength, shown before the cut-off
# used for the binary GoodSignal column is set.
summary(data$SignalStrength)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -116.94 -94.88 -91.41 -91.76 -88.34 -74.64
# Cut-off on the SignalStrength scale: readings strictly above it are "good".
threshold <- -91
# Encode the comparison as numeric 1 (above threshold) / 0 (otherwise);
# missing SignalStrength values stay NA.
data[["GoodSignal"]] <- as.numeric(data[["SignalStrength"]] > threshold)
# Show the first 50 rows, now including the new GoodSignal column.
head(data, n = 50)
## Timestamp Locality Latitude Longitude SignalStrength DataThroughput
## 1 51:30.7 Danapur 25.42617 85.09443 -76.72446 1.105452
## 2 23:56.4 Bankipore 25.59105 85.25081 -77.52335 2.476287
## 3 24:39.7 Ashok Rajpath 25.48233 85.14868 -78.55790 1.031408
## 4 02:26.4 Rajendra Nagar 25.46116 85.23826 -78.77064 1.461008
## 5 32:12.7 Ashok Rajpath 25.61583 85.10455 -77.27129 1.792531
## 6 58:31.2 Rajendra Nagar 25.56698 85.12149 -75.67285 2.572450
## 7 05:10.6 Anisabad 25.64325 85.22548 -79.78586 7.055229
## 8 25:02.5 Fraser Road 25.50409 85.15215 -79.56979 2.990245
## 9 01:41.0 Anandpuri 25.71732 85.00773 -79.62299 2.714386
## 10 58:53.6 Kumhrar 25.53426 85.02893 -74.64485 2.049014
## 11 48:14.1 Pataliputra 25.73458 85.24577 -79.41169 1.963675
## 12 19:58.4 Bankipore 25.49777 85.24539 -78.54243 4.803491
## 13 43:07.5 Anisabad 25.46295 85.09027 -78.54834 5.444352
## 14 54:45.6 Rajendra Nagar 25.58205 85.14008 -76.37918 1.487846
## 15 55:11.4 Gardanibagh 25.53767 85.21657 -79.47539 2.494880
## 16 08:51.1 Bailey Road 25.45919 85.13344 -78.70136 4.036939
## 17 19:51.3 Exhibition Road 25.73730 85.20469 -80.37612 5.560403
## 18 45:27.0 Gandhi Maidan 25.43104 85.15088 -80.45239 7.751439
## 19 44:46.5 Danapur 25.62924 85.25111 -79.63338 7.567241
## 20 55:23.6 Boring Canal Road 25.66722 85.18380 -79.52348 4.408633
## 21 08:32.9 Patliputra Colony 25.62467 85.24504 -79.75668 2.885384
## 22 59:56.7 Gardanibagh 25.45433 85.13229 -79.19649 91.712154
## 23 58:38.5 Bankipore 25.46229 85.23099 -78.74127 5.053948
## 24 31:13.9 Kumhrar 25.69694 85.00577 -80.39014 2.647859
## 25 11:33.0 Kankarbagh 25.53632 84.98680 -81.04179 6.567972
## 26 34:44.0 Boring Road 25.53574 85.29738 -79.08490 1.771200
## 27 38:36.7 Exhibition Road 25.47124 85.07854 -79.05149 17.014677
## 28 28:24.4 Kankarbagh 25.58727 85.09570 -81.22762 6.749505
## 29 04:22.7 Danapur 25.53218 85.28254 -81.60143 2.093142
## 30 25:58.1 Danapur 25.53095 85.08731 -80.06807 5.919178
## 31 56:44.3 Boring Canal Road 25.74663 85.14328 -81.65836 4.149295
## 32 49:29.8 Kankarbagh 25.63683 85.25327 -80.33459 6.335099
## 33 11:10.4 Ashok Rajpath 25.59772 85.27217 -81.66899 1.616805
## 34 43:05.4 Bankipore 25.58512 85.12152 -81.10474 5.736821
## 35 09:46.1 Phulwari Sharif 25.56541 85.01148 -80.74996 9.245613
## 36 37:45.9 Fraser Road 25.70434 85.04780 -81.66115 8.389815
## 37 32:55.4 Phulwari Sharif 25.73233 85.03220 -80.99253 1.685927
## 38 42:34.9 Exhibition Road 25.44588 85.07742 -81.54229 9.877065
## 39 17:44.8 Kumhrar 25.50266 84.99563 -80.69694 1.392717
## 40 40:22.8 Danapur 25.47281 85.08097 -82.08078 2.846367
## 41 48:26.2 Bankipore 25.58000 85.11384 -80.77411 9.607038
## 42 37:23.5 Exhibition Road 25.75129 85.18701 -82.42370 1.948491
## 43 14:34.8 Kumhrar 25.61121 85.04688 -79.85168 2.188680
## 44 33:18.1 Patliputra Colony 25.74947 85.21585 -82.46717 2.131396
## 45 57:22.4 Fraser Road 25.51345 85.28793 -82.00365 1.418214
## 46 27:04.0 Gandhi Maidan 25.76242 85.17402 -80.90676 1.379695
## 47 39:12.7 Boring Road 25.57070 85.25366 -81.78015 1.233512
## 48 19:28.1 Danapur 25.71364 85.17703 -80.59119 2.550639
## 49 16:41.4 Anandpuri 25.57993 85.19552 -82.00665 9.601905
## 50 04:27.5 Patliputra Colony 25.74304 85.07968 -79.10144 5.409707
## Latency NetworkType BB60C srsRAN BladeRFxA9 GoodSignal
## 1 138.93828 LTE -72.50342 -84.97208 -75.12779 1
## 2 137.66062 LTE -73.45848 -84.77590 -77.94294 1
## 3 165.44468 LTE -73.88210 -84.76128 -77.21692 1
## 4 101.68002 LTE -74.04047 -87.27312 -77.86791 1
## 5 177.47257 LTE -74.08004 -85.93112 -75.57369 1
## 6 131.51783 LTE -74.66450 -85.16332 -74.51283 1
## 7 80.06226 4G -74.87395 -88.32040 -80.94109 1
## 8 196.70610 LTE -75.20550 -84.81174 -77.65664 1
## 9 176.08350 LTE -75.24133 -86.50868 -81.01483 1
## 10 148.26785 LTE -75.38481 -81.32009 -76.43542 1
## 11 173.61780 LTE -75.55510 -87.23246 -79.51853 1
## 12 66.28832 4G -75.57373 -84.19625 -79.59106 1
## 13 95.61426 4G -75.58124 -86.49970 -77.42733 1
## 14 127.23123 LTE -75.70932 -81.98091 -76.68469 1
## 15 173.12382 LTE -75.77120 -84.70266 -76.48276 1
## 16 73.33284 4G -75.79857 -85.57995 -79.23622 1
## 17 51.98722 4G -75.87241 -89.24276 -82.59162 1
## 18 75.80653 4G -75.95761 -89.55060 -80.25780 1
## 19 93.82124 4G -76.01485 -86.44516 -80.37899 1
## 20 82.72454 4G -76.13552 -88.52299 -82.17284 1
## 21 147.21622 LTE -76.18193 -85.50319 -77.43203 1
## 22 29.93184 5G -76.25924 -87.54305 -76.45113 1
## 23 61.02337 4G -76.38739 -88.44806 -75.90696 1
## 24 164.41264 LTE -76.45485 -90.35892 -80.98320 1
## 25 65.02414 4G -76.63643 -86.67732 -79.91372 1
## 26 182.29193 LTE -76.66324 -88.82760 -80.88318 1
## 27 18.02683 5G -76.75894 -84.10311 -76.95299 1
## 28 72.99390 4G -76.75943 -89.33791 -81.28751 1
## 29 182.09215 LTE -76.80326 -86.81165 -79.96883 1
## 30 95.08057 4G -76.85047 -88.66085 -78.15180 1
## 31 56.17318 4G -76.85784 -89.68976 -83.29383 1
## 32 92.19727 4G -76.89185 -87.44430 -82.59454 1
## 33 115.07861 LTE -76.93213 -87.77232 -79.90982 1
## 34 53.20048 4G -76.97834 -88.49144 -83.97862 1
## 35 60.52121 4G -76.99054 -86.07749 -83.21303 1
## 36 55.19797 4G -77.03750 -87.21004 -83.03906 1
## 37 159.03922 LTE -77.11803 -90.62727 -82.10897 1
## 38 89.30251 4G -77.23280 -89.18233 -81.69675 1
## 39 163.97185 LTE -77.34661 -87.81753 -78.85530 1
## 40 173.38548 LTE -77.46782 -89.83772 -81.91104 1
## 41 97.14400 4G -77.47208 -89.44943 -78.96597 1
## 42 166.91099 LTE -77.49886 -87.98487 -81.11721 1
## 43 143.98176 LTE -77.52306 -86.76499 -81.02427 1
## 44 197.11201 LTE -77.52908 -92.38100 -82.37504 1
## 45 114.95091 LTE -77.60146 -87.09757 -81.71829 1
## 46 135.31408 LTE -77.62841 -90.30810 -83.89171 1
## 47 110.18800 LTE -77.66142 -88.60098 -81.34927 1
## 48 189.91804 LTE -77.70028 -87.05853 -79.85920 1
## 49 88.40060 4G -77.72962 -89.63120 -84.01149 1
## 50 52.75487 4G -77.77540 -84.31198 -78.62352 1
# Tabulate how many rows fall into each GoodSignal class (0 and 1).
count <- table(data[["GoodSignal"]])
print(count)
##
## 0 1
## 6722 5899
Explanatory Variable: DataThroughput
# Logistic regression of the binary GoodSignal outcome on DataThroughput
# (binomial family, logit link).
model <- glm(
  formula = GoodSignal ~ DataThroughput,
  family = binomial(link = "logit"),
  data = data
)
# Coefficient table, deviances and AIC for the fitted model.
summary(model)
##
## Call:
## glm(formula = GoodSignal ~ DataThroughput, family = binomial(link = "logit"),
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3841369 0.0228389 16.82 <2e-16 ***
## DataThroughput -0.0276248 0.0008373 -32.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 17443 on 12620 degrees of freedom
## Residual deviance: 16021 on 12619 degrees of freedom
## AIC: 16025
##
## Number of Fisher Scoring iterations: 3
The intercept represents the log-odds when “DataThroughput” is zero, and the coefficient for “DataThroughput” indicates the change in log-odds for a one-unit increase in “DataThroughput.”
Intercept:
Estimate: 0.3841369, Std. Error: 0.0228389, Z value: 16.82, Pr(>|z|): <2e-16
The intercept represents the log-odds of the “GoodSignal” when “DataThroughput” is zero. In other words, when “DataThroughput” is zero, the estimated log-odds of having a “GoodSignal” are 0.3841369.
Coefficient for DataThroughput:
Estimate: -0.0276248, Std. Error: 0.0008373 Z value: -32.99, Pr(>|z|): <2e-16
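The coefficient for “DataThroughput” represents how a one-unit increase in “DataThroughput” changes the log-odds of having a “GoodSignal.” (DataThroughput is the only predictor in this model, so there are no other variables to hold constant.) In this case, each one-unit increase in “DataThroughput” decreases the log-odds of having a “GoodSignal” by 0.0276248; equivalently, the odds are multiplied by exp(−0.0276248) ≈ 0.973, a reduction of about 2.7% per unit.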
# Profile-likelihood 95% confidence interval for the DataThroughput slope
# (0.95 is confint()'s default level).
conf_interval <- confint(model, parm = "DataThroughput")
## Waiting for profiling to be done...
conf_interval
## 2.5 % 97.5 %
## -0.02928140 -0.02599818
# Reuse the interval computed above rather than profiling the model a second
# time with the same settings; `ci` is kept so later references still work.
ci <- conf_interval
# Wald (normal-approximation) interval built by hand: estimate +/- 1.96 * SE.
# It differs slightly from the profile-likelihood interval above because the
# two methods are not the same.
coef_DataThroughput <- coef(model)["DataThroughput"]
se_DataThroughput <- sqrt(vcov(model)["DataThroughput", "DataThroughput"])
lower_bound <- coef_DataThroughput - 1.96 * se_DataThroughput
upper_bound <- coef_DataThroughput + 1.96 * se_DataThroughput
cat("Coefficient (DataThroughput):", coef_DataThroughput, "\n")
## Coefficient (DataThroughput): -0.02762481
cat("Standard Error (SE):", se_DataThroughput, "\n")
## Standard Error (SE): 0.0008373393
cat("95% Confidence Interval (CI): [", lower_bound, ",", upper_bound, "]\n")
## 95% Confidence Interval (CI): [ -0.029266 , -0.02598363 ]
The coefficient estimate provides insight into the strength and direction of the relationship between “DataThroughput” and the probability of a “GoodSignal.”
The standard error quantifies the uncertainty associated with the coefficient estimate.
The 95% confidence interval offers a range of values where the true coefficient is likely to lie, with a 95% level of confidence.
# Jittered scatter of the 0/1 GoodSignal outcome against DataThroughput,
# overlaid with a straight-line (lm) fit and no confidence band.
ggplot(data = data, mapping = aes(x = DataThroughput, y = GoodSignal)) +
  geom_jitter(shape = "O", size = 3, width = 0, height = 0.1) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Scatterplot of the 0/1 GoodSignal outcome with a fitted logistic curve.
# geom_smooth(method = "glm") fits a Gaussian model unless a family is given,
# so the binomial family is passed through method.args; the curve is then a
# logistic regression on linear and quadratic terms of DataThroughput.
ggplot(data, aes(x = DataThroughput, y = GoodSignal)) +
  geom_point() +
  geom_smooth(
    method = "glm",
    formula = y ~ poly(x, 2),
    method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  labs(
    title = "Scatterplot of DataThroughput vs. GoodSignal",
    x = "DataThroughput",
    y = "GoodSignal"
  ) +
  theme_minimal()
In this transformed plot, the logistic regression line is a curve that fits the data more closely. This transformation allows the model to capture the potential nonlinear relationship between “DataThroughput” and the probability of a “GoodSignal.” It provides a better representation of the underlying trend in the data and can lead to a more accurate model.
# Add the natural log of DataThroughput as a new column.
data[["log_DataThroughput"]] <- log(data[["DataThroughput"]])
# Ordinary least-squares fit of the 0/1 outcome on the logged predictor.
# NOTE: this reassigns `model`, replacing the logistic fit created earlier.
model <- lm(formula = GoodSignal ~ log_DataThroughput, data = data)
rsquared <- summary(model)[["r.squared"]]
# Scatter of GoodSignal against log_DataThroughput with the dashed linear fit;
# the subtitle reports the R-squared rounded to three decimals.
ggplot(data = data, mapping = aes(x = log_DataThroughput, y = GoodSignal)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE,
    linetype = "dashed",
    color = "gray"
  ) +
  labs(
    title = "Relationship between log_DataThroughput and GoodSignal",
    subtitle = paste("Linear Fit R-Squared =", round(rsquared, digits = 3))
  ) +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
In our model, the negative coefficient for “DataThroughput” (−0.0276) suggests that higher data throughput is associated with a decreased likelihood of a “GoodSignal.” The polynomial transformation allows the model to capture the curvature in the data, providing a more accurate representation of the underlying trend. Confidence intervals are vital for assessing the precision and significance of a coefficient estimate. If the interval does not include zero, as is the case here, it suggests that the variable is statistically significant in predicting the outcome.