library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(ggrepel)
library(readr)
library(stats)
library(dplyr)
library(xts)
## Warning: package 'xts' was built under R version 4.3.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.2
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
library(tsibble)
## Warning: package 'tsibble' was built under R version 4.3.2
##
## Attaching package: 'tsibble'
##
## The following object is masked from 'package:zoo':
##
## index
##
## The following object is masked from 'package:lubridate':
##
## interval
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(ggplot2)
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Load the raw signal-metrics export into a base data.frame and preview the
# first rows.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file location exists.
signal_metrics_csv <- "C:/Users/prase/OneDrive/Documents/STATISTICS/signal_metrics.csv"
data <- read.csv(file = signal_metrics_csv)
head(x = data)
## Timestamp Locality Latitude Longitude SignalStrength DataThroughput
## 1 51:30.7 Danapur 25.42617 85.09443 -76.72446 1.105452
## 2 23:56.4 Bankipore 25.59105 85.25081 -77.52335 2.476287
## 3 24:39.7 Ashok Rajpath 25.48233 85.14868 -78.55790 1.031408
## 4 02:26.4 Rajendra Nagar 25.46116 85.23826 -78.77064 1.461008
## 5 32:12.7 Ashok Rajpath 25.61583 85.10455 -77.27129 1.792531
## 6 58:31.2 Rajendra Nagar 25.56698 85.12149 -75.67285 2.572450
## Latency NetworkType BB60C srsRAN BladeRFxA9
## 1 138.9383 LTE -72.50342 -84.97208 -75.12779
## 2 137.6606 LTE -73.45848 -84.77590 -77.94294
## 3 165.4447 LTE -73.88210 -84.76128 -77.21692
## 4 101.6800 LTE -74.04047 -87.27312 -77.86791
## 5 177.4726 LTE -74.08004 -85.93112 -75.57369
## 6 131.5178 LTE -74.66450 -85.16332 -74.51283
# Compact structural overview: column names, storage types and leading values.
utils::str(data)
## 'data.frame': 12621 obs. of 11 variables:
## $ Timestamp : chr "51:30.7" "23:56.4" "24:39.7" "02:26.4" ...
## $ Locality : chr "Danapur" "Bankipore" "Ashok Rajpath" "Rajendra Nagar" ...
## $ Latitude : num 25.4 25.6 25.5 25.5 25.6 ...
## $ Longitude : num 85.1 85.3 85.1 85.2 85.1 ...
## $ SignalStrength: num -76.7 -77.5 -78.6 -78.8 -77.3 ...
## $ DataThroughput: num 1.11 2.48 1.03 1.46 1.79 ...
## $ Latency : num 139 138 165 102 177 ...
## $ NetworkType : chr "LTE" "LTE" "LTE" "LTE" ...
## $ BB60C : num -72.5 -73.5 -73.9 -74 -74.1 ...
## $ srsRAN : num -85 -84.8 -84.8 -87.3 -85.9 ...
## $ BladeRFxA9 : num -75.1 -77.9 -77.2 -77.9 -75.6 ...
# Per-column summary: quantiles and mean for numeric columns, length/class/mode
# for character columns.
base::summary(data)
## Timestamp Locality Latitude Longitude
## Length:12621 Length:12621 Min. :25.41 Min. :84.96
## Class :character Class :character 1st Qu.:25.52 1st Qu.:85.07
## Mode :character Mode :character Median :25.59 Median :85.14
## Mean :25.59 Mean :85.14
## 3rd Qu.:25.67 3rd Qu.:85.21
## Max. :25.77 Max. :85.32
## SignalStrength DataThroughput Latency NetworkType
## Min. :-116.94 Min. : 1.001 Min. : 10.02 Length:12621
## 1st Qu.: -94.88 1st Qu.: 2.492 1st Qu.: 39.96 Class :character
## Median : -91.41 Median : 6.463 Median : 75.21 Mode :character
## Mean : -91.76 Mean :20.909 Mean : 85.28
## 3rd Qu.: -88.34 3rd Qu.:31.504 3rd Qu.:125.96
## Max. : -74.64 Max. :99.986 Max. :199.99
## BB60C srsRAN BladeRFxA9
## Min. :-115.67 Min. :-124.65 Min. :-119.21
## 1st Qu.: -95.49 1st Qu.:-102.55 1st Qu.: -95.17
## Median : -91.60 Median : -98.96 Median : -91.46
## Mean : -91.77 Mean : -99.26 Mean : -91.77
## 3rd Qu.: -87.79 3rd Qu.: -95.67 3rd Qu.: -88.15
## Max. : -72.50 Max. : -81.32 Max. : -74.51
# Count missing values in the two columns used for the time-series analysis:
# the time index (Timestamp) and the response variable (SignalStrength).
missing_Timestamp_count <- sum(is.na(data$Timestamp))
missing_SignalStrength_count <- sum(is.na(data$SignalStrength))
# The labels must name the columns that were actually checked, otherwise the
# report is misleading.
cat("Missing values in 'Timestamp' column:", missing_Timestamp_count, "\n")
## Missing values in 'Timestamp' column: 0
cat("Missing values in 'SignalStrength' column:", missing_SignalStrength_count, "\n")
## Missing values in 'SignalStrength' column: 0
# Keep one row per distinct Timestamp value, retaining every other column.
# NOTE(review): at this point Timestamp is still the raw character value
# (e.g. "51:30.7"), which appears to carry only minutes and seconds, so rows
# that merely share that reading are dropped as duplicates. Confirm this is
# intended.
data <- distinct(data, Timestamp, .keep_all = TRUE)
# Post-de-duplication sanity check. Because it runs after distinct(), this
# count is expected to be zero; it does not report how many rows were removed.
num_duplicates <- sum(duplicated(data[["Timestamp"]]))
cat("Number of duplicates in the 'Timestamp' column:", num_duplicates, "\n")
## Number of duplicates in the 'Timestamp' column: 0
I chose the ‘Timestamp’ column from my dataset as the time index, and the ‘SignalStrength’ column as the response variable to analyze over time.
# Convert the character Timestamp column of `data` to POSIXct.
# A placeholder date (2023-01-01) is prepended because the raw values carry no
# date. The format string has no hour field, so every parsed value falls in
# hour 00 (see the head() output below), and %S reads whole seconds only.
# NOTE(review): no tz is given, so the session's local time zone is used.
timestamp_with_date <- paste("2023-01-01", data$Timestamp)
data$Timestamp <- as.POSIXct(timestamp_with_date, format = "%Y-%m-%d %M:%S")
head(data)
## Timestamp Locality Latitude Longitude SignalStrength
## 1 2023-01-01 00:51:30 Danapur 25.42617 85.09443 -76.72446
## 2 2023-01-01 00:23:56 Bankipore 25.59105 85.25081 -77.52335
## 3 2023-01-01 00:24:39 Ashok Rajpath 25.48233 85.14868 -78.55790
## 4 2023-01-01 00:02:26 Rajendra Nagar 25.46116 85.23826 -78.77064
## 5 2023-01-01 00:32:12 Ashok Rajpath 25.61583 85.10455 -77.27129
## 6 2023-01-01 00:58:31 Rajendra Nagar 25.56698 85.12149 -75.67285
## DataThroughput Latency NetworkType BB60C srsRAN BladeRFxA9
## 1 1.105452 138.9383 LTE -72.50342 -84.97208 -75.12779
## 2 2.476287 137.6606 LTE -73.45848 -84.77590 -77.94294
## 3 1.031408 165.4447 LTE -73.88210 -84.76128 -77.21692
## 4 1.461008 101.6800 LTE -74.04047 -87.27312 -77.86791
## 5 1.792531 177.4726 LTE -74.08004 -85.93112 -75.57369
## 6 2.572450 131.5178 LTE -74.66450 -85.16332 -74.51283
# Two-column tibble (time index + response) that feeds the line chart.
# Note: despite its name, this is a plain tibble, not a tsibble object.
tsib_data <- tibble::tibble(
  Timestamp = data[["Timestamp"]],
  SignalStrength = data[["SignalStrength"]]
)
# Line chart of signal strength against the parsed timestamp.
ggplot(data = tsib_data, mapping = aes(Timestamp, SignalStrength)) +
  geom_line() +
  ggtitle("Signal Strength Over Time") +
  xlab("Timestamp") +
  ylab("Signal Strength") +
  theme_minimal()
To consider different windows of time, we can zoom in on specific time ranges in the plot or repeat the analysis on subsets of the data restricted to particular periods. Without more detail about how the data were collected, it is difficult to say precisely what stands out immediately; in general, the plotted time series may reveal trends, recurring patterns, or anomalies.
# Represent each timestamp as a plain number (seconds since the Unix epoch for
# POSIXct) so it can be used as a numeric predictor.
data[["Timestamp_numeric"]] <- as.numeric(data[["Timestamp"]])
# Simple linear regression of signal strength on time; the slope is the
# estimated change in SignalStrength per unit of Timestamp_numeric.
model <- lm(SignalStrength ~ Timestamp_numeric, data = data)
summary(model)
##
## Call:
## lm(formula = SignalStrength ~ Timestamp_numeric, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.7616 -3.0082 0.3745 3.3463 16.6025
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.687e+04 7.633e+04 -0.614 0.539
## Timestamp_numeric 2.797e-05 4.564e-05 0.613 0.540
##
## Residual standard error: 4.867 on 10580 degrees of freedom
## Multiple R-squared: 3.55e-05, Adjusted R-squared: -5.901e-05
## F-statistic: 0.3756 on 1 and 10580 DF, p-value: 0.54
# Extract the slope estimate and its p-value for the time predictor by name
# rather than by position, then print both.
coef_value <- coef(model)["Timestamp_numeric"]
p_value <- coef(summary(model))["Timestamp_numeric", "Pr(>|t|)"]
cat("Coefficient:", coef_value, "\n")
## Coefficient: 2.796984e-05
cat("P-value:", p_value, "\n")
## P-value: 0.5399764
# Scatter plot of the observations with the fitted least-squares line overlaid
# (no confidence band).
ggplot(data = data, mapping = aes(Timestamp, SignalStrength)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  ggtitle("Linear Regression: Signal Strength Over Time") +
  xlab("Timestamp") +
  ylab("Signal Strength") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
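To assess the strength of a trend in a linear regression, we look at the coefficient of the predictor variable (here the numeric form of “Timestamp”) and consider its magnitude, keeping in mind that the magnitude depends on the units of the predictor (seconds in this case). A larger absolute value of the coefficient generally indicates a stronger trend. Additionally, the statistical significance of the coefficient, as indicated by the p-value, tells us whether the trend is likely to be real or could have occurred by chance. Here the estimated slope is about 2.8e-05 per second with a p-value of about 0.54, so there is no statistically significant linear trend in signal strength over time.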
To detect seasonality in the data, we can use smoothing techniques such as moving averages or exponential smoothing. After smoothing, we can analyze the autocorrelation function (ACF) to identify any seasonal pattern. The resulting plot shows the original time series along with the smoothed version, making it easier to detect any seasonality.
# Build the series in chronological order. The rows of `data` are not sorted by
# Timestamp (see the head() output after parsing), and ts() keeps whatever
# order it is given, so smoothing and autocorrelation on the raw row order
# would not describe behaviour over time.
signal_by_time <- data$SignalStrength[order(data$Timestamp)]
# frequency = 1 declares no seasonal period, so only non-seasonal exponential
# smoothing models can be fitted here.
ts_data <- ts(signal_by_time, frequency = 1, start = 1)
smoothed_data <- forecast::ets(ts_data)
# Original series with the one-step-ahead fitted values of the ETS model.
# The x axis is the position in the time-ordered series, not a clock time.
autoplot(ts_data) +
  autolayer(smoothed_data$fitted, series = "Smoothed") +
  labs(title = "Signal Strength with Exponential Smoothing",
       x = "Observation index (ordered by timestamp)",
       y = "Signal Strength") +
  theme_minimal()
# Side-by-side ACF and PACF. par() returns the previous settings, which are
# restored afterwards so the 1 x 2 layout does not leak into later plots.
old_par <- par(mfrow = c(1, 2))
acf(ts_data, main = "ACF Plot")
pacf(ts_data, main = "PACF Plot")
par(old_par)
The ACF (autocorrelation function) plots help identify any repeating patterns or seasonality in the data. Look for significant spikes at regular intervals in the plots, as these indicate potential seasonality. When we observe significant spikes at regular intervals in the ACF, this suggests the presence of seasonality, and the lag at which the spikes occur corresponds to the length of the season.