R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(stats)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Load the Data:

# Read the signal-metrics CSV into `data_frame`.
# The file location is stored once in `path` and reused by read.csv(), so the
# literal path no longer has to be kept in sync in two places.
path <- "C:/Users/prase/OneDrive/Documents/signal_metrics.csv"
data_frame <- read.csv(path, header = TRUE, sep = ",")

Build at least three sets of variable combinations:

variable set 1:

# Variable set 1: keep only the SignalStrength column, one row per observation.
# select() replaces the earlier summarise(SignalStrength = SignalStrength / 1):
# summarise() is meant to return one row per group, and returning every row
# from it is deprecated since dplyr 1.1.0. Dividing by 1 left the values
# unchanged, so selecting the column gives the same data without the warning.
variable_set_1 <- data_frame %>%
  select(SignalStrength)
# Summary statistics of SignalStrength; the 1st and 3rd quartiles printed below
# are the cut points used for the bins that follow.
variable_set_1[["SignalStrength"]] %>%
  summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -116.94  -94.88  -91.41  -91.76  -88.34  -74.64
# Bin SignalStrength into an ordered factor with three levels:
# up to -94.88 is low, above -94.88 up to -88.34 is moderate, the rest is high.
variable_set_1 <- variable_set_1 %>%
  mutate(
    Signal_Range = cut(
      SignalStrength,
      breaks = c(-Inf, -94.88, -88.34, Inf),
      labels = c("Low_signal", "Moderate_signal", "High_signal"),
      ordered_result = TRUE
    )
  )
# Inspect the binned data with view().
view(variable_set_1)

variable set 2:

# Variable set 2: keep only the DataThroughput column, one row per observation.
# select() replaces the earlier summarise(DataThroughput = DataThroughput / 1):
# summarise() is meant to return one row per group, and returning every row
# from it is deprecated since dplyr 1.1.0. Dividing by 1 left the values
# unchanged, so selecting the column gives the same data without the warning.
variable_set_2 <- data_frame %>%
  select(DataThroughput)
# Summary statistics of DataThroughput; the 1st and 3rd quartiles printed below
# are the cut points used for the bins that follow.
variable_set_2[["DataThroughput"]] %>%
  summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.001   2.492   6.463  20.909  31.504  99.986
# Bin DataThroughput into an ordered factor with three levels:
# up to 2.492 is low, above 2.492 up to 31.504 is moderate, the rest is high.
variable_set_2 <- variable_set_2 %>%
  mutate(
    DataThroughput_Range = cut(
      DataThroughput,
      breaks = c(-Inf, 2.492, 31.504, Inf),
      labels = c(
        "Low_DataThroughput",
        "Moderate_DataThroughput",
        "High_DataThroughput"
      ),
      ordered_result = TRUE
    )
  )
# Inspect the binned data with view().
view(variable_set_2)

variable set 3:

# Variable set 3: keep only the Latency column, one row per observation.
# select() replaces the earlier summarise(Latency = Latency / 1):
# summarise() is meant to return one row per group, and returning every row
# from it is deprecated since dplyr 1.1.0. Dividing by 1 left the values
# unchanged, so selecting the column gives the same data without the warning.
variable_set_3 <- data_frame %>%
  select(Latency)
# Summary statistics of Latency; the 1st and 3rd quartiles printed below are
# the cut points used for the bins that follow.
variable_set_3[["Latency"]] %>%
  summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.02   39.96   75.21   85.28  125.96  199.99
# Bin Latency into an ordered factor with three levels:
# up to 39.96 is low, above 39.96 up to 125.96 is moderate, the rest is high.
variable_set_3 <- variable_set_3 %>%
  mutate(
    Latency_Range = cut(
      Latency,
      breaks = c(-Inf, 39.96, 125.96, Inf),
      labels = c("Low_Latency", "Moderate_Latency", "High_Latency"),
      ordered_result = TRUE
    )
  )
# Inspect the binned data with view().
view(variable_set_3)

Plot a visualization for each response-explanatory relationship:

visualization for variable set 1:

# Scatter plot of SignalStrength (x) against its Signal_Range bin (y),
# with the points coloured by bin.
ggplot(
  data = variable_set_1,
  mapping = aes(x = SignalStrength, y = Signal_Range, fill = Signal_Range)
) +
  geom_point(mapping = aes(colour = Signal_Range)) +
  labs(
    title = "Signal Range vs. Signal Strength",
    x = "Signal Strength",
    y = "Signal Range"
  )

In the plot above, there are more data points in the Low_signal range, indicating that low signal strength is common in the dataset.

visualization for variable set 2:

# Scatter plot of DataThroughput (x) against its DataThroughput_Range bin (y),
# with the points coloured by bin.
ggplot(
  data = variable_set_2,
  mapping = aes(
    x = DataThroughput,
    y = DataThroughput_Range,
    fill = DataThroughput_Range
  )
) +
  geom_point(mapping = aes(colour = DataThroughput_Range)) +
  labs(
    title = "DataThroughput Range vs. Data Throughput",
    x = "Data Throughput",
    y = "DataThroughput Range"
  )

visualization for variable set 3:

# Scatter plot of Latency (x) against its Latency_Range bin (y),
# with the points coloured by bin.
ggplot(
  data = variable_set_3,
  mapping = aes(x = Latency, y = Latency_Range, fill = Latency_Range)
) +
  geom_point(mapping = aes(colour = Latency_Range)) +
  labs(
    title = "Latency Range vs. Latency",
    x = "Latency",
    y = "Latency Range"
  )

Calculate the appropriate correlation coefficient:

Correlation for Variable Set 1:

# Pearson correlation between SignalStrength and the integer codes of the
# ordered factor Signal_Range (as.numeric() maps its three levels to 1, 2, 3).
# NOTE(review): Signal_Range is ordinal and is derived from SignalStrength
# itself; confirm that Pearson (rather than a rank-based method) is intended.
with(
  variable_set_1,
  cor(SignalStrength, as.numeric(Signal_Range), method = "pearson")
)
## [1] 0.8939718

The correlation coefficient between SignalStrength and Signal_Range is 0.8939718.

Correlation for Variable Set 2:

# Pearson correlation between DataThroughput and the integer codes of the
# ordered factor DataThroughput_Range (as.numeric() maps its levels to 1, 2, 3).
# NOTE(review): DataThroughput_Range is ordinal and is derived from
# DataThroughput itself; confirm that Pearson is the intended method.
with(
  variable_set_2,
  cor(DataThroughput, as.numeric(DataThroughput_Range), method = "pearson")
)
## [1] 0.8025748

The correlation coefficient between DataThroughput and DataThroughput_Range is 0.8025748.

Correlation for Variable Set 3:

# Pearson correlation between Latency and the integer codes of the ordered
# factor Latency_Range (as.numeric() maps its three levels to 1, 2, 3).
# NOTE(review): Latency_Range is ordinal and is derived from Latency itself;
# confirm that Pearson (rather than a rank-based method) is intended.
with(
  variable_set_3,
  cor(Latency, as.numeric(Latency_Range), method = "pearson")
)
## [1] 0.9124618

The correlation coefficient between Latency and Latency_Range is 0.9124618.

Build a confidence interval for each response variable:

Confidence Interval for Variable Set 1:

# 95% confidence interval for the mean of SignalStrength, taken from a
# one-sample t-test on the column.
SignalStrength_range <- variable_set_1[["SignalStrength"]]
conf_interval_SignalStrength_range <-
  t.test(SignalStrength_range, conf.level = 0.95)[["conf.int"]]
conf_interval_SignalStrength_range
## [1] -91.84628 -91.67383
## attr(,"conf.level")
## [1] 0.95

The confidence interval for SignalStrength indicates that we are 95% confident the true mean of SignalStrength lies between the lower bound of -91.84628 and the upper bound of -91.67383.

Confidence Interval for Variable Set 2:

# 95% confidence interval for the mean of DataThroughput, taken from a
# one-sample t-test on the column.
Data_range <- variable_set_2[["DataThroughput"]]
conf_interval_Data_range <-
  t.test(Data_range, conf.level = 0.95)[["conf.int"]]
conf_interval_Data_range
## [1] 20.41862 21.40033
## attr(,"conf.level")
## [1] 0.95

The confidence interval for DataThroughput indicates that we are 95% confident the true mean of DataThroughput lies between the lower bound of 20.41862 and the upper bound of 21.40033.

Confidence Interval for Variable Set 3:

# 95% confidence interval for the mean of Latency, taken from a one-sample
# t-test on the column.
latency_range <- variable_set_3[["Latency"]]
conf_interval_Latency_range <-
  t.test(latency_range, conf.level = 0.95)[["conf.int"]]
conf_interval_Latency_range
## [1] 84.34132 86.21097
## attr(,"conf.level")
## [1] 0.95

The confidence interval for Latency indicates that we are 95% confident the true mean of Latency lies between the lower bound of 84.34132 and the upper bound of 86.21097.