library(ggplot2)
library(dplyr)
library(tidyr)
library(car)
library(gridExtra)
library(e1071)
#data <- read.csv("C:/Users/leduc/Downloads/depression.csv", sep=",",header=T)
#"C:/Users/leduc/OneDrive/Desktop/University/Undergrad/Biostats 3400/bolts.RData"
load("C:/Users/leduc/OneDrive/Desktop/University/Undergrad/Biostats 3400/bolts.RData")
A manufacturer of automotive health accessories provides hardware, e.g. nuts, bolts, washers and screws, to fasten the accessory to the ambulance car or truck. Hardware is counted and packaged automatically. Specifically, bolts are dumped into a large metal dish. A plate that forms the bottom of the dish rotates clockwise. This rotation forces bolts to the outside of the dish and up along a(n) (adjustable) narrow ledge. Due to the vibration of the dish, some bolts fall off the ledge and back into the dish. The ledge spirals up to a point where the bolts are allowed to drop into a pan on a conveyor belt. As a bolt drops, it passes by an electronic eye that counts it. When the electronic counter reaches the preset number of bolts (twenty in this case), the rotation is stopped and the conveyor belt is moved forward.
There are several adjustments on the machine that affect its operation which are detailed in the variables: - Speed: a setting that controls the speed of rotation of the plate at the bottom of the dish - Rotation: a setting that is used to change the rotation of the plate - BatchSize: the machine can be set to count in batches of 10 (coded as zero) or 20 (coded as 1) - LedgeWidth: the width of the ledge (in mm) for a given run - Sensitivity: the sensitivity of the electronic eye
The measured response is the time, in seconds, it takes to count twenty bolts.
bolts$Rotation <- as.factor(bolts$Rotation)
bolts$Success <- as.factor(bolts$Success)
bolts$Sensitivity <- as.factor(bolts$Sensitivity)
bolts$Time <- as.numeric(bolts$Time)
bolts$Speed <- as.numeric(bolts$Speed)
bolts$LedgeWidth <- as.numeric(bolts$LedgeWidth)
bolts$Rotation <- droplevels(na.omit(bolts$Rotation))
bolts$Success <- droplevels(na.omit(bolts$Success))
bolts$Sensitivity <- droplevels(na.omit(bolts$Sensitivity))
bolts$Time[is.na(bolts$Time)] <- mean(bolts$Time, na.rm = TRUE) # Replace NA with mean
bolts$Time[is.na(bolts$Speed)] <- mean(bolts$Speed, na.rm = TRUE) # Replace NA with mean
bolts$Time[is.na(bolts$LedgeWidth)] <- mean(bolts$LedgeWidth, na.rm = TRUE) # Replace NA with mean
# Categorical Variables: Distribution
categorical_vars <- bolts %>% select(Rotation, Success, Sensitivity)
cat_summary <- categorical_vars %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Category") %>%
group_by(Variable, Category) %>%
summarise(Count = n(), .groups = "drop")
# Create Bar Plot for Categorical Variables
categorical_plot <- ggplot(cat_summary, aes(x = Category, y = Count, fill = Variable)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~Variable, scales = "free_x") +
labs(
title = "Distribution of Categorical Variables",
x = "Category",
y = "Count"
) +
theme_minimal()
# Select only the continuous variables
numeric_vars <- bolts %>% select(Time, Speed, LedgeWidth)
# Reshape data for plotting (long format)
numeric_vars_long <- numeric_vars %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Value")
# Define custom y-axis labels for each facet
custom_labels <- c(
"Time" = "Time (seconds)",
"Speed" = "Speed (units/sec)",
"LedgeWidth" = "Ledge Width (mm)"
)
# Create Faceted Boxplots
ggplot(numeric_vars_long, aes(x = "", y = Value, fill = Variable)) + # x = "" for single boxplot per facet
geom_boxplot() +
facet_wrap(
~Variable,
scales = "free_y",
labeller = as_labeller(custom_labels)
) +
labs(
title = "Boxplots of Continuous Variables",
x = "Variable",
y = NULL # Remove global y-axis label
) +
theme_minimal() +
theme(
strip.text = element_text(size = 12, face = "bold"), # Emphasize facet labels
axis.text.x = element_blank(), # Hide x-axis text (optional)
axis.ticks.x = element_blank() # Hide x-axis ticks (optional)
)
# Print Plots
print(categorical_plot)
# Convert data to long format for plotting
bolts_long <- bolts %>%
gather(key = "Variable", value = "Value",
Speed, LedgeWidth, Sensitivity, Rotation, Success)
# Scatter Plots for Time vs Speed, Time vs LedgeWidth
scatter_plot <- ggplot(bolts, aes(x = Speed, y = Time)) +
geom_point() +
geom_smooth(method = "lm", color = "blue", se = TRUE) +
labs(title = "Time vs. Speed (Scatter Plot)", x = "Speed (m/s)", y = "Time (seconds)") +
theme_minimal()
scatter_plot_2 <- ggplot(bolts, aes(x = LedgeWidth, y = Time)) +
geom_point() +
geom_smooth(method = "lm", color = "blue", se = TRUE) +
labs(title = "Time vs. Ledge Width (Scatter Plot)", x = "Ledge Width (mm)", y = "Time (seconds)") +
theme_minimal()
# Boxplots for Time vs Sensitivity, Time vs Rotation, Time vs Success
boxplot1 <- ggplot(bolts, aes(x = Sensitivity, y = Time, fill = Sensitivity)) +
geom_boxplot() +
labs(title = "Time vs Sensitivity (Boxplot)", x = "Sensitivity (High/Low)", y = "Time (seconds)") +
theme_minimal() +
scale_fill_manual(values = c("High" = "skyblue", "Low" = "salmon"))
boxplot2 <- ggplot(bolts, aes(x = Rotation, y = Time, fill = Rotation)) +
geom_boxplot() +
labs(title = "Time vs Rotation (Boxplot)", x = "Rotation (Clockwise/Counterclockwise)", y = "Time (seconds)") +
theme_minimal() +
scale_fill_manual(values = c("Clockwise" = "lightgreen", "Alternating" = "orange"))
boxplot3 <- ggplot(bolts, aes(x = Success, y = Time, fill = Success)) +
geom_boxplot() +
labs(title = "Time vs Success (Boxplot)", x = "Success (Yes/No)", y = "Time (seconds)") +
theme_minimal() +
scale_fill_manual(values = c("1" = "purple", "0" = "yellow"))
scatter_plot
scatter_plot_2
boxplot1
boxplot2
boxplot3
# Define a function to calculate the summary
summarize_continuous <- function(data, variable_name) {
variable <- data[[variable_name]]
summary_stats <- data.frame(
Mean = round(mean(variable, na.rm = TRUE), 3),
SD = round(sd(variable, na.rm = TRUE), 3),
Median = round(median(variable, na.rm = TRUE)),
IQR = round(IQR(variable, na.rm = TRUE)),
Variance = round(var(variable, na.rm = TRUE), 3),
Skewness = round(skewness(variable, na.rm = TRUE), 3),
Kurtosis = round(kurtosis(variable, na.rm = TRUE), 3),
Min = round(min(variable, na.rm = TRUE)),
Max = round(max(variable, na.rm = TRUE)),
`1%` = round(quantile(variable, 0.01, na.rm = TRUE)),
`5%` = round(quantile(variable, 0.05, na.rm = TRUE)),
`10%` = round(quantile(variable, 0.10, na.rm = TRUE)),
`90%` = round(quantile(variable, 0.90, na.rm = TRUE)),
`95%` = round(quantile(variable, 0.95, na.rm = TRUE)),
`99%` = round(quantile(variable, 0.99, na.rm = TRUE))
)
# Add a range column (Min to Max)
summary_stats$Range <- paste0(summary_stats$Min, " to ", summary_stats$Max)
# Drop Min and Max columns after creating Range
summary_stats <- summary_stats %>% select(-Min, -Max)
# Transpose for better readability
t(summary_stats)
}
# Example usage
summary_time <- summarize_continuous(bolts, "Time")
summary_speed <- summarize_continuous(bolts, "Speed")
summary_ledgewidth <- summarize_continuous(bolts, "LedgeWidth")
# Display summaries
cat("Summary for Time:\n")
Summary for Time:
print(summary_time)
1%
Mean "10.884"
SD "2.785"
Median "11"
IQR "4"
Variance "7.754"
Skewness "0.419"
Kurtosis "1.325"
X1. "5"
X5. "7"
X10. "8"
X90. "14"
X95. "15"
X99. "18"
Range "1 to 26"
cat("\nSummary for Speed:\n")
Summary for Speed:
print(summary_speed)
1%
Mean "50.514"
SD "3.923"
Median "51"
IQR "5"
Variance "15.386"
Skewness "-0.077"
Kurtosis "0.019"
X1. "41"
X5. "44"
X10. "45"
X90. "55"
X95. "57"
X99. "60"
Range "38 to 62"
cat("\nSummary for LedgeWidth:\n")
Summary for LedgeWidth:
print(summary_ledgewidth)
1%
Mean "14.967"
SD "1.461"
Median "15"
IQR "2"
Variance "2.136"
Skewness "0.035"
Kurtosis "-0.134"
X1. "12"
X5. "13"
X10. "13"
X90. "17"
X95. "17"
X99. "18"
Range "11 to 20"
M1 <- glm(Time ~ Speed + Rotation + LedgeWidth + Sensitivity + Success,
family = gaussian, data = bolts)
summary(M1)
Call:
glm(formula = Time ~ Speed + Rotation + LedgeWidth + Sensitivity +
Success, family = gaussian, data = bolts)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.41504 1.20406 4.497 7.91e-06 ***
Speed 0.01069 0.01841 0.581 0.56156
RotationClockwise 3.15466 0.14452 21.829 < 2e-16 ***
LedgeWidth 0.14868 0.04949 3.004 0.00275 **
SensitivityHigh 2.07058 0.14577 14.204 < 2e-16 ***
Success1 0.26581 0.14597 1.821 0.06898 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 4.152716)
Null deviance: 6195.7 on 799 degrees of freedom
Residual deviance: 3297.3 on 794 degrees of freedom
AIC: 3417.3
Number of Fisher Scoring iterations: 2
rss <- sum(residuals(M1)^2) # Residual Sum of Squares
tss <- sum((bolts$Time - mean(bolts$Time))^2) # Total Sum of Squares
r_squared <- 1 - (rss / tss)
cat("R-squared:", r_squared, "\n")
R-squared: 0.4678119
cat("AIC", M1$aic, "\n")
AIC 3417.289
model_linear <- lm(Time ~ poly(LedgeWidth,1), data = bolts)
model_quadratic <- lm(Time ~ poly(LedgeWidth, 2), data = bolts)
model_cubic <- lm(Time ~ poly(LedgeWidth, 3), data = bolts)
model_quartic <- lm(Time ~ poly(LedgeWidth, 4), data = bolts)
anova(model_linear, model_quadratic, model_cubic, model_quartic)
Analysis of Variance Table
Model 1: Time ~ poly(LedgeWidth, 1)
Model 2: Time ~ poly(LedgeWidth, 2)
Model 3: Time ~ poly(LedgeWidth, 3)
Model 4: Time ~ poly(LedgeWidth, 4)
Res.Df RSS Df Sum of Sq F Pr(>F)
1 798 6191.7
2 797 4036.4 1 2155.36 425.0796 <2e-16 ***
3 796 4032.4 1 3.92 0.7736 0.3794
4 795 4031.0 1 1.42 0.2801 0.5968
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
AIC(model_linear, model_quadratic, model_cubic, model_quartic)
# Scatter plot with polynomial fits
library(ggplot2)
ggplot(bolts, aes(x = LedgeWidth, y = Time)) +
geom_point() +
stat_smooth(method = "lm", formula = y ~ x, color = "orange", se = FALSE, linetype = "dashed") + # Linear fit
stat_smooth(method = "lm", formula = y ~ poly(x, 2), color = "green", se = FALSE) + # Quadratic fit
stat_smooth(method = "lm", formula = y ~ poly(x, 3), color = "red", se = FALSE) + # Cubic fit
stat_smooth(method = "lm", formula = y ~ poly(x, 4), color = "blue", se = FALSE) + # Quartic fit
labs(title = "Polynomial Relationship Between Time and LedgeWidth",
x = "LedgeWidth", y = "Time") +
theme_minimal()
crPlots(M1, ~ LedgeWidth, lwd=4, col="grey", smooth=list(lwd.smooth=4))
bolts$LedgeWidth2 <- bolts$LedgeWidth^2
M2 <- glm(Time ~ Rotation + LedgeWidth + LedgeWidth2 + Sensitivity + Success,
family = gaussian, data = bolts)
#M2 <- glm(Time ~ Rotation + poly(LedgeWidth,2) + Sensitivity,
#family = gaussian, data = bolts)
#M2$coefficients
par(mfrow=c(1,2))
crPlots(M2, ~ LedgeWidth, lwd=4, col="grey", smooth=list(lwd.smooth=4))
crPlots(M2, ~ LedgeWidth2, lwd=4, col="grey", smooth=list(lwd.smooth=4))
par(mfrow=c(1,1))
residuals <- residuals(M1)
# Histogram plot
hist_plot <- ggplot(bolts, aes(x = residuals)) +
geom_histogram(bins=15, color = "black", alpha = 0.7) +
labs(title = "Histogram Plot",
x = "Residuals",
y = "Density")
# Boxplot
boxplot <- ggplot(bolts, aes(x = residuals)) +
geom_boxplot() +
coord_flip() +
labs(title = "Boxplot",
x = "Residuals")
# Density plot
density_plot <- ggplot(bolts, aes(x = residuals)) +
geom_density() +
stat_function(fun = dnorm, args = list(mean = mean(residuals),
sd = sd(residuals)),
color = "red", linetype = "dashed", size = 1) +
labs(title = "Density Plot",
x = "Residuals",
y = "Density")
# QQ Plot
qq_plot <- ggplot(bolts, aes(sample = residuals)) +
stat_qq() +
stat_qq_line(color = "red") +
labs(title = "QQ Plot",
y = "Residuals",
x = "Inverse Normal")
grid_arrangement <- grid.arrange(hist_plot, boxplot, density_plot, qq_plot, ncol = 2,
top = grid::textGrob
("Residual plots with Time (Model 1)",
gp = grid::gpar(fontsize = 14, fontface = "bold")))
residuals_tf <- residuals(M2)
# Histogram plot
hist_plot <- ggplot(bolts, aes(x = residuals_tf)) +
geom_histogram(bins=15, color = "black", alpha = 0.7) +
labs(title = "Histogram Plot",
x = "Residuals",
y = "Density")
# Boxplot
boxplot <- ggplot(bolts, aes(x = residuals_tf)) +
geom_boxplot() +
coord_flip() +
labs(title = "Boxplot",
x = "Residuals")
# Density plot
density_plot <- ggplot(bolts, aes(x = residuals_tf)) +
geom_density() +
stat_function(fun = dnorm, args = list(mean = mean(residuals_tf),
sd = sd(residuals_tf)),
color = "red", linetype = "dashed", size = 1) +
labs(title = "Density Plot",
x = "Residuals",
y = "Density")
# QQ Plot
qq_plot <- ggplot(bolts, aes(sample = residuals_tf)) +
stat_qq() +
stat_qq_line(color = "red") +
labs(title = "QQ Plot",
y = "Residuals",
x = "Inverse Normal")
grid_arrangement1 <- grid.arrange(hist_plot, boxplot, density_plot, qq_plot, ncol = 2,
top = grid::textGrob
("Residual plots with Time (Model 2)",
gp = grid::gpar(fontsize = 14, fontface = "bold")))
# Before adjustment
shapiro_test_result <- shapiro.test(residuals)
skewness_value_before <- skewness(residuals)
kurtosis_value_before <- kurtosis(residuals)
# After adjustment
shapiro_test_result_after <- shapiro.test(residuals_tf)
skewness_value_after <- skewness(residuals_tf)
kurtosis_value_after <- kurtosis(residuals_tf)
knitr::kable(
cbind(
"Before Adjustment (Model 1)" = c("Shapiro-Wilk Test statistics:" =
sprintf("%.3f", shapiro_test_result$statistic),
"Shapiro-Wilk Test p-value:" =
sprintf("%.2e", shapiro_test_result$p.value),
"Skewness:" = sprintf("%.3f",skewness_value_before),
"Kurtosis:" = sprintf("%.3f",kurtosis_value_before)),
"After Adjustment (Model 2)" = c("Shapiro-Wilk Test statistics:" =
sprintf("%.2f",shapiro_test_result_after$statistic),
"Shapiro-Wilk Test p-value:" =
sprintf("%.2e", shapiro_test_result_after$p.value),
"Skewness:" = sprintf("%.3f",skewness_value_after),
"Kurtosis:" = sprintf("%.3f",kurtosis_value_after))
),
col.names = c("Before transformation (Model 1)", "After transformation (Model 2)"),
format = "markdown"
)
| Before transformation (Model 1) | After transformation (Model 2) | |
|---|---|---|
| Shapiro-Wilk Test statistics: | 0.935 | 0.93 |
| Shapiro-Wilk Test p-value: | 3.81e-18 | 1.01e-18 |
| Skewness: | 0.454 | -1.312 |
| Kurtosis: | 4.871 | 9.145 |
# Make predictions and calculate residuals
predicted <- predict(M1, newdata = bolts, type = "response")
residuals <- residuals(M1)
predicted_tf <- predict(M2, newdata = bolts, type = "response")
predicted_tf1 <- predict(M2, newdata = bolts)
residuals_tf <- residuals(M2)
# Residuals vs Predictor (RVP) plot
rvp_plot <- ggplot(bolts, aes(x = LedgeWidth, y = residuals)) +
geom_point() +
geom_hline(yintercept = 0, linetype = "solid", color = "red") +
labs(title = "Model 1 (RVP) Plot",
x = "Ledge Width",
y = "Residuals")
# Residuals vs Fitted Value (RVF) plot
rvf_plot <- ggplot(bolts, aes(x = predicted, y = residuals)) +
geom_point() +
geom_hline(yintercept = 0, linetype = "solid", color = "red") +
labs(title = "Model 1 (RVF) Plot",
x = "Fitted (Linear prediction)",
y = "Residuals")
# Residuals vs Predictor (RVP) plot
rvp_plot_tf <- ggplot(bolts, aes(x = LedgeWidth, y = residuals_tf)) +
geom_point() +
geom_hline(yintercept = 0, linetype = "solid", color = "red") +
labs(title = "Model 2 (RVP) Plot",
x = "Ledge Width",
y = "Residuals")
# Residuals vs Fitted Value (RVF) plot
rvf_plot_tf <- ggplot(bolts, aes(x = predicted_tf, y = residuals_tf)) +
geom_point() +
geom_hline(yintercept = 0, linetype = "solid", color = "red") +
labs(title = "Model 2 (RVF) Plot",
x = "Fitted (Linear prediction)",
y = "Residuals")
grid_arrangement <- grid.arrange(rvp_plot, rvf_plot, rvp_plot_tf, rvf_plot_tf,
ncol = 2, nrow = 2,
top = grid::textGrob
("Residuals vs Predictor and Fitted Value Plots",
gp = grid::gpar(fontsize = 14, fontface = "bold")))
bolts$LedgeWidth2 <- bolts$LedgeWidth^2
M2 <- glm(Time ~ Rotation + LedgeWidth + LedgeWidth2 + Sensitivity + Success,
family = gaussian, data = bolts)
summary(M2)
Call:
glm(formula = Time ~ Rotation + LedgeWidth + LedgeWidth2 + Sensitivity +
Success, family = gaussian, data = bolts)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 122.29143 3.72327 32.845 <2e-16 ***
RotationClockwise 3.08250 0.09629 32.013 <2e-16 ***
LedgeWidth -15.48453 0.49676 -31.171 <2e-16 ***
LedgeWidth2 0.52114 0.01652 31.539 <2e-16 ***
SensitivityHigh 1.81934 0.09739 18.681 <2e-16 ***
Success1 0.17774 0.09730 1.827 0.0681 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 1.844145)
Null deviance: 6195.7 on 799 degrees of freedom
Residual deviance: 1464.3 on 794 degrees of freedom
AIC: 2767.9
Number of Fisher Scoring iterations: 2
M1 <- glm(Time ~ Speed + Rotation + LedgeWidth + Sensitivity + Success,
family = gaussian, data = bolts)
summary(M1)
Call:
glm(formula = Time ~ Speed + Rotation + LedgeWidth + Sensitivity +
Success, family = gaussian, data = bolts)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.41504 1.20406 4.497 7.91e-06 ***
Speed 0.01069 0.01841 0.581 0.56156
RotationClockwise 3.15466 0.14452 21.829 < 2e-16 ***
LedgeWidth 0.14868 0.04949 3.004 0.00275 **
SensitivityHigh 2.07058 0.14577 14.204 < 2e-16 ***
Success1 0.26581 0.14597 1.821 0.06898 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 4.152716)
Null deviance: 6195.7 on 799 degrees of freedom
Residual deviance: 3297.3 on 794 degrees of freedom
AIC: 3417.3
Number of Fisher Scoring iterations: 2
#with(M1, cbind(res.deviance = deviance, df = df.residual,
#p = pchisq(deviance, df.residual, lower.tail=FALSE)))
rss <- sum(residuals(M1)^2) # Residual Sum of Squares
tss <- sum((bolts$Time - mean(bolts$Time))^2) # Total Sum of Squares
r_squared <- 1 - (rss / tss)
cat("R-squared:", r_squared, "\n")
R-squared: 0.4678119
cat("AIC", M1$aic, "\n")
AIC 3417.289
bolts$LedgeWidth2 <- bolts$LedgeWidth^2
M2 <- glm(Time ~ Rotation + LedgeWidth + LedgeWidth2 + Sensitivity + Success,
family = gaussian, data = bolts)
summary(M2)
Call:
glm(formula = Time ~ Rotation + LedgeWidth + LedgeWidth2 + Sensitivity +
Success, family = gaussian, data = bolts)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 122.29143 3.72327 32.845 <2e-16 ***
RotationClockwise 3.08250 0.09629 32.013 <2e-16 ***
LedgeWidth -15.48453 0.49676 -31.171 <2e-16 ***
LedgeWidth2 0.52114 0.01652 31.539 <2e-16 ***
SensitivityHigh 1.81934 0.09739 18.681 <2e-16 ***
Success1 0.17774 0.09730 1.827 0.0681 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for gaussian family taken to be 1.844145)
Null deviance: 6195.7 on 799 degrees of freedom
Residual deviance: 1464.3 on 794 degrees of freedom
AIC: 2767.9
Number of Fisher Scoring iterations: 2
#with(M2, cbind(res.deviance = deviance, df = df.residual,
#p = pchisq(deviance, df.residual, lower.tail=FALSE)))
rss <- sum(residuals(M2)^2) # Residual Sum of Squares
tss <- sum((bolts$Time - mean(bolts$Time))^2) # Total Sum of Squares
r_squared <- 1 - (rss / tss)
cat("R-squared:", r_squared, "\n")
R-squared: 0.763665
cat("AIC", M2$aic, "\n")
AIC 2767.892
#M_2 <- glm(Time ~ Rotation + LedgeWidth + Sensitivity,
#family = gaussian, data = bolts)
#summary(M_2)
#with(M_2, cbind(res.deviance = deviance, df = df.residual,
#p = pchisq(deviance, df.residual, lower.tail=FALSE)))
# Calculate R-squared
#rss <- sum(residuals(M_2)^2) # Residual Sum of Squares
#tss <- sum((bolts$Time - mean(bolts$Time))^2) # Total Sum of Squares
#r_squared <- 1 - (rss / tss)
#cat("R-squared:", r_squared, "\n")
#N1 <- lm(Time ~ Speed + Rotation + LedgeWidth + Sensitivity + Success, data = bolts)
#N2 <- lm(Time ~ Rotation + LedgeWidth + Sensitivity, data = bolts)
# Do F-test for nested models (ANOVA):
#anova(M_2,M1)
# Define a sequence of LedgeWidth values for prediction
ledge_width_values <- seq(min(bolts$LedgeWidth), max(bolts$LedgeWidth), length.out = 100)
# Create a new data frame with varying LedgeWidth and fixed settings for other variables
new_data <- data.frame(
Rotation = "Alternating", # Fixed at the optimal level
Sensitivity = "Low", # Fixed at the baseline level
LedgeWidth = ledge_width_values
)
# Predict Time using the model
new_data$PredictedTime <- predict(M2, newdata = new_data)
Error in eval(predvars, data, env) : object 'LedgeWidth2' not found
library(ggplot2)
ggplot(new_data, aes(x = LedgeWidth, y = PredictedTime)) +
geom_line(color = "blue", linewidth = 1.5) +
labs(
title = "Predicted Time vs. LedgeWidth",
x = "LedgeWidth",
y = "Predicted Time"
) +
theme_minimal()
optimal_ledge_width <- new_data$LedgeWidth[which.min(new_data$PredictedTime)]
cat("The optimal LedgeWidth is:", optimal_ledge_width, "\n")
The optimal LedgeWidth is: 14.83773
Model assumption first (hypo?)