project salim said alghefeili (138537) 2025-04-27 ```{r.part 1} library(readxl) muscat_data <- read_excel(“muscat_data.xlsx”) # Load libraries library(readxl) library(dplyr) ## ## Attaching package: ‘dplyr’ ## The following objects are masked from ‘package:stats’: ## ## filter, lag ## The following objects are masked from ‘package:base’: ## ## intersect, setdiff, setequal, union library(ggpubr) ## Loading required package: ggplot2 library(car) ## Loading required package: carData ## ## Attaching package: ‘car’ ## The following object is masked from ‘package:dplyr’: ## ## recode library(effects) ## lattice theme set by effectsTheme() ## See ?effectsTheme for details. library(gtsummary) library(broom)
library(broom.helpers) ## ## Attaching package: ‘broom.helpers’ ##
The following objects are masked from ‘package:gtsummary’: ## ##
all_categorical, all_continuous, all_contrasts, all_dichotomous, ##
all_interaction, all_intercepts # Import the data muscat_data <-
read_excel(“muscat_data.xlsx”) head(muscat_data) ## # A tibble: 6 × 6 ##
Location Category Sales Expenses Profit Employees ##
ggscatter(muscat_data, x = “Sales”, y = “Expenses”, color = “Category”, add = “reg.line”, conf.int = TRUE) + labs(title = “Expenses vs Sales by Category”)
by(muscat_data\(Profit,
muscat_data\)Category, shapiro.test) ## muscat_data\(Category: Retail
##
## Shapiro-Wilk normality test
##
## data: dd[x, ]
## W = 0.89428, p-value = 0.001309
##
## ------------------------------------------------------------
## muscat_data\)Category: Services ## ## Shapiro-Wilk normality
test ## ## data: dd[x, ] ## W = 0.94207, p-value = 0.1034 # Test for
Homogeneity of Variance leveneTest(Profit ~ Category, data =
muscat_data) ## Warning in leveneTest.default(y = y, group = group, …):
group coerced to ## factor. ## Levene’s Test for Homogeneity of Variance
(center = median) ## Df F value Pr(>F) ## group 1 0.8129 0.3705 ## 68
# ANOVA Analysis # 1. One-Way ANOVA anova1 <- aov(Profit ~ Category,
data = muscat_data) summary(anova1) ## Df Sum Sq Mean Sq F value
Pr(>F)
## Category 1 1891452 1891452 51.62 6.58e-10 ## Residuals
68 2491457 36639
## — ## Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’
0.05 ‘.’ 0.1 ’ ’ 1 # 2. Two-Way ANOVA anova2 <- aov(Profit ~
Category, data = muscat_data) Anova(anova2, type = 3) ## Anova Table
(Type III tests) ## ## Response: Profit ## Sum Sq Df F value
Pr(>F)
## (Intercept) 137863690 1 3762.751 < 2.2e-16 ##
Category 1891452 1 51.624 6.58e-10 *** ## Residuals 2491457 68
## — ## Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’
0.05 ‘.’ 0.1 ’ ’ 1 # 3. Effect Plot plot(allEffects(anova2), main =
“Interaction Effects: Location * Category”)
model <- lm(Profit ~ Sales + Expenses + Employees + Category, data
= muscat_data) summary(model) ## Warning in summary.lm(model):
essentially perfect fit: summary may be ## unreliable ## ## Call: ##
lm(formula = Profit ~ Sales + Expenses + Employees + Category, ## data =
muscat_data) ## ## Residuals: ## Min 1Q Median 3Q Max ## -1.510e-12
-2.480e-14 1.861e-14 5.834e-14 2.427e-13 ## ## Coefficients: ## Estimate
Std. Error t value Pr(>|t|)
## (Intercept) 0.000e+00 4.660e-13 0.000e+00 1.0000
## Sales 1.000e+00 2.701e-16 3.703e+15 <2e-16 ##
Expenses -1.000e+00 3.197e-16 -3.128e+15 <2e-16 ##
Employees -4.875e-14 2.439e-14 -1.999e+00 0.0498 *
## CategoryServices -3.818e-14 8.161e-14 -4.680e-01 0.6414
## — ## Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’
0.05 ‘.’ 0.1 ’ ’ 1 ## ## Residual standard error: 2.033e-13 on 65
degrees of freedom ## Multiple R-squared: 1, Adjusted R-squared: 1 ##
F-statistic: 2.652e+31 on 4 and 65 DF, p-value: < 2.2e-16 #
Diagnostic Plots par(mfrow = c(2, 2)) plot(model)
vif(model) ## Warning in summary.lm(object, …): essentially perfect fit: summary may be ## unreliable ## Sales Expenses Employees Category ## 110.138307 87.612871 39.302312 2.762847 # Publication-Ready Output # Regression Summary Table tbl_regression(model, exponentiate = FALSE) %>% bold_labels() ## Warning in summary.lm(x): essentially perfect fit: summary may be unreliable ## Warning in summary.lm(object, …): essentially perfect fit: summary may be ## unreliable Characteristic Beta 95% CI p-value Sales 1.0 1.0, 1.0 <0.001 Expenses -1.0 -1.0, -1.0 <0.001 Employees 0.00 0.00, 0.00 0.050 Category
Retail — —
Services 0.00 0.00, 0.00 0.6 Abbreviation: CI = Confidence Interval #
ANOVA Summary Table tidy(Anova(anova2, type = 3)) ## # A tibble: 3 × 5
## term sumsq df statistic p.value ##
```{r.part 2}
library(shiny)
library(readxl)
library(dplyr)
library(car)
library(ggplot2)
library(ggpubr)
# Load data
muscat_data <- read_excel("muscat_data.xlsx")
# UI
ui <- fluidPage(
titlePanel("Muscat Business Profit Analysis"),
sidebarLayout(
sidebarPanel(
checkboxGroupInput("predictors", "Select Predictors:",
choices = names(muscat_data)[!(names(muscat_data) %in% c("Profit", "Location"))],
selected = c("Sales", "Expenses", "Employees", "Category"))
),
mainPanel(
tabsetPanel(
tabPanel("Model Summary", verbatimTextOutput("model_summary")),
tabPanel("VIF Table", tableOutput("vif_table")),
tabPanel("Diagnostic Plots",
plotOutput("resid_fitted"),
plotOutput("qq_plot"),
plotOutput("scale_loc"))
)
)
)
)
# Server
server <- function(input, output) {
model_reactive <- reactive({
req(input$predictors)
predictors_formula <- paste(input$predictors, collapse = " + ")
formula <- as.formula(paste("Profit ~", predictors_formula))
lm(formula, data = muscat_data)
})
output$model_summary <- renderPrint({
summary(model_reactive())
})
output$vif_table <- renderTable({
vif_values <- vif(model_reactive())
data.frame(Variable = names(vif_values), VIF = round(vif_values, 2))
}, striped = TRUE)
output$resid_fitted <- renderPlot({
model <- model_reactive()
plot(model$fitted.values, resid(model),
xlab = "Fitted Values", ylab = "Residuals", main = "Residuals vs Fitted")
abline(h = 0, col = "red")
})
output$qq_plot <- renderPlot({
qqnorm(resid(model_reactive()), main = "Normal Q-Q Plot")
qqline(resid(model_reactive()), col = "red")
})
output$scale_loc <- renderPlot({
model <- model_reactive()
sqrt_abs_resid <- sqrt(abs(resid(model)))
plot(model$fitted.values, sqrt_abs_resid,
xlab = "Fitted Values", ylab = "Sqrt(|Residuals|)", main = "Scale-Location Plot")
abline(h = mean(sqrt_abs_resid), col = "blue")
})
}
# Run app
shinyApp(ui = ui, server = server)