project salim said alghefeili (138537) 2025-04-27 ```{r.part 1} library(readxl) muscat_data <- read_excel(“muscat_data.xlsx”) # Load libraries library(readxl) library(dplyr) ## ## Attaching package: ‘dplyr’ ## The following objects are masked from ‘package:stats’: ## ## filter, lag ## The following objects are masked from ‘package:base’: ## ## intersect, setdiff, setequal, union library(ggpubr) ## Loading required package: ggplot2 library(car) ## Loading required package: carData ## ## Attaching package: ‘car’ ## The following object is masked from ‘package:dplyr’: ## ## recode library(effects) ## lattice theme set by effectsTheme() ## See ?effectsTheme for details. library(gtsummary) library(broom)

library(broom.helpers) ## ## Attaching package: ‘broom.helpers’ ## The following objects are masked from ‘package:gtsummary’: ## ## all_categorical, all_continuous, all_contrasts, all_dichotomous, ## all_interaction, all_intercepts # Import the data muscat_data <- read_excel(“muscat_data.xlsx”) head(muscat_data) ## # A tibble: 6 × 6 ## Location Category Sales Expenses Profit Employees ## ## 1 Muscat Retail 5210 3200 2010 15 ## 2 Muscat Services 6400 4200 2200 25 ## 3 Muscat Retail 4830 3000 1830 12 ## 4 Muscat Retail 5920 3800 2120 18 ## 5 Muscat Services 7600 5000 2600 30 ## 6 Muscat Services 5010 3400 1610 14 # Exploratory Data Analysis # 1. Summary Statistics muscat_data %>% group_by(Location, Category) %>% summarise(across(where(is.numeric), list(mean = mean, sd = sd), .names = “{.col}_{.fn}“), .groups =”drop”) ## # A tibble: 2 × 10 ## Location Category Sales_mean Sales_sd Expenses_mean Expenses_sd Profit_mean ## ## 1 Muscat Retail 5232. 611. 3375 455. 1856. ## 2 Muscat Services 6672 650. 4483. 465. 2189. ## # ℹ 3 more variables: Profit_sd , Employees_mean , Employees_sd # 2. Visualizations # Boxplot: Profit by Category ggboxplot(muscat_data, x = “Category”, y = “Profit”, color = “Category”, palette = “jco”, add = “jitter”, title = “Profit by Category”)

Scatterplot: Expenses vs Sales

ggscatter(muscat_data, x = “Sales”, y = “Expenses”, color = “Category”, add = “reg.line”, conf.int = TRUE) + labs(title = “Expenses vs Sales by Category”)

Assumptions and Statistical Framework

Test for Normality

by(muscat_data\(Profit, muscat_data\)Category, shapiro.test) ## muscat_data\(Category: Retail ## ## Shapiro-Wilk normality test ## ## data: dd[x, ] ## W = 0.89428, p-value = 0.001309 ## ## ------------------------------------------------------------ ## muscat_data\)Category: Services ## ## Shapiro-Wilk normality test ## ## data: dd[x, ] ## W = 0.94207, p-value = 0.1034 # Test for Homogeneity of Variance leveneTest(Profit ~ Category, data = muscat_data) ## Warning in leveneTest.default(y = y, group = group, …): group coerced to ## factor. ## Levene’s Test for Homogeneity of Variance (center = median) ## Df F value Pr(>F) ## group 1 0.8129 0.3705 ## 68 # ANOVA Analysis # 1. One-Way ANOVA anova1 <- aov(Profit ~ Category, data = muscat_data) summary(anova1) ## Df Sum Sq Mean Sq F value Pr(>F)
## Category 1 1891452 1891452 51.62 6.58e-10 ## Residuals 68 2491457 36639
## — ## Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1 # 2. Two-Way ANOVA anova2 <- aov(Profit ~ Category, data = muscat_data) Anova(anova2, type = 3) ## Anova Table (Type III tests) ## ## Response: Profit ## Sum Sq Df F value Pr(>F)
## (Intercept) 137863690 1 3762.751 < 2.2e-16 ## Category 1891452 1 51.624 6.58e-10 *** ## Residuals 2491457 68
## — ## Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1 # 3. Effect Plot plot(allEffects(anova2), main = “Interaction Effects: Location * Category”)

Linear Regression and Diagnostics

Fit the Model

model <- lm(Profit ~ Sales + Expenses + Employees + Category, data = muscat_data) summary(model) ## Warning in summary.lm(model): essentially perfect fit: summary may be ## unreliable ## ## Call: ## lm(formula = Profit ~ Sales + Expenses + Employees + Category, ## data = muscat_data) ## ## Residuals: ## Min 1Q Median 3Q Max ## -1.510e-12 -2.480e-14 1.861e-14 5.834e-14 2.427e-13 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.000e+00 4.660e-13 0.000e+00 1.0000
## Sales 1.000e+00 2.701e-16 3.703e+15 <2e-16 ## Expenses -1.000e+00 3.197e-16 -3.128e+15 <2e-16 ## Employees -4.875e-14 2.439e-14 -1.999e+00 0.0498 *
## CategoryServices -3.818e-14 8.161e-14 -4.680e-01 0.6414
## — ## Signif. codes: 0 ‘’ 0.001 ’’ 0.01 ’’ 0.05 ‘.’ 0.1 ’ ’ 1 ## ## Residual standard error: 2.033e-13 on 65 degrees of freedom ## Multiple R-squared: 1, Adjusted R-squared: 1 ## F-statistic: 2.652e+31 on 4 and 65 DF, p-value: < 2.2e-16 # Diagnostic Plots par(mfrow = c(2, 2)) plot(model)

Check Multicollinearity

vif(model) ## Warning in summary.lm(object, …): essentially perfect fit: summary may be ## unreliable ## Sales Expenses Employees Category ## 110.138307 87.612871 39.302312 2.762847 # Publication-Ready Output # Regression Summary Table tbl_regression(model, exponentiate = FALSE) %>% bold_labels() ## Warning in summary.lm(x): essentially perfect fit: summary may be unreliable ## Warning in summary.lm(object, …): essentially perfect fit: summary may be ## unreliable Characteristic Beta 95% CI p-value Sales 1.0 1.0, 1.0 <0.001 Expenses -1.0 -1.0, -1.0 <0.001 Employees 0.00 0.00, 0.00 0.050 Category

Retail — —
Services 0.00 0.00, 0.00 0.6 Abbreviation: CI = Confidence Interval # ANOVA Summary Table tidy(Anova(anova2, type = 3)) ## # A tibble: 3 × 5 ## term sumsq df statistic p.value ## ## 1 (Intercept) 137863690. 1 3763. 2.89e-61 ## 2 Category 1891452. 1 51.6 6.58e-10 ## 3 Residuals 2491457. 68 NA NA


```{r.part 2}

library(shiny)
library(readxl)
library(dplyr)
library(car)
library(ggplot2)
library(ggpubr)

# Load data
muscat_data <- read_excel("muscat_data.xlsx")

# UI
ui <- fluidPage(
  titlePanel("Muscat Business Profit Analysis"),
  sidebarLayout(
    sidebarPanel(
      checkboxGroupInput("predictors", "Select Predictors:",
                         choices = names(muscat_data)[!(names(muscat_data) %in% c("Profit", "Location"))],
                         selected = c("Sales", "Expenses", "Employees", "Category"))
    ),
    mainPanel(
      tabsetPanel(
        tabPanel("Model Summary", verbatimTextOutput("model_summary")),
        tabPanel("VIF Table", tableOutput("vif_table")),
        tabPanel("Diagnostic Plots", 
                 plotOutput("resid_fitted"),
                 plotOutput("qq_plot"),
                 plotOutput("scale_loc"))
      )
    )
  )
)

# Server
server <- function(input, output) {
  model_reactive <- reactive({
    req(input$predictors)
    predictors_formula <- paste(input$predictors, collapse = " + ")
    formula <- as.formula(paste("Profit ~", predictors_formula))
    lm(formula, data = muscat_data)
  })
  
  output$model_summary <- renderPrint({
    summary(model_reactive())
  })
  
  output$vif_table <- renderTable({
    vif_values <- vif(model_reactive())
    data.frame(Variable = names(vif_values), VIF = round(vif_values, 2))
  }, striped = TRUE)
  
  output$resid_fitted <- renderPlot({
    model <- model_reactive()
    plot(model$fitted.values, resid(model),
         xlab = "Fitted Values", ylab = "Residuals", main = "Residuals vs Fitted")
    abline(h = 0, col = "red")
  })
  
  output$qq_plot <- renderPlot({
    qqnorm(resid(model_reactive()), main = "Normal Q-Q Plot")
    qqline(resid(model_reactive()), col = "red")
  })
  
  output$scale_loc <- renderPlot({
    model <- model_reactive()
    sqrt_abs_resid <- sqrt(abs(resid(model)))
    plot(model$fitted.values, sqrt_abs_resid,
         xlab = "Fitted Values", ylab = "Sqrt(|Residuals|)", main = "Scale-Location Plot")
    abline(h = mean(sqrt_abs_resid), col = "blue")
  })
}

# Run app
shinyApp(ui = ui, server = server)