Load and Explore Data

# Read the Dodgers data set from the CSV file (path is relative to the
# current working directory).
dodgers_data <- read.csv(file = "DodgersData (2) (1).csv")

# Print per-column summary statistics for a first look at the data.
summary(object = dodgers_data)
##     month                day            attend      day_of_week       
##  Length:81          Min.   : 1.00   Min.   :24312   Length:81         
##  Class :character   1st Qu.: 8.00   1st Qu.:34493   Class :character  
##  Mode  :character   Median :15.00   Median :40284   Mode  :character  
##                     Mean   :16.14   Mean   :41040                     
##                     3rd Qu.:25.00   3rd Qu.:46588                     
##                     Max.   :31.00   Max.   :56000                     
##    opponent              temp          skies            day_night        
##  Length:81          Min.   :54.00   Length:81          Length:81         
##  Class :character   1st Qu.:67.00   Class :character   Class :character  
##  Mode  :character   Median :73.00   Mode  :character   Mode  :character  
##                     Mean   :73.15                                        
##                     3rd Qu.:79.00                                        
##                     Max.   :95.00                                        
##      cap               shirt            fireworks          bobblehead       
##  Length:81          Length:81          Length:81          Length:81         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 

Classification Tree

# Turn every categorical column into a factor in a single vectorised step
# instead of one assignment per column.
categorical_cols <- c(
  "day_of_week", "opponent", "skies", "day_night",
  "cap", "shirt", "fireworks", "bobblehead"
)
dodgers_data[categorical_cols] <- lapply(
  dodgers_data[categorical_cols],
  as.factor
)

# Derive the binary target for the classification tree: games whose
# attendance is strictly above the median are labelled "High", all others
# (including games exactly at the median) are labelled "Low". The label is
# stored as a factor.
dodgers_data[["high_attendance"]] <- factor(
  ifelse(
    dodgers_data[["attend"]] > median(dodgers_data[["attend"]]),
    "High",
    "Low"
  )
)

# Build the classification tree: predict the High/Low attendance label from
# the day of week, opponent, temperature, sky condition, day/night flag and
# the four promotion columns.
# rpart is namespace-qualified so the call does not depend on a
# library(rpart) call having been run earlier.
res <- rpart::rpart(
  high_attendance ~ day_of_week + opponent + temp + skies + day_night +
    cap + shirt + fireworks + bobblehead,
  data = dodgers_data,
  method = "class"  # fit a classification (not regression) tree
)

# Visualize the classification tree with softer, floral-inspired colors.
# visTree is namespace-qualified so the call does not depend on a
# library(visNetwork) call having been run earlier.
visNetwork::visTree(
  res,
  main = "Dodgers Attendance Classification Tree",
  width = "100%",
  # Class colors: soft pink for "High", light yellow for "Low".
  colorY = c("High" = "#FFC0CB", "Low" = "lightyellow"),
  # Soft light green for the edges between nodes.
  colorEdges = "#90EE90"
)