library(rpart)
## Warning: package 'rpart' was built under R version 4.3.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Loading required package: lattice
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.3.3
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(ggplot2)

# Indy Ignite, December: read sheet 2 of the monthly media report workbook.
indy_dec <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_december_media_report.xlsx",
  sheet = 2
)
table(indy_dec$`Location Category`)
## 
## Local 
##    21

# Count the "Local" articles. sum() adds up the TRUE values of the comparison;
# length() would only report the number of rows, whatever their category.
sum(indy_dec$`Location Category` == "Local", na.rm = TRUE)
## [1] 21
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
local_df <- indy_dec[indy_dec$`Location Category` %in% "Local", ]
local_df
## # A tibble: 21 × 15
##    Article               URL   Author `Media Outlet` Published           Snippet
##    <chr>                 <chr> <chr>  <chr>          <dttm>              <chr>  
##  1 Indy Ignite preparin… http… Charl… IndyStar       2024-12-29 07:00:00 "But, …
##  2 Clean Sweep - Indian… http… Andre… Indianapolis … 2024-12-27 08:03:13 "At th…
##  3 'It's a perfect mark… http… <NA>   IndyStar       2024-12-20 07:00:00 "But, …
##  4 'It's a perfect mark… http… <NA>   IndyStar       2024-12-19 23:04:50 "But, …
##  5 Fishers to host Pro … http… <NA>   Current Publi… 2024-12-16 12:19:12 "Pro V…
##  6 Fishers Event Center… http… <NA>   WISH-TV (Indi… 2024-12-16 11:40:16 "(WISH…
##  7 Win tickets to see t… http… <NA>   WRTV-TV (Indi… 2024-12-16 10:53:52 "Indy …
##  8 Indy Ignite, Fishers… http… <NA>   Indianapolis … 2024-12-16 10:09:38 "Ignit…
##  9 'Nothing short of ep… http… <NA>   WTHR-TV (Indi… 2024-12-16 09:44:47 "The P…
## 10 Indy Ignite, Fishers… http… Matt … WTTV-TV (Indi… 2024-12-16 06:35:27 "In Fe…
## # ℹ 11 more rows
## # ℹ 9 more variables: `UVM (Insights by Similarweb)` <dbl>,
## #   `Journalist Shares` <dbl>, `Journalist Reach` <dbl>,
## #   `Total Engagement` <dbl>, Sentiment <chr>,
## #   `Advertising Value Equivalency` <dbl>, `Pitch Placement` <lgl>,
## #   Location <chr>, `Location Category` <chr>
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  local_df[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     11851508                            5 
##             Journalist Reach             Total Engagement 
##                        37239                          742
# Indy Ignite, January 2025: read sheet 2 of the monthly media report workbook.
indy_jan <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_ignite_january_media_report_2025-01-01--2025-01-31 (version 1).xlsx",
  sheet = 2
)
nrow(indy_jan)
## [1] 212
table(indy_jan$`Location Category`)
## 
## International         Local    Semi-Local           USA 
##            30            72            12            97
# The table accounts for 211 of the 212 rows, so one row has no Location
# Category. `%in%` never returns NA, which keeps that row out of the subset;
# an `== "Local"` mask would hand it back as an extra all-NA row.
local_df2 <- indy_jan[indy_jan$`Location Category` %in% "Local", ]
nrow(local_df2)
## [1] 72
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  local_df2[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                    120437535                           26 
##             Journalist Reach             Total Engagement 
##                       453453                         2163
# Indy Ignite, February 2025: read sheet 2 of the monthly media report
# workbook.
indy_feb <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_ignite_february_media_report_2025-02-01--2025-02-28 (version 1).xlsx",
  sheet = 2
)
nrow(indy_feb)
## [1] 161
table(indy_feb$`Location Category`)
## 
## International         Local    Semi-Local           USA 
##            11            74             3            73
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
local_df3 <- indy_feb[indy_feb$`Location Category` %in% "Local", ]
nrow(local_df3)
## [1] 74
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  local_df3[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     74791322                            6 
##             Journalist Reach             Total Engagement 
##                        72844                          564
# Indy Ignite, March 2025: read sheet 2 of the monthly media report workbook.
indy_mar <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_ignite_march_media_report_2025-03-01--2025-03-31.xlsx",
  sheet = 2
)
nrow(indy_mar)
## [1] 76
table(indy_mar$`Location Category`)
## 
## International         Local    Semi-Local           USA 
##             2            16             1            57
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
local_df4 <- indy_mar[indy_mar$`Location Category` %in% "Local", ]
nrow(local_df4)
## [1] 16
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  local_df4[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                      9815948                            1 
##             Journalist Reach             Total Engagement 
##                         8408                          108
# Indy Ignite, April 2025: read sheet 2 of the monthly media report workbook.
indy_apr <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_ignite_april_media_report_2025-04-01--2025-04-30.xlsx",
  sheet = 2
)
nrow(indy_apr)
## [1] 68
table(indy_apr$`Location Category`)
## 
## International         Local           USA 
##             1            32            35
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
local_df5 <- indy_apr[indy_apr$`Location Category` %in% "Local", ]
nrow(local_df5)
## [1] 32
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  local_df5[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     49817738                            1 
##             Journalist Reach             Total Engagement 
##                        16253                          478
# Indy Ignite, May 2025: read sheet 2 of the monthly media report workbook.
indy_may <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_ignite_may_media_report_2025-05-01--2025-05-31 (1).xlsx",
  sheet = 2
)
nrow(indy_may)
## [1] 87
table(indy_may$`Location Category`)
## 
## International         Local           USA 
##            11            31            45
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
local_df6 <- indy_may[indy_may$`Location Category` %in% "Local", ]
nrow(local_df6)
## [1] 31
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  local_df6[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     17062632                            0 
##             Journalist Reach             Total Engagement 
##                            0                           98
# Hand-entered monthly summary, one row per month from December to May.
# Column names are written in their dotted form, which is the form the plots
# further down refer to (Local.Articles, Earned.Media, ...).
# NOTE(review): the Local.Articles values for January-April (74, 76, 18, 34)
# differ from the "Local" counts printed by table() for those months
# (72, 74, 16, 32) -- confirm which figures are authoritative.
df <- data.frame(
  Month             = c("December", "January", "February",
                        "March", "April", "May"),
  Local.Articles    = c(21, 74, 76, 18, 34, 31),
  Local.Percentage  = c(100, 35, 47, 23, 50, 36),
  Earned.Media      = c(109626.45, 1114047.20, 691819.73,
                        90797.52, 460814.08, 157829.35),
  Earned.Percentage = c(55, 4, 10, 2, 5, 4),
  Total.Engagement  = c(742, 3287, 781, 258, 624, 309),
  Local.Engagement  = c(742, 2163, 564, 108, 478, 98)
)

# Show the assembled summary table
print(df)
##      Month Local.Articles Local.Percentage Earned.Media Earned.Percentage
## 1 December             21              100    109626.45                55
## 2  January             74               35   1114047.20                 4
## 3 February             76               47    691819.73                10
## 4    March             18               23     90797.52                 2
## 5    April             34               50    460814.08                 5
## 6      May             31               36    157829.35                 4
##   Total.Engagement Local.Engagement
## 1              742              742
## 2             3287             2163
## 3              781              564
## 4              258              108
## 5              624              478
## 6              309               98
library(ggplot2)

# Store Month as a factor with explicit levels so the x axis runs from
# December to May instead of in alphabetical order.
df$Month <- factor(
  df$Month,
  levels = c("December", "January", "February", "March", "April", "May")
)


# Line chart of the local article count per month. `group = 1` joins the
# points into a single line even though the x axis is discrete.
ggplot(df, aes(x = Month, y = Local.Articles, group = 1)) +
  # `linewidth` sets the line thickness; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raises a warning.
  geom_line(color = "steelblue", linewidth = 1.2) +
  geom_point(color = "darkblue", size = 3) +
  labs(title = "Local Articles by Month", y = "Number of Local Articles") +
  theme_minimal()

# Bar chart of earned media value per month. geom_col() draws each bar at the
# height stored in Earned.Media rather than counting rows.
ggplot(data = df, mapping = aes(Month, Earned.Media)) +
  geom_col(fill = "darkgreen") +
  labs(
    title = "Earned Media Value by Month",
    y = "Earned Media ($)"
  ) +
  theme_minimal()

library(tidyr)

# Reshape the two engagement columns into long form: one row per month and
# engagement type, with the type name in "Engagement Type" and the number in
# "Value".
df_long <- pivot_longer(
  df,
  cols = c(Local.Engagement, Total.Engagement),
  names_to = "Engagement Type",
  values_to = "Value"
)

# Side-by-side bars comparing local and total engagement for each month.
ggplot(data = df_long, mapping = aes(Month, Value, fill = `Engagement Type`)) +
  geom_col(position = "dodge") +
  labs(
    title = "Local vs Total Engagement",
    y = "Engagement"
  ) +
  theme_minimal()

# Line chart of the local share of coverage per month. `group = 1` joins the
# points into a single line even though the x axis is discrete.
ggplot(df, aes(x = Month, y = Local.Percentage, group = 1)) +
  # `linewidth` sets the line thickness; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raises a warning.
  geom_line(color = "orange", linewidth = 1.2) +
  geom_point(color = "red", size = 3) +
  labs(title = "Local Media Percentage Over Time", y = "Local %") +
  theme_minimal()

# Indy Fuel, October 2024: read sheet 2 of the monthly report workbook.
fuel_oct <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_fuel_-_oct_2024_2024-10-01--2024-10-31.xlsx",
  sheet = 2
)
nrow(fuel_oct)
## [1] 55
table(fuel_oct$`Location Category`)
## 
## International         Local           USA 
##             4            32            19
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
fuel_df <- fuel_oct[fuel_oct$`Location Category` %in% "Local", ]
nrow(fuel_df)
## [1] 32
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  fuel_df[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                      1503171                            7 
##             Journalist Reach             Total Engagement 
##                        18584                          207
# Indy Fuel, November 2024: read sheet 2 of the monthly report workbook.
fuel_nov <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_fuel_-_nov_2024_2024-11-01--2024-11-30.xlsx",
  sheet = 2
)
nrow(fuel_nov)
## [1] 69
table(fuel_nov$`Location Category`)
## 
## International         Local           USA 
##             4            41            24
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
fuel_df2 <- fuel_nov[fuel_nov$`Location Category` %in% "Local", ]
nrow(fuel_df2)
## [1] 41
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  fuel_df2[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     19305474                            2 
##             Journalist Reach             Total Engagement 
##                         1162                          184
# Indy Fuel, December: read the monthly report workbook.
# NOTE(review): no `sheet` is given, so read_excel() falls back to its default
# (the first sheet), whereas most other reports read sheet 2 -- confirm this
# workbook keeps the article data on its first sheet.
fuel_dec <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_fuel_dec.xlsx"
)
nrow(fuel_dec)
## [1] 123
table(fuel_dec$`Location Category`)
## 
## International         Local           USA 
##             6            71            46
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
fuel_df3 <- fuel_dec[fuel_dec$`Location Category` %in% "Local", ]
nrow(fuel_df3)
## [1] 71
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  fuel_df3[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     21451927                           12 
##             Journalist Reach             Total Engagement 
##                        34149                         1698
# Indy Fuel, January: read the monthly report workbook.
# NOTE(review): no `sheet` is given, so read_excel() falls back to its default
# (the first sheet), whereas most other reports read sheet 2 -- confirm this
# workbook keeps the article data on its first sheet.
fuel_jan <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_fuel_january.xlsx"
)
nrow(fuel_jan)
## [1] 89
table(fuel_jan$`Location Category`)
## 
## International         Local           USA 
##             3            46            40
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
fuel_df4 <- fuel_jan[fuel_jan$`Location Category` %in% "Local", ]
nrow(fuel_df4)
## [1] 46
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  fuel_df4[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     37630101                            4 
##             Journalist Reach             Total Engagement 
##                        10407                          190
# Indy Fuel, February: read the monthly report workbook.
# NOTE(review): no `sheet` is given, so read_excel() falls back to its default
# (the first sheet), whereas most other reports read sheet 2 -- confirm this
# workbook keeps the article data on its first sheet.
fuel_feb <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_fuel_feb.xlsx"
)
nrow(fuel_feb)
## [1] 67
table(fuel_feb$`Location Category`)
## 
## International         Local           USA 
##             4            36            27
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
fuel_df5 <- fuel_feb[fuel_feb$`Location Category` %in% "Local", ]
nrow(fuel_df5)
## [1] 36
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  fuel_df5[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     33947605                            7 
##             Journalist Reach             Total Engagement 
##                         6787                          433
# Indy Fuel, March: read the monthly report workbook.
# NOTE(review): no `sheet` is given, so read_excel() falls back to its default
# (the first sheet), whereas most other reports read sheet 2 -- confirm this
# workbook keeps the article data on its first sheet.
fuel_mar <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_fuel_march.xlsx"
)
nrow(fuel_mar)
## [1] 86
table(fuel_mar$`Location Category`)
## 
## International         Local           USA 
##             4            32            49
# The table accounts for 85 of the 86 rows, so one row has no Location
# Category. `%in%` never returns NA, which keeps that row out of the subset;
# an `== "Local"` mask would hand it back as an extra all-NA row.
fuel_df6 <- fuel_mar[fuel_mar$`Location Category` %in% "Local", ]
nrow(fuel_df6)
## [1] 32
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  fuel_df6[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     35283247                            1 
##             Journalist Reach             Total Engagement 
##                         2172                          147
# Indy Fuel, April: read sheet 2 of the monthly report workbook.
fuel_apr <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_fuel_april.xlsx",
  sheet = 2
)
nrow(fuel_apr)
## [1] 84
table(fuel_apr$`Location Category`)
## 
## International         Local           USA 
##             5            29            50
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
fuel_df7 <- fuel_apr[fuel_apr$`Location Category` %in% "Local", ]
nrow(fuel_df7)
## [1] 29
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  fuel_df7[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     32329451                            7 
##             Journalist Reach             Total Engagement 
##                        39823                          117
# Indy Eleven, February 2025: read sheet 2 of the monthly report workbook.
eleven_feb <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_eleven_-_feb_2025_2025-02-01--2025-02-28.xlsx",
  sheet = 2
)
nrow(eleven_feb)
## [1] 42
table(eleven_feb$`Location Category`)
## 
## International         Local           USA 
##             7            12            23
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
eleven_df <- eleven_feb[eleven_feb$`Location Category` %in% "Local", ]
nrow(eleven_df)
## [1] 12
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  eleven_df[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                      5999092                            3 
##             Journalist Reach             Total Engagement 
##                        33105                          109
# Indy Eleven, March 2025: read sheet 2 of the monthly report workbook.
eleven_mar <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_eleven_-_march_2025_2025-03-01--2025-03-31.xlsx",
  sheet = 2
)
nrow(eleven_mar)
## [1] 72
table(eleven_mar$`Location Category`)
## 
## International         Local           USA 
##            11            27            34
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
eleven_df2 <- eleven_mar[eleven_mar$`Location Category` %in% "Local", ]
nrow(eleven_df2)
## [1] 27
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  eleven_df2[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     49265287                            1 
##             Journalist Reach             Total Engagement 
##                         7664                          178
# Indy Eleven, April 2025: read sheet 2 of the monthly report workbook.
eleven_apr <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indy_eleven_-_april_2025_2025-04-01--2025-04-30.xlsx",
  sheet = 2
)
nrow(eleven_apr)
## [1] 64
table(eleven_apr$`Location Category`)
## 
## International         Local           USA 
##             8            19            37
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
eleven_df3 <- eleven_apr[eleven_apr$`Location Category` %in% "Local", ]
nrow(eleven_df3)
## [1] 19
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  eleven_df3[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     44938416                            1 
##             Journalist Reach             Total Engagement 
##                         7670                           37
# Indianapolis Indians, April 2025: read sheet 2 of the monthly report
# workbook.
indians_apr <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indianapolis_indians_-_april_2025_2025-04-01--2025-04-30.xlsx",
  sheet = 2
)
nrow(indians_apr)
## [1] 108
table(indians_apr$`Location Category`)
## 
## International         Local           USA 
##            18            23            67
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
indians_df <- indians_apr[indians_apr$`Location Category` %in% "Local", ]
nrow(indians_df)
## [1] 23
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  indians_df[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     58198749                            3 
##             Journalist Reach             Total Engagement 
##                        62997                          224
# Indianapolis Indians, September 2024: read sheet 2 of the monthly report
# workbook.
indians_sep <- read_excel(
  "C:\\Users\\brian\\OneDrive\\Documents\\Indy Ignite Summer Project\\indianapolis_indians_-_sept_2024_2024-09-01--2024-09-30.xlsx",
  sheet = 2
)
nrow(indians_sep)
## [1] 113
table(indians_sep$`Location Category`)
## 
## International         Local           USA 
##            12            37            64
# Keep only the local coverage. `%in%` never returns NA, so a row with a
# missing Location Category is dropped rather than returned as an all-NA row.
indians_df2 <- indians_sep[indians_sep$`Location Category` %in% "Local", ]
nrow(indians_df2)
## [1] 37
# Column totals of the audience and engagement metrics for the local articles;
# missing values are ignored.
colSums(
  indians_df2[, c(
    "UVM (Insights by Similarweb)", "Journalist Shares",
    "Journalist Reach", "Total Engagement"
  )],
  na.rm = TRUE
)
## UVM (Insights by Similarweb)            Journalist Shares 
##                     13932215                            6 
##             Journalist Reach             Total Engagement 
##                       135258                          563