df <- read.csv('income_data_frame.csv')
str(df[, 1:5]) 
## 'data.frame':    10 obs. of  5 variables:
##  $ Date                : chr  "7/1/2021" "8/1/2021" "9/1/2021" "10/1/2021" ...
##  $ Actual.ADM          : int  146 146 144 151 155 161 170 175 179 169
##  $ Actual.Enrollment   : int  141 161 168 173 173 177 177 183 183 180
##  $ Projected.ADM       : int  134 134 140 140 149 149 149 157 157 162
##  $ Projected.Enrollment: int  134 134 140 140 149 149 149 157 157 162
df$Date <- mdy(df$Date)

df <- df %>%
      gather(8:10,
         key = revenue_type,
         value = revenue_amount) %>% 
     mutate(operating_margin = round(((Total.Revenue - Total.Expense) / Total.Revenue) * 100, 2)) %>% 
        select(Date,operating_margin,revenue_type,revenue_amount,everything()) %>% 
        gather(11:38,
         key = ledger,
         value = ledger_amount) %>%
  mutate(
    revenue_per_student = round(Total.Revenue / Actual.Enrollment, 2),
    expense_per_student = round(Total.Expense / Actual.Enrollment, 2),
    percent_revenue = round(revenue_amount / Total.Revenue, 2),
    percent_expense = round(ledger_amount/Total.Expense, 2))

head(df,5) 
##         Date operating_margin    revenue_type revenue_amount Actual.ADM
## 1 2021-07-01            -8.62 Program.Revenue          52862        146
## 2 2021-08-01             1.67 Program.Revenue          52862        146
## 3 2021-09-01           -30.12 Program.Revenue          52862        144
## 4 2021-10-01            18.96 Program.Revenue          52862        151
## 5 2021-11-01            10.48 Program.Revenue          52862        155
##   Actual.Enrollment Projected.ADM Projected.Enrollment Total.Revenue
## 1               141           134                  134         52862
## 2               161           134                  134         52862
## 3               168           140                  140         52862
## 4               173           140                  140         59460
## 5               173           149                  149         59460
##   Total.Expense            ledger ledger_amount revenue_per_student
## 1         57417 X60400.Curriculum         25000              374.91
## 2         51981 X60400.Curriculum         25421              328.34
## 3         68786 X60400.Curriculum         29129              314.65
## 4         48189 X60400.Curriculum         24120              343.70
## 5         53227 X60400.Curriculum         24570              343.70
##   expense_per_student percent_revenue percent_expense
## 1              407.21            1.00            0.44
## 2              322.86            1.00            0.49
## 3              409.44            1.00            0.42
## 4              278.55            0.89            0.50
## 5              307.67            0.89            0.46

Cleaned this data for Tableau.

operating_model <- read.csv('income_data_frame.csv')
operating_model <- operating_model %>% 
     mutate(operating_margin = round(((Total.Revenue - Total.Expense) / Total.Revenue) * 100, 2)) %>% 
     select(operating_margin,everything()) %>% 
     select(1:8) #keep operating margins
C_V <- cor(df[, c("Total.Revenue", "Total.Expense",
                  "Actual.Enrollment", "Actual.ADM")], use = "complete.obs")

# Enhanced correlation plot
corrplot(C_V, method = "color", type = "upper", 
  order = "hclust", addCoef.col = "black", 
  col = colorRampPalette(brewer.pal(n = 8, name = "RdYlBu"))(200),
  tl.col = "darkblue", tl.srt = 45, 
  diag = FALSE)

Large Dataset (df): This format produced better results for the revenue model because the reshaped structure was better to preserve key relationships, and variability across revenue-related columns.

Small Dataset (operating_model): Avoids additional variability or aggregation from reshaping, possibly simplifying the data for the margin calculation.

\[Revenue_i = \beta_1 + \beta_2Enrollment_i + \beta_3Expense_i + u_i\]

#Forecasting revenue.
model_revenue <- lm(Total.Revenue ~ Actual.Enrollment + Total.Expense, df)
summary(model_revenue)
## 
## Call:
## lm(formula = Total.Revenue ~ Actual.Enrollment + Total.Expense, 
##     data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -15910  -1097   2553   3368   3796 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        4.689e+04  3.844e+03  12.199  < 2e-16 ***
## Actual.Enrollment  7.686e+01  1.736e+01   4.427 1.08e-05 ***
## Total.Expense     -8.490e-02  2.992e-02  -2.837  0.00466 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5665 on 837 degrees of freedom
## Multiple R-squared:  0.04784,    Adjusted R-squared:  0.04557 
## F-statistic: 21.03 on 2 and 837 DF,  p-value: 1.228e-09

\[Revenue= 46890 + 76.86(Enrollment) - .09(Expense)\] \[Enrollment = 180\] \[Expense = 49188\] \[Revenue = 46890 + (76.86*(180)) - (.09*(49188)) = 56297.88\] Random numbers but the estimations are nearly the same as other monthly revenues.

We can rearrange the formal if we are looking for predictor for actual enrollment.

\[Enrollment = (46890 - .09(Expense) - Revenue) /\beta_3\] Lets say funding for this month was 56,358 and expenses amounted to 46,777 \[Enrollment = (46890 - .09(46777) - 56358) /(-76.86)\] \[Enrollment = 178\] Same can be done for expenses, ADM, etc.

\[OperatingMargin_i = \beta_1 + \beta_2Enrollment_i + \beta_3Revenue_i + \beta_4Expense_i + u_i\]

#Analyze operating margins efficiency.
model_margin <- lm(operating_margin ~ Actual.Enrollment + Total.Revenue + Total.Expense, operating_model)
summary(model_margin)
## 
## Call:
## lm(formula = operating_margin ~ Actual.Enrollment + Total.Revenue + 
##     Total.Expense, data = operating_model)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56588 -0.40082 -0.17666 -0.08669  1.55940 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -7.293e+00  5.492e+00  -1.328   0.2325    
## Actual.Enrollment -5.614e-02  2.313e-02  -2.428   0.0513 .  
## Total.Revenue      2.024e-03  4.551e-05  44.468 8.66e-09 ***
## Total.Expense     -1.742e-03  3.959e-05 -44.001 9.23e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8139 on 6 degrees of freedom
## Multiple R-squared:  0.9988, Adjusted R-squared:  0.9982 
## F-statistic:  1627 on 3 and 6 DF,  p-value: 4.049e-09

\[OperatingMargin = -(7.293) - 0.05614(180) + 0.002024(52999) -0.001742(49188)\] \[OperatingMargin = 4.18\]