Este documento proporciona un análisis de los datos financieros de la empresa Nala.
Índice
Cargar bibliotecas necesarias
Leer los datos de gastos y ventas de NALA 2020-2024
Combinar los datos
Análisis financiero
4.1 Ventas totales vs. gastos
Análisis de las ventas
5.1 Gráficas: Ventas a lo largo del tiempo
5.2 Tendencias de ventas por plataforma
5.3 Modelos lineales y resultados
Estado de Resultados
6.1 Cálculo de Utilidades
6.2 Cálculo de márgenes y porcentajes
1. Cargar bibliotecas
Code
library(tidyverse) # For data manipulation and visualization; includes readr, dplyr, ggplot2library(janitor) # For cleaning data and making tabular outputs cleanerlibrary(scales) # For scaling graphical outputslibrary(RColorBrewer) # For using color palettes in visualizationslibrary(plotly) # For interactive web-based graphicslibrary(stringr) # For string manipulation (also part of tidyverse but listed for clarity)library(knitr) # For dynamic report generation in Rlibrary(kableExtra) # For enhancing 'kable' tables in R Markdownlibrary(broom) # For converting statistical analysis objects into tidy data frameslibrary(DT) # For creating interactive tables in R
2. Cargar los datos
2.1 Leer datos de gastos 2020-2024
Code
# Step 1: Read and clean the raw data, and rename it to raw_expense_dataraw_expense_data <-read_csv("/Users/javier.patron/Downloads/pagos Nala - Expenses Data Analisis (2).csv") %>%clean_names() %>%# Set the first row as column names and remove the first row { setNames(.[-1,], as.character(.[1,])) } %>%clean_names() %>%select_if(~!all(is.na(.)))# Step 2: Create three separate data frames for each set of columnsdf1 <- raw_expense_data %>%select(clasificacion, fecha, empresa, cantidad, producto)df2 <- raw_expense_data %>%select(clasificacion = clasificacion_2, fecha = fecha_2, empresa = empresa_2, cantidad = cantidad_2, producto = producto_2)df3 <- raw_expense_data %>%select(clasificacion = clasificacion_3, fecha = fecha_3, empresa = empresa_3, cantidad = cantidad_3, producto = producto_3)# Step 3: Combine all three data frames and name it as expenses_dataexpenses_data <-bind_rows(df1, df2, df3) %>%# Remove rows where all columns are NA (if any)filter(!if_all(everything(), is.na))# Clean the dataexpenses_data <- expenses_data %>%mutate(fecha = lubridate::dmy(fecha), año = lubridate::year(fecha),mes = lubridate::month(fecha, label =TRUE),quarter = lubridate::quarter(fecha),# Convert parentheses numbers to negative and clean other symbolscantidad =as.numeric(ifelse(grepl("\\(", cantidad), paste0("-", gsub("[^0-9.]", "", cantidad)), gsub("[^0-9.]", "", cantidad)))) %>%mutate(source =case_when(str_detect(clasificacion, "Gastos") ~"Gastos",str_detect(clasificacion, "Costos") ~"Costos",TRUE~"other"# This handles any other cases, if they exist )) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter", "empresa", "cantidad", "producto")
2.2 Leer datos de ventas 2020-2024
Code
# Read the CSV filesales_data <-read_csv("/Users/javier.patron/Downloads/pagos Nala - Ventas Data Analisis (1).csv") %>%clean_names()# Clean and Organize the datasales_data_long <- sales_data %>%pivot_longer(cols = jan:dec, names_to ="month", values_to ="cantidad") %>%rename(año = ano, tipo_venta = tipo) %>%mutate(month =match(tolower(month), tolower(month.abb)), # Convert month abbreviation to numbermes =month(month, label =TRUE, abbr =TRUE), # Convert to lubridate month formatsource ="Ventas",fecha =make_date(year = año, month =month(month), day =days_in_month(month(month, label =FALSE))), # Add last day of the monthclasificacion = tipo_venta,quarter =quarter(month),cantidad =as.numeric(gsub("[$,]", "", cantidad))) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter" , "cantidad")sales_data_long <- sales_data_long %>%arrange(fecha) %>%slice(1:168)
3. Combinar los datos
Code
combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Gastos Admin"~"Gastos de Admin", clasificacion =="Gastos Ventas"~"Gastos de Ventas", clasificacion =="Costos Ventas"~"Costos de Ventas", clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion # Default case to handle any unexpected values ) )
3.1 Tabla de Gastos
3.2 Tabla de Ventas
Code
# Create an interactive table for expenses_data with smaller fontdatatable(sales_data, options =list(autoWidth =TRUE, searching =TRUE), caption ='Expenses Data') %>%formatStyle(columns =names(sales_data),fontSize ='10px')
4. Análisis Financiero
4.1 Ventas Totales vs. Gastos
Code
# Summarize total sales and expenses across the entire datasettotal_summary <- combined_data %>%group_by(source) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE))# Define a custom color palette similar to Set1 or Set2custom_palette <-c("#E41A1C", "#377EB8", "#4DAF4A")# Create the ggplot objectp <-ggplot(total_summary, aes(x = source, y = total_amount, fill = source)) +geom_bar(stat ="identity") +scale_fill_manual(values = custom_palette) +# Apply the custom palettelabs(title ="Total Sales vs. Expenses (Overall History)",y ="Total Amount (MXN)",x ="") +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +theme_minimal()# Convert the ggplot2 plot to an interactive plotly graphggplotly(p)
Code
# Define custom colors based on classificationcustom_colors <-c("Ventas en Deposito"="#6EBF74", # Green"Ventas en Efectivo"="#499F54", # Green"Ventas en Pagina"="#28703B", # Green"Costos de Ventas"="#FFDAB9", # Pink"Gastos de Admin"="#FF9F50", # Coral"Gastos de Ventas"="#FF7347"# Tomato)# Explicitly categorize each 'clasificacion' into 'sales' or 'expenses'combined_data <- combined_data %>%mutate(source_grouped =case_when( clasificacion =="Ventas en Deposito"~"sales", clasificacion =="Ventas en Efectivo"~"sales", clasificacion =="Ventas en Pagina"~"sales", clasificacion =="Costos de Ventas"~"expenses", clasificacion =="Gastos de Admin"~"expenses", clasificacion =="Gastos de Ventas"~"expenses",TRUE~"other"# This is just a fallback case ))# Summarize total sales and combined expenses across the entire datasettotal_summary <- combined_data %>%group_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%mutate(total_sum =sum(total_amount))total_sums <- total_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Bar graph of total sales vs. stacked combined expenses (Overall History) with custom colorsggplot(total_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title ="Total Sales vs. Expenses (Overall History)", y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +geom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =4, color ="white") +# Add specific text for each total sum from the total_sums tableannotate("text", x =1, y = total_sums$total_sum[1], label = scales::dollar(total_sums$total_sum[1], prefix ="$", big.mark =","), vjust =-0.5, size =5, fontface ="bold", color ="#FF7347") +annotate("text", x =2, y = total_sums$total_sum[2], label = scales::dollar(total_sums$total_sum[2], prefix ="$", big.mark =","), vjust =-0.2, size =5, fontface ="bold", color ="darkgreen")
Code
# Summarize total sales and expenses per yearyearly_summary <- combined_data %>%group_by(año, source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()# Bar graph of total sales vs. expenses per year with facets and annotationsggplot(yearly_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title ="Total Sales vs. Expenses per Year", y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +facet_wrap(~año)
Code
# Assuming combined_data has a date column named "fecha"month_filter <-11# Filter data for the last XX monthsfiltered_data <- combined_data %>%mutate(fecha =as.Date(fecha, format ="%d/%m/%Y")) %>%# Ensure "fecha" is in Date formatfilter(fecha >=as.Date(Sys.Date() %m-%months(month_filter))) # Filter last XXX months# Summarize total sales and expenses per year and other groupingsfiltered_summary <- filtered_data %>%mutate(año =year(fecha)) %>%# Extract year from fechagroup_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()filtered_sums <- filtered_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Assuming filtered_data has already been created and summarized as 'yearly_summary'ggplot(filtered_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title =paste0("Total Sales vs. Expenses (Last ", month_filter," Months)"), y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +# Adding text labels inside the barsgeom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =4, color ="white") +# Annotate specific sums on the chart (e.g., for each category)annotate("text", x =1, y = filtered_sums$total_sum[1], label = scales::dollar(filtered_sums$total_sum[1], prefix ="$", big.mark =","), vjust =-0.5, size =5, fontface ="bold", color ="#FF7347") +annotate("text", x =2, y = filtered_sums$total_sum[2], label = scales::dollar(filtered_sums$total_sum[2], prefix ="$", big.mark =","), vjust =-0.2, size =5, fontface ="bold", color ="darkgreen")
# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Box plot graph using the filtered monthly summaryggplot(filtered_monthly_summary, aes(x = total_amount, y = source_grouped, fill = source_grouped)) +geom_boxplot() +scale_x_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +labs(title =paste0("Distribution of Monthly Amounts by Source Group (Last ", month_filter," Months)"),x ="Total Amount",y ="Source Group",fill ="Source Group") +theme_minimal()
Caja (IQR): La caja representa el rango intercuartílico. Contiene el 50% central de los datos.
Parte inferior de la caja (Q1): Este es el primer cuartil, o el percentil 25. Esto significa que el 25% de los datos están por debajo de este valor.
Parte superior de la caja (Q3): Este es el tercer cuartil, o el percentil 75. Esto significa que el 75% de los datos están por debajo de este valor, y el 25% están por encima.
Línea dentro de la caja: Esta línea representa la mediana.
Code
# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, source, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Improved boxplot using the existing monthly_summary dataframeggplot(filtered_monthly_summary, aes(x = total_amount, y = source, fill = source)) +geom_boxplot(outlier.colour ="red", outlier.shape =16, outlier.size =2) +scale_x_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Distribution of Expenses by Classification",subtitle =paste0("(Last ", month_filter," Months)"),x ="Total Monthly Sum (MXN)",y ="Classification",fill ="Classification") +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold"),legend.position ="none" ) +scale_fill_brewer(palette ="Set2")
Code
monthly_summary %>%ggplot(aes(x = fecha, y = total_amount, color = source_grouped)) +geom_line() +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Amounts Over Time by Source Group",x ="Date",y ="Total Amount",color ="Source Group") +theme_minimal()
Code
# Bar graph comparing sales and expenses per month, faceted by yearggplot(monthly_summary, aes(x = month_abbr, y = total_amount, fill = source_grouped)) +geom_bar(stat ="identity", position ="dodge") +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Comparison of Sales and Expenses Faceted by Year",x ="Month",y ="Total Amount",fill ="Category") +facet_wrap(~year(fecha), ncol =1) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))
# Create an interactive table for expenses_data with smaller fontdatatable(monthly_summary_table, options =list(autoWidth =TRUE, searching =TRUE), caption ='monthly_summary_table') %>%formatStyle(columns =names(monthly_summary_table),fontSize ='14px')
Gráfica final de ganancias y perdidas por mes
Code
# Ensure that 'profit' is numericmonthly_summary_table <- monthly_summary_table %>%mutate(profit_numeric =as.numeric(gsub("[$,]", "", profit)),date =as.Date(paste(year, month_abbr, "01", sep ="-"), format ="%Y-%b-%d"))# Create the ggplot objectp <-ggplot(monthly_summary_table, aes(x = date, y = profit_numeric, fill = profit_numeric >0, text =paste("Date: ", format(date, "%b %Y"), "<br>Profit: $", profit_numeric))) +geom_bar(stat ="identity") +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +scale_x_date(date_labels ="%Y", date_breaks ="1 year", minor_breaks =NULL) +scale_fill_manual(values =c("TRUE"="darkgreen", "FALSE"="coral")) +labs(title ="Monthly Profits Over Time (2020-2024)",x ="Year",y ="Profit ($)" ) +theme_minimal() +theme(axis.text.x =element_text(angle =0, hjust =1, vjust =0.5),legend.position ="none")# Convert the ggplot object to an interactive plotly objectp_interactive <-ggplotly(p, tooltip ="text")# Display the interactive plotp_interactive
Code
# Data processing and filteringprocessed_expenses <- expenses_data %>%rename(month = mes) %>%group_by(clasificacion, año, month, producto) %>%summarise(total_amount =sum(cantidad)) %>%mutate(month_num =match(tolower(month), tolower(month.abb)),mes =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)) ) %>%filter(producto %in%c("Producto", "Sueldo", "Transporte y Logistica", "Marketing y Ventas", "Envases", "Servicios Profesionales"))# Create the ggplotexpense_plot <- processed_expenses %>%ggplot(aes(x = fecha, y = total_amount, color = producto)) +geom_line() +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Expenses by Product Category",subtitle ="Comparison of key expense categories over time",x ="Date",y ="Total Amount (MXN)",color ="Product Category") +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold", hjust =0.5),plot.subtitle =element_text(face ="italic", hjust =0.5),axis.text.x =element_text(angle =45, hjust =1),legend.position ="bottom" )# Convert the ggplot2 plot to an interactive plotly graphinteractive_plot <-ggplotly(expense_plot)# Display the interactive plotinteractive_plot
5. Análisis de las ventas
5.1 Gráficas: Ventas a lo largo del tiempo
Code
# Calculate total sales and add necessary columnstotal_sales <- sales_data_long %>%group_by(fecha) %>%summarize(cantidad =sum(cantidad, na.rm =TRUE)) %>%mutate(clasificacion ="Total",source ="Ventas", año =year(fecha), mes =month(fecha, label =TRUE, abbr =TRUE), quarter =quarter(fecha))# Combine the total sales with the filtered datasales_data_plot <-bind_rows(sales_data_long, total_sales) %>%filter(!(año ==2020& mes %in%c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug"))) %>%filter(!(año ==2024& mes %in%c("Sep", "Oct", "Nov", "Dec"))) # Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") p <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion, group = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +scale_color_manual(values = palette) +# Apply the chosen color palettelabs(title ="Sales Over Time",x =" ",y ="Sales (MXN)",color ="Sales Type") +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +theme_minimal() # Convert the ggplot2 plot to an interactive plotly graphggplotly(p)
5.2 Tendencias de ventas por plataforma
Code
sales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create linear models for each 'clasificacion'models <- sales_data_plot %>%group_by(clasificacion) %>%do(model =lm(cantidad ~as.numeric(month_count), data = .))# Extract model coefficients and create equation stringsmodels <- models %>%mutate(slope =round(coef(model)[2], 2),intercept =round(coef(model)[1], 2),equation =paste0("y = ", intercept, " + ", slope, " * x"),y_position =seq(from =max(sales_data_plot$cantidad), to =max(sales_data_plot$cantidad) *2000, # Spread out the y positionslength.out =n()),x_position =as.Date("2021-01-01") # Set all x positions to the same date )# Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") # Create ggplotp <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +geom_smooth(method ="lm", se = F, size =0.5) +scale_color_manual(values = palette) +geom_text(data = models,aes(x = x_position, y = y_position, label = equation, color = clasificacion), hjust =0, vjust =0, size =3.5, inherit.aes =FALSE) +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +labs(title ="Sales Trends by Platform", x ="Date", y ="Sales Quantity") +theme_minimal()# Convert to interactive plotly plotggplotly(p)
5.3 Modelos lineales y resultados
Code
# Create a sequential count column for each classificationsales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create the linear model using the new count columnmodel_table_filtered <- sales_data_plot %>%group_by(clasificacion) %>%do(tidy(lm(cantidad ~ month_count, data = .))) %>%filter(term =="month_count") %>%mutate(significance =case_when( p.value <0.001~"***", p.value <0.01~"**", p.value <0.05~"*", p.value <0.1~".",TRUE~"" ) ) %>%select(Classification = clasificacion,Estimate = estimate,`Std. Error`= std.error,`t-value`= statistic,`p-value`= p.value,Significance = significance )# Print the table with stylingkable(model_table_filtered, format ="html", caption ="Modelos lineales por clasificación") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)
Modelos lineales por clasificación
Classification
Estimate
Std. Error
t-value
p-value
Significance
Deposito
1717.8205
177.68295
9.667898
0.00e+00
***
Efectivo
-103.2204
23.89972
-4.318894
8.28e-05
***
Pagina
1408.2854
101.52694
13.871052
0.00e+00
***
Total
3022.8856
200.93745
15.043913
0.00e+00
***
Resúmen de Estadística de Ventas
Depósito: Aumento estadísticamente significativo de 1818.58 MXN por mes (p < 0.001).
Efectivo: Disminución estadísticamente significativa de -108.72 MXN por mes (p < 0.001).
Página: Aumento estadísticamente significativo de 1398.58 MXN por mes (p < 0.001).
Total: Aumento estadísticamente significativo de XXXXXX.XX MXN por mes (p < 0.001).
5.4 Modelos futuros de venta.
Code
# Step 1: Create a new column with year and quarter combinedcombined_data <- combined_data %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"))# Step 2: Summarize data by year_quarter and sourcequarterly_data <- combined_data %>%group_by(source_grouped, source, year_quarter) %>%summarize(total_amount =sum(cantidad, na.rm =TRUE)) %>%ungroup() %>%filter(year_quarter !="2024-Q3")# Plotting the dataggplot(quarterly_data, aes(x = year_quarter, y = total_amount, color = source, group = source)) +geom_line(size =1) +geom_point(size =2) +labs(title ="Quarterly Data by Source",x ="Year-Quarter",y ="Total Amount",color ="Source") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))
Creating the model
Code
# Step 3: Split year_quarter into separate columns for modelingquarterly_data <- quarterly_data %>%separate(year_quarter, into =c("año", "quarter"), sep ="-Q") %>%mutate(año =as.numeric(año), quarter =as.numeric(quarter))# Step 4: Create a model for each source typemodels <- quarterly_data %>%group_by(source) %>%do(model =lm(total_amount ~ año + quarter, data = .))# Step 5: Generate projections for the next 5 years (20 quarters)future_years <-rep(seq(max(quarterly_data$año) +1, by =1, length.out =6), each =4)future_quarters <-rep(1:4, 6)projected_data <- models %>%rowwise() %>%do(data.frame(año = future_years, quarter = future_quarters,total_amount =predict(.$model, newdata =data.frame(año = future_years, quarter = future_quarters)),source = .$source)) %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"))# Combine real and projected dataquarterly_data <- quarterly_data %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"), type ="Real")projected_data <- projected_data %>%mutate(type ="Projected",source_grouped =case_when( source =="Costos"~"expenses", source =="Gastos"~"expenses", source =="Ventas"~"sales"))# Reorder columns in `projected_data` to match `quarterly_data`projected_data <- projected_data[, c("source_grouped","source","total_amount", "año", "quarter","type" , "year_quarter")]plot_data <-bind_rows(quarterly_data, projected_data)
Code
x <-ggplot(plot_data, aes(x = year_quarter, # Use 'año' for the x-axis (year)y = total_amount, color = source, linetype = type, group =interaction(source, type))) +# Ensure the grouping works across yearsgeom_line(size =0.5) +geom_point(size =0.5) +scale_y_continuous(labels = scales::dollar_format()) +labs(title ="Real and Projected Quarterly Data by Source",x ="Year", # Use "Year" for the x-axis labely ="Total Amount",color ="Source",linetype ="Data Type") +theme_minimal() +theme(axis.text.x =element_text(angle =50,color ="gray80",hjust =1, size =10)) +# Adjust label size for readabilityscale_x_discrete(breaks =function(x) x[seq(1, length(x), by =1)]) # Show fewer labels (every 4th)# Display the plotx
\(\beta_0\) is the intercept (the starting value of total_amount when both año and quarter are zero).
\(\beta_1\) is the coefficient for año, representing the expected change in total_amount for each one-unit increase in año, holding quarter constant.
\(\beta_2\) is the coefficient for quarter, representing the expected change in total_amount for each one-unit increase in quarter, holding año constant.
\(\epsilon\) represents the error term, which accounts for the variability in total_amount that cannot be explained by año and quarter.
Code
# Group Costos and Gastos into 'Expenses', and leave Ventas as 'Sales'plot_data_grouped <- plot_data %>%mutate(source_grouped =case_when( source %in%c("Costos", "Gastos") ~"Expenses", source =="Ventas"~"Sales" )) %>%group_by(year_quarter, source_grouped, type) %>%summarize(total_amount =sum(total_amount, na.rm =TRUE)) %>%ungroup()x <-ggplot(plot_data_grouped, aes(x = year_quarter, y = total_amount, color = source_grouped, # Use 'source_grouped' for colorlinetype = type, group =interaction(source_grouped, type))) +# Ensure the grouping worksgeom_line(size =0.5) +geom_point(size =0.5) +scale_y_continuous(labels = scales::dollar_format()) +labs(title ="Real and Projected Quarterly Data (Expenses vs. Sales)",x ="Year",y ="Total Amount",color ="Category",linetype ="Data Type") +theme_minimal() +theme(axis.text.x =element_text(angle =50, color ="gray80",hjust =1, size =10)) +scale_x_discrete(breaks =function(x) x[seq(1, length(x), by =1)])# Display the plotx
6. Estado de Resultados
La tabla de estado de resultados general muestra un resumen financiero de Nala. Muestra las ventas, costos, y la utilidad neta. Esta tabla ayuda para ver como los ingresos y gastos afectan las ganancias finales de la empresa.
6.1 Cálculo de Utilidades
Code
# Step 1: Process and calculate the financial metricsestado_resultados <- combined_data %>%group_by(año, clasificacion) %>%summarize(total_cantidad =sum(cantidad, na.rm =TRUE)) %>%pivot_wider(names_from = clasificacion, values_from = total_cantidad, values_fill =0) %>%mutate(`Ventas en Deposito`=`Ventas en Deposito`,`Ventas en Efectivo`=`Ventas en Efectivo`,`Ventas en Pagina`=`Ventas en Pagina`,Ventas =`Ventas en Deposito`+`Ventas en Efectivo`+`Ventas en Pagina`,Costo_de_Ventas =`Costos de Ventas`,Utilidad_Bruta = Ventas - Costo_de_Ventas,Gastos_de_ventas =`Gastos de Ventas`,Gastos_administrativos =`Gastos de Admin`,Utilidad_de_Operacion = Utilidad_Bruta - Gastos_de_ventas - Gastos_administrativos,Gastos_financieros =0, Otros_gastos =0, Utilidad_Periodo = Utilidad_de_Operacion,Utilidad_Acumulada =0,Utilidad_Neta = Utilidad_de_Operacion )# Step 2: Calculate Utilidad Acumulada and Utilidad Netafor (i in2:nrow(estado_resultados)) { estado_resultados$Utilidad_Acumulada[i] <- estado_resultados$Utilidad_Neta[i -1] estado_resultados$Utilidad_Neta[i] <- estado_resultados$Utilidad_Periodo[i] + estado_resultados$Utilidad_Acumulada[i]}# Step 3: Format numbers as currencyestado_resultados <- estado_resultados %>%mutate(across(where(is.numeric), ~paste0("$ ", formatC(round(.), format ="f", big.mark =",", digits =0))))# Step 4: Transpose the table and reorder the rowsdesired_order <-c("Ventas en Deposito","Ventas en Efectivo","Ventas en Pagina","Ventas", "Costo_de_Ventas", "Utilidad_Bruta", "Gastos_de_ventas", "Gastos_administrativos", "Utilidad_de_Operacion", "Gastos_financieros", "Otros_gastos", "Utilidad_Periodo", "Utilidad_Acumulada", "Utilidad_Neta")estado_resultados <- estado_resultados %>%pivot_longer(-año, names_to ="Metrica", values_to ="Value") %>%mutate(Metrica =factor(Metrica, levels = desired_order)) %>%arrange(Metrica) %>%pivot_wider(names_from = año, values_from = Value) %>%na.omit() %>%mutate(across(where(is.numeric), scales::dollar_format(prefix ="$")))# Step 5: Print the estado_resultados dataframe with kable and highlight rowsestado_resultados %>%kable(format ="html", caption ="Estado de Resultados") %>%kable_styling(bootstrap_options =c("hover", "condensed"), full_width = F) %>%row_spec(c(1, 2, 3), background ="#F4FBF4") %>%row_spec(c(4), background ="#E6F9E6", bold =TRUE) %>%row_spec(c(5, 7, 8, 10, 11), background ="#FDECEC") %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE) %>%row_spec(c(13,14), background ="#E6F2FA",bold =TRUE)
La tabla de márgenes presenta los márgenes financieros como porcentaje de las ventas. Estos márgenes ayudan a evaluar la eficiencia y rentabilidad de Nala, mostrando cómo los costos y gastos impactan las ganancias.
Code
# Convert all monetary values to numericestado_resultados <- estado_resultados %>%mutate(across(`2020`:`2024`, ~as.numeric(str_remove_all(.x, "\\$|,"))))# Now, calculate the percentages using the numeric valuesventas <- estado_resultados %>%filter(Metrica =="Ventas") %>%select(-Metrica)# Function to calculate percentages for each row based on 'Ventas'calculate_percentages <-function(row, ventas) {if (row["Metrica"] =="Ventas") {# Set Ventas percentage to 100 for display purposes percentages <-rep(100, length(row) -1) } else {# Calculate percentages percentages <-as.numeric(row[-1]) / ventas *100 percentages <-round(percentages, 1) # Round to one decimal }# Return the row with percentages formatted with a percentage sign percentages <-paste0(percentages, "%")return(c(row[1], percentages))}# Apply the function to each rowpercentage_results <-t(apply(estado_resultados, 1, calculate_percentages, ventas =as.numeric(ventas)))# Convert the results back to a dataframe and update column namespercentage_results_df <-as.data.frame(percentage_results)colnames(percentage_results_df) <-colnames(estado_resultados)# Print the percentage results dataframe with kablekable(percentage_results_df, format ="html", caption ="Margen de Resultados") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F) %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE)
Year-over-Year Growth for Costs, Expenses, and Utility Metrics (as Percentage)
Metrica
Growth_2021
Growth_2022
Growth_2023
Growth_2024
Ventas
392%
169%
76%
-22%
Costo_de_Ventas
252%
89%
71%
-24%
Gastos_de_ventas
-51%
241%
203%
1.7%
Gastos_administrativos
3%
1 545%
35%
0.3%
Utilidad_Bruta
-634%
557%
84%
-18%
Utilidad_de_Operacion
-138%
-283%
-250%
-208%
Utilidad_Neta
-38%
113%
-80%
425%
7.0 Next Steps
Cambiar “tags” o dropdown list de gastos.
Source Code
---title: "Financial Data Analysis of Nala"author: "Javier Patrón"date: "`r Sys.Date()`"format: html: toc: true toc_depth: 2 code-fold: true code-tools: trueexecute: warning: false message: false---# Análisis Financiero de NalaEste documento proporciona un análisis de los datos financieros de la empresa Nala.## Índice1. **Cargar bibliotecas necesarias**2. **Leer los datos de gastos y ventas de NALA 2020-2024**3. **Combinar los datos**4. **Análisis financiero** 4.1 Ventas totales vs. gastos<!-- -->5. **Análisis de las ventas** 5.1 Gráficas: Ventas a lo largo del tiempo 5.2 Tendencias de ventas por plataforma 5.3 Modelos lineales y resultados6. **Estado de Resultados** 6.1 Cálculo de Utilidades 6.2 Cálculo de márgenes y porcentajes## 1. Cargar bibliotecas```{r}library(tidyverse) # For data manipulation and visualization; includes readr, dplyr, ggplot2library(janitor) # For cleaning data and making tabular outputs cleanerlibrary(scales) # For scaling graphical outputslibrary(RColorBrewer) # For using color palettes in visualizationslibrary(plotly) # For interactive web-based graphicslibrary(stringr) # For string manipulation (also part of tidyverse but listed for clarity)library(knitr) # For dynamic report generation in Rlibrary(kableExtra) # For enhancing 'kable' tables in R Markdownlibrary(broom) # For converting statistical analysis objects into tidy data frameslibrary(DT) # For creating interactive tables in R```## 2. Cargar los datos#### 2.1 Leer datos de gastos 2020-2024```{r}# Step 1: Read and clean the raw data, and rename it to raw_expense_dataraw_expense_data <-read_csv("/Users/javier.patron/Downloads/pagos Nala - Expenses Data Analisis (2).csv") %>%clean_names() %>%# Set the first row as column names and remove the first row { setNames(.[-1,], as.character(.[1,])) } %>%clean_names() %>%select_if(~!all(is.na(.)))# Step 2: Create three separate data frames for each set of columnsdf1 <- raw_expense_data %>%select(clasificacion, fecha, empresa, cantidad, producto)df2 <- raw_expense_data %>%select(clasificacion = clasificacion_2, fecha = fecha_2, empresa = empresa_2, cantidad = cantidad_2, producto = producto_2)df3 <- raw_expense_data %>%select(clasificacion = clasificacion_3, fecha = fecha_3, empresa = empresa_3, cantidad = cantidad_3, producto = producto_3)# Step 3: Combine all three data frames and name it as expenses_dataexpenses_data <-bind_rows(df1, df2, df3) %>%# Remove rows where all columns are NA (if any)filter(!if_all(everything(), is.na))# Clean the dataexpenses_data <- expenses_data %>%mutate(fecha = lubridate::dmy(fecha), año = lubridate::year(fecha),mes = lubridate::month(fecha, label =TRUE),quarter = lubridate::quarter(fecha),# Convert parentheses numbers to negative and clean other symbolscantidad =as.numeric(ifelse(grepl("\\(", cantidad), paste0("-", gsub("[^0-9.]", "", cantidad)), gsub("[^0-9.]", "", cantidad)))) %>%mutate(source =case_when(str_detect(clasificacion, "Gastos") ~"Gastos",str_detect(clasificacion, "Costos") ~"Costos",TRUE~"other"# This handles any other cases, if they exist )) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter", "empresa", "cantidad", "producto")```#### 2.2 Leer datos de ventas 2020-2024```{r}# Read the CSV filesales_data <-read_csv("/Users/javier.patron/Downloads/pagos Nala - Ventas Data Analisis (1).csv") %>%clean_names()# Clean and Organize the datasales_data_long <- sales_data %>%pivot_longer(cols = jan:dec, names_to ="month", values_to ="cantidad") %>%rename(año = ano, tipo_venta = tipo) %>%mutate(month =match(tolower(month), tolower(month.abb)), # Convert month abbreviation to numbermes =month(month, label =TRUE, abbr =TRUE), # Convert to lubridate month formatsource ="Ventas",fecha =make_date(year = año, month =month(month), day =days_in_month(month(month, label =FALSE))), # Add last day of the monthclasificacion = tipo_venta,quarter =quarter(month),cantidad =as.numeric(gsub("[$,]", "", cantidad))) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter" , "cantidad")sales_data_long <- sales_data_long %>%arrange(fecha) %>%slice(1:168)```## 3. Combinar los datos```{r}combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Gastos Admin"~"Gastos de Admin", clasificacion =="Gastos Ventas"~"Gastos de Ventas", clasificacion =="Costos Ventas"~"Costos de Ventas", clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion # Default case to handle any unexpected values ) )```#### 3.1 Tabla de Gastos```{r, echo=FALSE, collapse = T}# Create an interactive table for expenses_data with smaller fontdatatable(expenses_data, options = list(autoWidth = TRUE, searching = TRUE), caption = 'Expenses Data') %>% formatStyle( columns = names(expenses_data), fontSize = '10px')```#### 3.2 Tabla de Ventas```{r}# Create an interactive table for expenses_data with smaller fontdatatable(sales_data, options =list(autoWidth =TRUE, searching =TRUE), caption ='Expenses Data') %>%formatStyle(columns =names(sales_data),fontSize ='10px') ```## 4. Análisis Financiero### 4.1 Ventas Totales vs. Gastos```{r}# Summarize total sales and expenses across the entire datasettotal_summary <- combined_data %>%group_by(source) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE))# Define a custom color palette similar to Set1 or Set2custom_palette <-c("#E41A1C", "#377EB8", "#4DAF4A")# Create the ggplot objectp <-ggplot(total_summary, aes(x = source, y = total_amount, fill = source)) +geom_bar(stat ="identity") +scale_fill_manual(values = custom_palette) +# Apply the custom palettelabs(title ="Total Sales vs. Expenses (Overall History)",y ="Total Amount (MXN)",x ="") +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +theme_minimal()# Convert the ggplot2 plot to an interactive plotly graphggplotly(p)``````{r}# Define custom colors based on classificationcustom_colors <-c("Ventas en Deposito"="#6EBF74", # Green"Ventas en Efectivo"="#499F54", # Green"Ventas en Pagina"="#28703B", # Green"Costos de Ventas"="#FFDAB9", # Pink"Gastos de Admin"="#FF9F50", # Coral"Gastos de Ventas"="#FF7347"# Tomato)# Explicitly categorize each 'clasificacion' into 'sales' or 'expenses'combined_data <- combined_data %>%mutate(source_grouped =case_when( clasificacion =="Ventas en Deposito"~"sales", clasificacion =="Ventas en Efectivo"~"sales", clasificacion =="Ventas en Pagina"~"sales", clasificacion =="Costos de Ventas"~"expenses", clasificacion =="Gastos de Admin"~"expenses", clasificacion =="Gastos de Ventas"~"expenses",TRUE~"other"# This is just a fallback case ))# Summarize total sales and combined expenses across the entire datasettotal_summary <- combined_data %>%group_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%mutate(total_sum =sum(total_amount))total_sums <- total_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Bar graph of total sales vs. stacked combined expenses (Overall History) with custom colorsggplot(total_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title ="Total Sales vs. Expenses (Overall History)", y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +geom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =4, color ="white") +# Add specific text for each total sum from the total_sums tableannotate("text", x =1, y = total_sums$total_sum[1], label = scales::dollar(total_sums$total_sum[1], prefix ="$", big.mark =","), vjust =-0.5, size =5, fontface ="bold", color ="#FF7347") +annotate("text", x =2, y = total_sums$total_sum[2], label = scales::dollar(total_sums$total_sum[2], prefix ="$", big.mark =","), vjust =-0.2, size =5, fontface ="bold", color ="darkgreen")``````{r}# Summarize total sales and expenses per yearyearly_summary <- combined_data %>%group_by(año, source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()# Bar graph of total sales vs. expenses per year with facets and annotationsggplot(yearly_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title ="Total Sales vs. Expenses per Year", y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +facet_wrap(~año) ``````{r}# Assuming combined_data has a date column named "fecha"month_filter <-11# Filter data for the last XX monthsfiltered_data <- combined_data %>%mutate(fecha =as.Date(fecha, format ="%d/%m/%Y")) %>%# Ensure "fecha" is in Date formatfilter(fecha >=as.Date(Sys.Date() %m-%months(month_filter))) # Filter last XXX months# Summarize total sales and expenses per year and other groupingsfiltered_summary <- filtered_data %>%mutate(año =year(fecha)) %>%# Extract year from fechagroup_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()filtered_sums <- filtered_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Assuming filtered_data has already been created and summarized as 'yearly_summary'ggplot(filtered_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title =paste0("Total Sales vs. Expenses (Last ", month_filter," Months)"), y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +# Adding text labels inside the barsgeom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =4, color ="white") +# Annotate specific sums on the chart (e.g., for each category)annotate("text", x =1, y = filtered_sums$total_sum[1], label = scales::dollar(filtered_sums$total_sum[1], prefix ="$", big.mark =","), vjust =-0.5, size =5, fontface ="bold", color ="#FF7347") +annotate("text", x =2, y = filtered_sums$total_sum[2], label = scales::dollar(filtered_sums$total_sum[2], prefix ="$", big.mark =","), vjust =-0.2, size =5, fontface ="bold", color ="darkgreen")```### 4.2 Análisis de Gastos#### Box Plots```{r}# Cleaned dataframe manipulationmonthly_summary <- combined_data %>%rename(month = mes) %>%group_by(source_grouped, source, año, month) %>%summarise(total_amount =sum(cantidad)) %>%mutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE), fecha =make_date(year = año, month = month_num, day =days_in_month(month_num))) ``````{r}# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Box plot graph using the filtered monthly summaryggplot(filtered_monthly_summary, aes(x = total_amount, y = source_grouped, fill = source_grouped)) +geom_boxplot() +scale_x_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +labs(title =paste0("Distribution of Monthly Amounts by Source Group (Last ", month_filter," Months)"),x ="Total Amount",y ="Source Group",fill ="Source Group") +theme_minimal()```**Caja (IQR): La caja representa el rango intercuartílico. Contiene el 50% central de los datos.**Parte inferior de la caja (Q1): Este es el primer cuartil, o el percentil 25. Esto significa que el 25% de los datos están por debajo de este valor.Parte superior de la caja (Q3): Este es el tercer cuartil, o el percentil 75. Esto significa que el 75% de los datos están por debajo de este valor, y el 25% están por encima.Línea dentro de la caja: Esta línea representa la mediana.```{r}# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, source, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Improved boxplot using the existing monthly_summary dataframeggplot(filtered_monthly_summary, aes(x = total_amount, y = source, fill = source)) +geom_boxplot(outlier.colour ="red", outlier.shape =16, outlier.size =2) +scale_x_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Distribution of Expenses by Classification",subtitle =paste0("(Last ", month_filter," Months)"),x ="Total Monthly Sum (MXN)",y ="Classification",fill ="Classification") +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold"),legend.position ="none" ) +scale_fill_brewer(palette ="Set2")``````{r}monthly_summary %>%ggplot(aes(x = fecha, y = total_amount, color = source_grouped)) +geom_line() +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Amounts Over Time by Source Group",x ="Date",y ="Total Amount",color ="Source Group") +theme_minimal()``````{r}# Bar graph comparing sales and expenses per month, faceted by yearggplot(monthly_summary, aes(x = month_abbr, y = total_amount, fill = source_grouped)) +geom_bar(stat ="identity", position ="dodge") +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Comparison of Sales and Expenses Faceted by Year",x ="Month",y ="Total Amount",fill ="Category") +facet_wrap(~year(fecha), ncol =1) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))```#### Tablas Resumen```{r}# Calculate the yearly totals and profityearly_summary_table <- monthly_summary %>%group_by(year =year(fecha)) %>%summarise(total_expenses =sum(total_amount[source_grouped =="expenses"]),total_sales =sum(total_amount[source_grouped =="sales"]),profit = total_sales - total_expenses) %>%mutate(total_expenses = scales::dollar(total_expenses, prefix ="$", big.mark =","),total_sales = scales::dollar(total_sales, prefix ="$", big.mark =","),profit = scales::dollar(profit, prefix ="$", big.mark =","))kable(yearly_summary_table, align ="c", caption ="Year-by-Year Financial Summary") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)``````{r}# Calculate the monthly totals for all yearsmonthly_summary_table <- monthly_summary %>%group_by(year =year(fecha), month_abbr) %>%summarise(total_expenses =sum(total_amount[source_grouped =="expenses"]),total_sales =sum(total_amount[source_grouped =="sales"]),profit = total_sales - total_expenses ) %>%arrange(year, match(month_abbr, month.abb)) %>%mutate(total_expenses = scales::dollar(total_expenses, prefix ="$", big.mark =","),total_sales = scales::dollar(total_sales, prefix ="$", big.mark =","),profit = scales::dollar(profit, prefix ="$", big.mark =","))```#### Tabla resumen de ganancias y perdidas mes con mes```{r}# Create an interactive table for expenses_data with smaller fontdatatable(monthly_summary_table, options =list(autoWidth =TRUE, searching =TRUE), caption ='monthly_summary_table') %>%formatStyle(columns =names(monthly_summary_table),fontSize ='14px') ```#### Gráfica final de ganancias y perdidas por mes```{r}# Ensure that 'profit' is numericmonthly_summary_table <- monthly_summary_table %>%mutate(profit_numeric =as.numeric(gsub("[$,]", "", profit)),date =as.Date(paste(year, month_abbr, "01", sep ="-"), format ="%Y-%b-%d"))# Create the ggplot objectp <-ggplot(monthly_summary_table, aes(x = date, y = profit_numeric, fill = profit_numeric >0, text =paste("Date: ", format(date, "%b %Y"), "<br>Profit: $", profit_numeric))) +geom_bar(stat ="identity") +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +scale_x_date(date_labels ="%Y", date_breaks ="1 year", minor_breaks =NULL) +scale_fill_manual(values =c("TRUE"="darkgreen", "FALSE"="coral")) +labs(title ="Monthly Profits Over Time (2020-2024)",x ="Year",y ="Profit ($)" ) +theme_minimal() +theme(axis.text.x =element_text(angle =0, hjust =1, vjust =0.5),legend.position ="none")# Convert the ggplot object to an interactive plotly objectp_interactive <-ggplotly(p, tooltip ="text")# Display the interactive plotp_interactive``````{r}# Data processing and filteringprocessed_expenses <- expenses_data %>%rename(month = mes) %>%group_by(clasificacion, año, month, producto) %>%summarise(total_amount =sum(cantidad)) %>%mutate(month_num =match(tolower(month), tolower(month.abb)),mes =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)) ) %>%filter(producto %in%c("Producto", "Sueldo", "Transporte y Logistica", "Marketing y Ventas", "Envases", "Servicios Profesionales"))# Create the ggplotexpense_plot <- processed_expenses %>%ggplot(aes(x = fecha, y = total_amount, color = producto)) +geom_line() +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Expenses by Product Category",subtitle ="Comparison of key expense categories over time",x ="Date",y ="Total Amount (MXN)",color ="Product Category") +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold", hjust =0.5),plot.subtitle =element_text(face ="italic", hjust =0.5),axis.text.x =element_text(angle =45, hjust =1),legend.position ="bottom" )# Convert the ggplot2 plot to an interactive plotly graphinteractive_plot <-ggplotly(expense_plot)# Display the interactive plotinteractive_plot```## 5. Análisis de las ventas5.1 Gráficas: Ventas a lo largo del tiempo```{r}# Calculate total sales and add necessary columnstotal_sales <- sales_data_long %>%group_by(fecha) %>%summarize(cantidad =sum(cantidad, na.rm =TRUE)) %>%mutate(clasificacion ="Total",source ="Ventas", año =year(fecha), mes =month(fecha, label =TRUE, abbr =TRUE), quarter =quarter(fecha))# Combine the total sales with the filtered datasales_data_plot <-bind_rows(sales_data_long, total_sales) %>%filter(!(año ==2020& mes %in%c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug"))) %>%filter(!(año ==2024& mes %in%c("Sep", "Oct", "Nov", "Dec"))) # Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") p <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion, group = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +scale_color_manual(values = palette) +# Apply the chosen color palettelabs(title ="Sales Over Time",x =" ",y ="Sales (MXN)",color ="Sales Type") +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +theme_minimal() # Convert the ggplot2 plot to an interactive plotly graphggplotly(p)```### 5.2 Tendencias de ventas por plataforma```{r}sales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create linear models for each 'clasificacion'models <- sales_data_plot %>%group_by(clasificacion) %>%do(model =lm(cantidad ~as.numeric(month_count), data = .))# Extract model coefficients and create equation stringsmodels <- models %>%mutate(slope =round(coef(model)[2], 2),intercept =round(coef(model)[1], 2),equation =paste0("y = ", intercept, " + ", slope, " * x"),y_position =seq(from =max(sales_data_plot$cantidad), to =max(sales_data_plot$cantidad) *2000, # Spread out the y positionslength.out =n()),x_position =as.Date("2021-01-01") # Set all x positions to the same date )# Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") # Create ggplotp <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +geom_smooth(method ="lm", se = F, size =0.5) +scale_color_manual(values = palette) +geom_text(data = models,aes(x = x_position, y = y_position, label = equation, color = clasificacion), hjust =0, vjust =0, size =3.5, inherit.aes =FALSE) +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +labs(title ="Sales Trends by Platform", x ="Date", y ="Sales Quantity") +theme_minimal()# Convert to interactive plotly plotggplotly(p)```### 5.3 Modelos lineales y resultados```{r}# Create a sequential count column for each classificationsales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create the linear model using the new count columnmodel_table_filtered <- sales_data_plot %>%group_by(clasificacion) %>%do(tidy(lm(cantidad ~ month_count, data = .))) %>%filter(term =="month_count") %>%mutate(significance =case_when( p.value <0.001~"***", p.value <0.01~"**", p.value <0.05~"*", p.value <0.1~".",TRUE~"" ) ) %>%select(Classification = clasificacion,Estimate = estimate,`Std. Error`= std.error,`t-value`= statistic,`p-value`= p.value,Significance = significance )# Print the table with stylingkable(model_table_filtered, format ="html", caption ="Modelos lineales por clasificación") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)```### Resúmen de Estadística de Ventas- **Depósito**: Aumento estadísticamente significativo de 1818.58 MXN por mes (p \< 0.001).- **Efectivo**: Disminución estadísticamente significativa de -108.72 MXN por mes (p \< 0.001).- **Página**: Aumento estadísticamente significativo de 1398.58 MXN por mes (p \< 0.001).**Total: Aumento estadísticamente significativo de XXXXXX.XX MXN por mes (p \< 0.001).**### 5.4 Modelos futuros de venta.```{r}# Step 1: Create a new column with year and quarter combinedcombined_data <- combined_data %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"))# Step 2: Summarize data by year_quarter and sourcequarterly_data <- combined_data %>%group_by(source_grouped, source, year_quarter) %>%summarize(total_amount =sum(cantidad, na.rm =TRUE)) %>%ungroup() %>%filter(year_quarter !="2024-Q3")# Plotting the dataggplot(quarterly_data, aes(x = year_quarter, y = total_amount, color = source, group = source)) +geom_line(size =1) +geom_point(size =2) +labs(title ="Quarterly Data by Source",x ="Year-Quarter",y ="Total Amount",color ="Source") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))```Creating the model```{r}# Step 3: Split year_quarter into separate columns for modelingquarterly_data <- quarterly_data %>%separate(year_quarter, into =c("año", "quarter"), sep ="-Q") %>%mutate(año =as.numeric(año), quarter =as.numeric(quarter))# Step 4: Create a model for each source typemodels <- quarterly_data %>%group_by(source) %>%do(model =lm(total_amount ~ año + quarter, data = .))# Step 5: Generate projections for the next 5 years (20 quarters)future_years <-rep(seq(max(quarterly_data$año) +1, by =1, length.out =6), each =4)future_quarters <-rep(1:4, 6)projected_data <- models %>%rowwise() %>%do(data.frame(año = future_years, quarter = future_quarters,total_amount =predict(.$model, newdata =data.frame(año = future_years, quarter = future_quarters)),source = .$source)) %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"))# Combine real and projected dataquarterly_data <- quarterly_data %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"), type ="Real")projected_data <- projected_data %>%mutate(type ="Projected",source_grouped =case_when( source =="Costos"~"expenses", source =="Gastos"~"expenses", source =="Ventas"~"sales"))# Reorder columns in `projected_data` to match `quarterly_data`projected_data <- projected_data[, c("source_grouped","source","total_amount", "año", "quarter","type" , "year_quarter")]plot_data <-bind_rows(quarterly_data, projected_data)``````{r}x <-ggplot(plot_data, aes(x = year_quarter, # Use 'año' for the x-axis (year)y = total_amount, color = source, linetype = type, group =interaction(source, type))) +# Ensure the grouping works across yearsgeom_line(size =0.5) +geom_point(size =0.5) +scale_y_continuous(labels = scales::dollar_format()) +labs(title ="Real and Projected Quarterly Data by Source",x ="Year", # Use "Year" for the x-axis labely ="Total Amount",color ="Source",linetype ="Data Type") +theme_minimal() +theme(axis.text.x =element_text(angle =50,color ="gray80",hjust =1, size =10)) +# Adjust label size for readabilityscale_x_discrete(breaks =function(x) x[seq(1, length(x), by =1)]) # Show fewer labels (every 4th)# Display the plotx```$$\text{total_amount} = \beta_0 + \beta_1 \times \text{año} + \beta_2 \times \text{quarter} + \epsilon$$Here:- $\beta_0$ is the intercept (the starting value of `total_amount` when both `año` and `quarter` are zero).- $\beta_1$ is the coefficient for `año`, representing the expected change in `total_amount` for each one-unit increase in `año`, holding `quarter` constant.- $\beta_2$ is the coefficient for `quarter`, representing the expected change in `total_amount` for each one-unit increase in `quarter`, holding `año` constant.- $\epsilon$ represents the error term, which accounts for the variability in `total_amount` that cannot be explained by `año` and `quarter`.```{r}# Group Costos and Gastos into 'Expenses', and leave Ventas as 'Sales'plot_data_grouped <- plot_data %>%mutate(source_grouped =case_when( source %in%c("Costos", "Gastos") ~"Expenses", source =="Ventas"~"Sales" )) %>%group_by(year_quarter, source_grouped, type) %>%summarize(total_amount =sum(total_amount, na.rm =TRUE)) %>%ungroup()x <-ggplot(plot_data_grouped, aes(x = year_quarter, y = total_amount, color = source_grouped, # Use 'source_grouped' for colorlinetype = type, group =interaction(source_grouped, type))) +# Ensure the grouping worksgeom_line(size =0.5) +geom_point(size =0.5) +scale_y_continuous(labels = scales::dollar_format()) +labs(title ="Real and Projected Quarterly Data (Expenses vs. Sales)",x ="Year",y ="Total Amount",color ="Category",linetype ="Data Type") +theme_minimal() +theme(axis.text.x =element_text(angle =50, color ="gray80",hjust =1, size =10)) +scale_x_discrete(breaks =function(x) x[seq(1, length(x), by =1)])# Display the plotx```## 6. Estado de ResultadosLa tabla de estado de resultados general muestra un resumen financiero de Nala. Muestra las ventas, costos, y la utilidad neta. Esta tabla ayuda para ver como los ingresos y gastos afectan las ganancias finales de la empresa.#### 6.1 Cálculo de Utilidades```{r}# Step 1: Process and calculate the financial metricsestado_resultados <- combined_data %>%group_by(año, clasificacion) %>%summarize(total_cantidad =sum(cantidad, na.rm =TRUE)) %>%pivot_wider(names_from = clasificacion, values_from = total_cantidad, values_fill =0) %>%mutate(`Ventas en Deposito`=`Ventas en Deposito`,`Ventas en Efectivo`=`Ventas en Efectivo`,`Ventas en Pagina`=`Ventas en Pagina`,Ventas =`Ventas en Deposito`+`Ventas en Efectivo`+`Ventas en Pagina`,Costo_de_Ventas =`Costos de Ventas`,Utilidad_Bruta = Ventas - Costo_de_Ventas,Gastos_de_ventas =`Gastos de Ventas`,Gastos_administrativos =`Gastos de Admin`,Utilidad_de_Operacion = Utilidad_Bruta - Gastos_de_ventas - Gastos_administrativos,Gastos_financieros =0, Otros_gastos =0, Utilidad_Periodo = Utilidad_de_Operacion,Utilidad_Acumulada =0,Utilidad_Neta = Utilidad_de_Operacion )# Step 2: Calculate Utilidad Acumulada and Utilidad Netafor (i in2:nrow(estado_resultados)) { estado_resultados$Utilidad_Acumulada[i] <- estado_resultados$Utilidad_Neta[i -1] estado_resultados$Utilidad_Neta[i] <- estado_resultados$Utilidad_Periodo[i] + estado_resultados$Utilidad_Acumulada[i]}# Step 3: Format numbers as currencyestado_resultados <- estado_resultados %>%mutate(across(where(is.numeric), ~paste0("$ ", formatC(round(.), format ="f", big.mark =",", digits =0))))# Step 4: Transpose the table and reorder the rowsdesired_order <-c("Ventas en Deposito","Ventas en Efectivo","Ventas en Pagina","Ventas", "Costo_de_Ventas", "Utilidad_Bruta", "Gastos_de_ventas", "Gastos_administrativos", "Utilidad_de_Operacion", "Gastos_financieros", "Otros_gastos", "Utilidad_Periodo", "Utilidad_Acumulada", "Utilidad_Neta")estado_resultados <- estado_resultados %>%pivot_longer(-año, names_to ="Metrica", values_to ="Value") %>%mutate(Metrica =factor(Metrica, levels = desired_order)) %>%arrange(Metrica) %>%pivot_wider(names_from = año, values_from = Value) %>%na.omit() %>%mutate(across(where(is.numeric), scales::dollar_format(prefix ="$")))# Step 5: Print the estado_resultados dataframe with kable and highlight rowsestado_resultados %>%kable(format ="html", caption ="Estado de Resultados") %>%kable_styling(bootstrap_options =c("hover", "condensed"), full_width = F) %>%row_spec(c(1, 2, 3), background ="#F4FBF4") %>%row_spec(c(4), background ="#E6F9E6", bold =TRUE) %>%row_spec(c(5, 7, 8, 10, 11), background ="#FDECEC") %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE) %>%row_spec(c(13,14), background ="#E6F2FA",bold =TRUE) ``````{r}estado_resultados %>%kable(format ="html", caption ="Estado de Resultados") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F) %>%row_spec(c(4), bold =TRUE) %>%row_spec(c(4), background ="#E6F9E6", bold =TRUE) %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE)```#### 6.2 Cálculo de márgenes y porcentajesLa tabla de márgenes presenta los márgenes financieros como porcentaje de las ventas. Estos márgenes ayudan a evaluar la eficiencia y rentabilidad de Nala, mostrando cómo los costos y gastos impactan las ganancias.```{r}# Convert all monetary values to numericestado_resultados <- estado_resultados %>%mutate(across(`2020`:`2024`, ~as.numeric(str_remove_all(.x, "\\$|,"))))# Now, calculate the percentages using the numeric valuesventas <- estado_resultados %>%filter(Metrica =="Ventas") %>%select(-Metrica)# Function to calculate percentages for each row based on 'Ventas'calculate_percentages <-function(row, ventas) {if (row["Metrica"] =="Ventas") {# Set Ventas percentage to 100 for display purposes percentages <-rep(100, length(row) -1) } else {# Calculate percentages percentages <-as.numeric(row[-1]) / ventas *100 percentages <-round(percentages, 1) # Round to one decimal }# Return the row with percentages formatted with a percentage sign percentages <-paste0(percentages, "%")return(c(row[1], percentages))}# Apply the function to each rowpercentage_results <-t(apply(estado_resultados, 1, calculate_percentages, ventas =as.numeric(ventas)))# Convert the results back to a dataframe and update column namespercentage_results_df <-as.data.frame(percentage_results)colnames(percentage_results_df) <-colnames(estado_resultados)# Print the percentage results dataframe with kablekable(percentage_results_df, format ="html", caption ="Margen de Resultados") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F) %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE)```#### 6.3 Crecimiento de Nala porcentual```{r}# Calculate year-over-year growth for all relevant metricsventas_growth <- estado_resultados %>%filter(Metrica =="Ventas") %>%mutate(Growth_2021 = scales::percent((`2021`-`2020`) /`2020`),Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`))# Cost of Goods Sold (Costo_de_Ventas)cost_growth <- estado_resultados %>%filter(Metrica =="Costo_de_Ventas") %>%mutate(Growth_2021 = scales::percent((`2021`-`2020`) /`2020`),Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Expenses (Gastos_de_ventas and Gastos_administrativos)expenses_growth <- estado_resultados %>%filter(Metrica %in%c("Gastos_de_ventas", "Gastos_administrativos")) %>%mutate(Growth_2021 = scales::percent((`2021`-`2020`) /`2020`),Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Utility (Utilidad_Bruta, Utilidad_de_Operacion, Utilidad_Neta)utility_growth <- estado_resultados %>%filter(Metrica %in%c("Utilidad_Bruta", "Utilidad_de_Operacion", "Utilidad_Neta")) %>%mutate(Growth_2021 = scales::percent((`2021`-`2020`) /`2020`),Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Combine all into a single data framecombined_growth <-bind_rows(ventas_growth, cost_growth, expenses_growth, utility_growth) %>%select(Metrica, Growth_2021, Growth_2022, Growth_2023, Growth_2024)# Display the combined growth tablecombined_growth %>%kable(format ="html", caption ="Year-over-Year Growth for Costs, Expenses, and Utility Metrics (as Percentage)", align ='c') %>%kable_styling(bootstrap_options =c("hover", "condensed"), full_width = F)```#### 7.0 Next Steps1. Cambiar "tags" o dropdown list de gastos.2.