# Install Packages

```{r}
# install.packages("dplyr")
# install.packages("tidyr")
# install.packages("readr")
# install.packages("lubridate")
# install.packages("ggplot2")
# install.packages("tidyverse")
# install.packages("psych")
# install.packages("reshape2")
# install.packages("corrplot")
# install.packages("caret")
# install.packages("scales")
```
# Libraries

```{r}
# Attach the packages used by the chunks below. Each call sits on its own
# line: several calls on one line without `;` do not parse in R.
library(tidyverse)
library(psych)
library(ggplot2)
library(reshape2)
library(corrplot)
library(scales)
```
## Uploading the dataset

```{r}
# Read the CSV export; the path is relative to the working directory.
# NOTE(review): the file name says 2021, while a later chunk labels the
# columns January 2022 through April 2023 - confirm this is the right file.
rental_income <- read.csv("XYZ LLC 2021 PL.csv")
```
# EPA

## Head

```{r}
# Show the first rows of the raw import.
head(rental_income)
```

## Structure

```{r}
# Show each column's type together with a preview of its values.
str(rental_income)
```
# Summary

```{r}
# Per-column summary statistics of the raw import.
summary(rental_income)
```

## Omit NA values

```{r}
# Drop every row that contains an NA in any column.
clean_rental_income <- na.omit(rental_income)
```

```{r}
# view(clean_rental_income)
```
# Check the current column names

```{r}
# print(colnames(clean_rental_income))
```

# Change the headers into a date structure

```{r}
# One Date per month from January 2022 through April 2023.
Col_headers <- seq(as.Date("2022-01-01"), as.Date("2023-04-01"), by = "month")

# Every column except the first is relabelled with a "Month Year" string.
# Stop if the number of columns does not match the number of labels, rather
# than letting the replacement recycle or truncate them.
stopifnot(ncol(clean_rental_income) - 1 == length(Col_headers))

# "%B" yields the month name in the session locale; the hard-coded English
# labels in `months_order` further down only match in an English locale.
names(clean_rental_income)[-1] <- format(Col_headers, "%B %Y")
```

```{r}
# print(colnames(clean_rental_income))
```
```{r}
# Keep the row(s) whose first-column label contains "Total Rental Income".
# Adjust the column index if necessary.
total_rental_income_monthly <- clean_rental_income %>%
  filter(grepl("Total Rental Income", clean_rental_income[[1]])) %>%
  # Remove the category column to focus on numerical data
  select(-1)
```

```{r}
# Sum each month column, then reshape the named vector into a two-column
# data frame (Month, Total) that ggplot2 can consume.
monthly_totals <- colSums(total_rental_income_monthly, na.rm = TRUE)
monthly_totals_df <- data.frame(
  Month = names(monthly_totals),
  Total = as.numeric(monthly_totals),
  row.names = NULL
)
```

```{r}
# print(monthly_totals_df)
```
# Bar Chart with the Total Rental Income per month

```{r}
# Month has no explicit level order at this point, so the bars are drawn in
# alphabetical order of the labels rather than chronologically.
ggplot(monthly_totals_df, aes(x = Month, y = Total)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme_minimal() +
  labs(
    title = "Monthly Total Rental Income",
    x = "Month",
    y = "Total Income ($)"
  ) +
  # Rotate x-axis labels for better readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Define the specific order of the months (chronological, 16 labels).
# The strings must match the "%B %Y" column labels exactly.
months_order <- c(
  "January 2022", "February 2022", "March 2022", "April 2022",
  "May 2022", "June 2022", "July 2022", "August 2022",
  "September 2022", "October 2022", "November 2022", "December 2022",
  "January 2023", "February 2023", "March 2023", "April 2023"
)
```
```{r}
# Ensure that 'Month' is converted to a factor and ordered correctly.
# Any Month value that is not listed in months_order becomes NA.
monthly_totals_df$Month <- factor(
  monthly_totals_df$Month,
  levels = months_order
)

# Plotting the bar chart with the months in the specified order
ggplot(monthly_totals_df, aes(x = Month, y = Total)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(
    # Format numbers with commas. trim = TRUE stops format() from padding
    # shorter numbers to a common width, which left a gap after the "$".
    aes(label = paste0("$", format(Total, big.mark = ",", trim = TRUE))),
    # Adjust vertical position to be inside the bars
    position = position_stack(vjust = 0.5),
    # Set text color and size, rotate text vertically
    color = "white", size = 3.5, angle = 90
  ) +
  theme_minimal() +
  labs(
    title = "Monthly Total Rental Income",
    x = "Month",
    y = "Total Income ($)"
  ) +
  # Rotate x-axis labels for better readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Filter the data for 'Net Income'.
# Adjust the column index if necessary.
net_income_data <- clean_rental_income %>%
  filter(grepl("Net Income", clean_rental_income[[1]])) %>%
  # Remove the category column to focus on numerical data
  select(-1)
```

```{r}
# print(net_income_data)
```

```{r}
# Prepare data for plotting: pair each month label with the value from the
# first matching row. tibble() errors if the two lengths differ.
net_income_data <- tibble(
  Month = months_order,
  Net_Income = as.numeric(net_income_data[1, ])
)
```

```{r}
# print(net_income_data)
```

```{r}
# Convert 'Month' into a factor whose levels follow months_order
net_income_data$Month <- factor(net_income_data$Month, levels = months_order)
```
```{r}
# Plotting the line graph.
# `group = 1` is set in the plot-level aes() because Month is a factor:
# without it geom_line() puts every month in its own one-point group and
# draws no connecting line. geom_smooth() inherits the same grouping.
# NOTE(review): the object shares its name with base R's plot(); calls such
# as plot(x, y) still resolve to the function, but a clearer name would help.
plot <- ggplot(
  net_income_data,
  aes(x = Month, y = Net_Income, group = 1)
) +
  geom_line(color = "dodgerblue", size = 1.5) + # Line connecting the points
  geom_point(color = "darkblue", size = 3.5) + # Points on the line
  geom_smooth(method = "lm", color = "red", se = FALSE) + # Linear trendline
  theme_minimal() +
  scale_y_continuous(
    labels = dollar_format(prefix = "$", suffix = ""),
    breaks = pretty_breaks(n = 10)
  ) +
  labs(title = "Monthly Net Income", x = "Month", y = "Net Income ($)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

```{r}
# Print the plot
# print(plot)
```
```{r}
# Extract the month labels, which are the levels of the 'Month' factor
month_names <- levels(net_income_data$Month) # This assumes 'Month' is a factor
total_income <- net_income_data$Net_Income
month_index <- seq_along(month_names)

# Y-axis limits: cover zero and the data with 10% headroom. For all-positive
# values this equals c(0, max(total_income) * 1.1); unlike that form it also
# keeps months with negative net income inside the plotting region.
y_limits <- range(0, total_income * 1.1, na.rm = TRUE)

plot(
  month_index, total_income,
  type = "o",
  xlab = "Months",
  ylab = "", # Remove ylab here
  main = "Net Income per Month",
  col = "blue", # Line color
  pch = 16, # Point shape
  lwd = 2, # Line width
  ylim = y_limits, # Adjust y-axis limits
  xaxt = "n", # Disable x-axis labels
  yaxt = "n" # Disable y-axis to customize later
)

# X axis: one tick per month, labels perpendicular to the axis.
axis(1, at = month_index, labels = month_names, las = 2, cex.axis = 0.8)

# Y axis: pretty breaks labelled as dollar amounts. trim = TRUE avoids the
# padding format() adds to align numbers of different widths.
y_at <- pretty(y_limits) # Get pretty breaks for y-axis
y_labels <- paste0(
  "$",
  format(y_at, big.mark = ",", scientific = FALSE, trim = TRUE)
)
axis(2, at = y_at, labels = y_labels, las = 1, cex.axis = 0.8)

grid(nx = NULL, ny = NULL) # Add horizontal and vertical grid lines

legend(
  "topright",
  legend = c("Net Income"),
  col = "blue", lty = 1, pch = 16, cex = 0.8, bg = "white"
)
```
# Analysis of Expense Lines

```{r}
# Transpose so that each original column becomes a row. t() on a data frame
# returns a matrix - presumably a character matrix here, since the first
# column holds text labels.
transposed_data <- t(clean_rental_income)
```

```{r}
# view(transposed_data)
```

```{r}
# Promote the first row to column names, then drop it from the data.
# drop = FALSE keeps the result a matrix even if only one row remains.
colnames(transposed_data) <- transposed_data[1, ]
transposed_data <- transposed_data[-1, , drop = FALSE]
```

```{r}
# view(transposed_data)
```
```{r}
# Select only the columns corresponding to expenses
rental_expenses <- transposed_data[, c(
  "Bright Star Credit Union", "Bank Charges - Other", "Total Commission",
  "Filing Fees", "Total Electricity", "Total Utilities"
)]

# Convert every selected column to numeric (apply() keeps the column names).
rental_expenses <- apply(rental_expenses, 2, as.numeric)

# One total per expense line, summed over all rows.
total_expenses <- colSums(rental_expenses)

expenses_df <- data.frame(
  expense = names(total_expenses),
  total = total_expenses
)
```
## Bar Chart Expenses

```{r}
# Create the bar plot with enhancements
ggplot(expenses_df, aes(x = expense, y = total)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  geom_text(
    # Adding formatted labels inside bars. trim = TRUE removes the leading
    # spaces format() otherwise adds to align values of different widths.
    aes(label = format(total, big.mark = ",", trim = TRUE)),
    # Center labels vertically in bars
    position = position_stack(vjust = 0.5),
    # Set text color and size
    color = "black", size = 3.5
  ) +
  labs(title = "Total Expenses", x = "Expense", y = "Total") +
  theme_minimal() +
  theme(
    # Rotate x-axis labels for better readability
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Enhance y-axis labels readability
    axis.text.y = element_text(color = "gray20")
  )
```
## Pie Chart Distribution of Expenses

```{r}
# Calculate the percentages for the pie chart labels
expenses_df$percentage <- expenses_df$total / sum(expenses_df$total) * 100

ggplot(expenses_df, aes(x = "", y = total, fill = expense)) +
  # Use geom_bar and set width to 1 for pie chart
  geom_bar(width = 1, stat = "identity") +
  # Convert bar chart to pie chart using polar coordinates
  coord_polar(theta = "y") +
  # Add percentage labels inside each slice
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "white", size = 4
  ) +
  labs(
    title = "Distribution of Expenses",
    x = NULL, y = NULL,
    fill = "Expense Type"
  ) +
  theme_void() + # Remove axis and grid lines
  theme(
    legend.position = "right", # Adjust legend position
    legend.title = element_text(size = 10), # Legend title size
    legend.text = element_text(size = 8) # Legend text size
  )
```
```{r}
# install.packages("randomForest")
```

```{r}
# Fit multiple linear regression model.
# The column names contain spaces and a hyphen, so each one is wrapped in
# backticks to be read as a single variable inside the formula.
# NOTE(review): `model_data` is not created in any chunk visible in this
# file - it must exist (with these exact column names) before this runs.
multiple_lm <- lm(
  `Net Income` ~ `Total Rental Income` + `Bright Star Credit Union` +
    `Bank Charges - Other` + `Total Commission` + `Filing Fees` +
    `Total Electricity` + `Total Utilities` + `Depreciation Expense`,
  data = model_data
)

summary(multiple_lm)
```
## Multiple Linear Regression

```{r}
# Fit multiple linear regression model: `Net Income` against every other
# column of model_data (`.` expands to all remaining columns).
# NOTE(review): `model_data` is not created in any chunk visible in this
# file - confirm where it is built.
multiple_lm <- lm(`Net Income` ~ ., data = model_data)
```

```{r}
# Print summary of the model
summary(multiple_lm)
```
```{r}
# NOTE(review): `numeric_data` is not created in any chunk visible in this
# file - confirm where it is built before knitting.

# Remove rows with missing values
numeric_data <- na.omit(numeric_data)

# Pairwise correlations between all columns of numeric_data.
correlation_matrix <- cor(numeric_data)

# print(correlation_matrix)
```
```{r}
# Check for missing values in the relevant columns.
# NOTE(review): `correlation_data` is not created in any chunk visible in
# this file - confirm where it is built.
# vapply() fixes the result to one integer per element, whereas sapply()
# silently changes its return type on empty input.
missing_values <- vapply(
  correlation_data,
  function(x) sum(is.na(x)),
  integer(1)
)
# Print the number of missing values for each column
# print(missing_values)
```