# Install Packages

```{r}
# One-time setup: every call is commented out so that knitting the notebook
# never installs anything. Uncomment a line to install a missing package.
# install.packages("dplyr")
# install.packages("tidyr")
# install.packages("readr")
# install.packages("lubridate")
# install.packages("ggplot2")
# install.packages("tidyverse")
# install.packages("psych")
# install.packages("reshape2")
# install.packages("corrplot")
# install.packages("caret")
# install.packages("scales")
```

# Libraries

```{r}
# Attach the packages used by the chunks below. The %>% pipe, filter(),
# select(), tibble() and ggplot() come from the tidyverse; dollar_format() and
# pretty_breaks() come from scales.
library(tidyverse)
library(psych)
library(ggplot2)
library(reshape2)
library(corrplot)
library(scales)
```

## Uploading the dataset

```{r}
# Load the CSV; the path is resolved relative to the working directory in
# effect when the chunk runs.
rental_income <- read.csv("XYZ LLC 2021 PL.csv")
```

# EDA (Exploratory Data Analysis)

## Head

```{r}
head(rental_income)
```

## Structure

```{r}
str(rental_income)
```

## Summary

```{r}
summary(rental_income)
```

## Omit NA values

```{r}
# Drop every row that contains an NA in any column.
clean_rental_income <- na.omit(rental_income)
```

```{r}
# view(clean_rental_income)
```

# Check the current column names

```{r}
# print(colnames(clean_rental_income))
```

# Change the headers into a date structure

```{r}
# One date per month from January 2022 through April 2023 (16 months).
Col_headers <- seq(as.Date("2022-01-01"), as.Date("2023-04-01"), by = "month")

# Assign dates as column names from the second column onwards; the first
# column is skipped. Fail loudly if the number of labels does not match the
# number of columns being renamed, instead of mislabelling columns.
stopifnot(length(Col_headers) == ncol(clean_rental_income) - 1L)

# "%B %Y" renders e.g. "January 2022". Month names follow the session locale.
names(clean_rental_income)[-1] <- format(Col_headers, "%B %Y")
```

```{r}
# print(colnames(clean_rental_income))
```

```{r}
# Keep the row(s) whose first-column label contains "Total Rental Income",
# then drop that label column so only the monthly amounts remain.
# `.` is the data frame supplied by %>%, so the test always runs against the
# rows actually being filtered. Adjust the column index if necessary.
total_rental_income_monthly <- clean_rental_income %>%
  filter(grepl("Total Rental Income", .[[1]], fixed = TRUE)) %>%
  select(-1)
```

```{r}
# Collapse to one total per month, then reshape into a long Month/Total table
# for better readability and plotting.
monthly_totals <- colSums(total_rental_income_monthly, na.rm = TRUE)
monthly_totals_df <- data.frame(
  Month = names(monthly_totals),
  Total = as.numeric(monthly_totals),
  row.names = NULL
)
```

```{r}
# print(monthly_totals_df)
```

# Bar Chart with the Total Rental Income per month

```{r}
# Month is a character column at this point, so ggplot2 orders the bars
# alphabetically rather than chronologically.
ggplot(monthly_totals_df, aes(x = Month, y = Total)) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  labs(
    title = "Monthly Total Rental Income",
    x = "Month",
    y = "Total Income ($)"
  ) +
  # Rotate x-axis labels for better readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

```{r}
# Define the specific (chronological) order of the months
months_order <- c(
  "January 2022", "February 2022", "March 2022", "April 2022",
  "May 2022", "June 2022", "July 2022", "August 2022",
  "September 2022", "October 2022", "November 2022", "December 2022",
  "January 2023", "February 2023", "March 2023", "April 2023"
)
```

```{r}
# Ensure that 'Month' is converted to a factor and ordered correctly
monthly_totals_df$Month <- factor(
  monthly_totals_df$Month,
  levels = months_order
)

# Plotting the bar chart with the months in the specified order
ggplot(monthly_totals_df, aes(x = Month, y = Total)) +
  geom_col(fill = "steelblue") +
  geom_text(
    # Format numbers with commas. trim = TRUE stops format() from padding
    # every label to a common width, which would put spaces after the "$".
    aes(label = paste0("$", format(Total, big.mark = ",", trim = TRUE))),
    # Place the label in the middle of each bar
    position = position_stack(vjust = 0.5),
    # White text, rotated to run vertically inside the bar
    color = "white", size = 3.5, angle = 90
  ) +
  theme_minimal() +
  labs(
    title = "Monthly Total Rental Income",
    x = "Month",
    y = "Total Income ($)"
  ) +
  # Rotate x-axis labels for better readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

```{r}
# Filter the data for 'Net Income': keep rows whose first-column label
# contains the text, then drop the label column to focus on numerical data.
# Adjust the column index if necessary.
net_income_data <- clean_rental_income %>%
  filter(grepl("Net Income", .[[1]], fixed = TRUE)) %>%
  select(-1)
```

```{r}
# print(net_income_data)
```

```{r}
# Prepare data for plotting: pair the first matching row with the month
# labels. This overwrites the filtered data frame with a two-column tibble.
net_income_data <- tibble(
  Month = months_order,
  Net_Income = as.numeric(net_income_data[1, ])
)
```

```{r}
# print(net_income_data)
```

```{r}
# Convert 'Month' into an ordered factor based on the months_order
net_income_data$Month <- factor(net_income_data$Month, levels = months_order)
```

```{r}
# Plotting the line graph.
# group = 1 is set for every layer: with a discrete x axis ggplot2 otherwise
# treats each month as its own group and geom_line() draws nothing.
# NOTE: the object is named `plot`, which shadows base::plot as a variable;
# calls such as plot(...) still resolve to the function.
plot <- ggplot(net_income_data, aes(x = Month, y = Net_Income, group = 1)) +
  # Line connecting the points (linewidth replaces size for lines in
  # ggplot2 >= 3.4.0)
  geom_line(color = "dodgerblue", linewidth = 1.5) +
  # Points on the line
  geom_point(color = "darkblue", size = 3.5) +
  # Linear trendline without a confidence band
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  theme_minimal() +
  scale_y_continuous(
    labels = dollar_format(prefix = "$", suffix = ""),
    breaks = pretty_breaks(n = 10)
  ) +
  labs(title = "Monthly Net Income", x = "Month", y = "Net Income ($)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

```{r}
# Print the plot
# print(plot)
```

```{r}
# Extract column names (months), which are the levels of the 'Month' factor
month_names <- levels(net_income_data$Month)  # This assumes 'Month' is a factor
month_idx <- seq_along(month_names)

# Extract values for Net Income
total_income <- net_income_data$Net_Income

# y-axis range: start at 0 (or lower if any month is negative, so those points
# are not clipped) and leave 10% headroom above the largest value.
y_limits <- c(
  min(0, total_income, na.rm = TRUE),
  max(0, total_income, na.rm = TRUE) * 1.1
)

# Create the line plot; both axes are suppressed here and drawn below
plot(
  month_idx, total_income,
  type = "o",
  xlab = "Months",
  ylab = "",                       # No y-axis title
  main = "Net Income per Month",
  col = "blue",                    # Line color
  pch = 16,                        # Point shape
  lwd = 2,                         # Line width
  ylim = y_limits,                 # Adjusted y-axis limits
  xaxt = "n",                      # Disable default x-axis
  yaxt = "n"                       # Disable default y-axis
)

# Add month labels on the x-axis with rotated labels
axis(1, at = month_idx, labels = month_names, las = 2, cex.axis = 0.8)

# Customize y-axis with dollar sign
y_at <- pretty(y_limits)  # Get pretty breaks for y-axis
axis(
  2,
  at = y_at,
  labels = paste0(
    "$",
    format(y_at, big.mark = ",", scientific = FALSE, trim = TRUE)
  ),
  las = 1,
  cex.axis = 0.8
)

# Add horizontal and vertical grid lines
grid(nx = NULL, ny = NULL)

# Add a legend
legend(
  "topright",
  legend = "Net Income",
  col = "blue", lty = 1, pch = 16, cex = 0.8, bg = "white"
)
```


# Analysis of Expense Lines

```{r}
# t() turns the data frame into a matrix whose rows are the original columns.
transposed_data <- t(clean_rental_income)
```

```{r}
# view(transposed_data)
```

```{r}
# Promote the first row to column names, then drop it from the data.
# trimws() guards the exact-name lookup below against stray leading/trailing
# spaces in the labels (presumably possible in the CSV export; verify).
colnames(transposed_data) <- trimws(transposed_data[1, ])
transposed_data <- transposed_data[-1, ]
```

```{r}
# view(transposed_data)
```

```{r}
# Select only the columns corresponding to expenses
expense_lines <- c(
  "Bright Star Credit Union", "Bank Charges - Other", "Total Commission",
  "Filing Fees", "Total Electricity", "Total Utilities"
)
rental_expenses <- transposed_data[, expense_lines]

# Convert the selected columns to numeric
rental_expenses <- apply(rental_expenses, 2, as.numeric)

# Calculate the sum of each expense column, ignoring values that could not be
# converted (same na.rm convention as the monthly income totals)
total_expenses <- colSums(rental_expenses, na.rm = TRUE)

# Create a dataframe with the expense names and their totals
expenses_df <- data.frame(
  expense = names(total_expenses),
  total = total_expenses
)
```


## Bar Chart Expenses

```{r}
# Create the bar plot with enhancements
ggplot(expenses_df, aes(x = expense, y = total)) +
  geom_col(fill = "skyblue") +
  geom_text(
    # Comma-formatted labels; trim = TRUE avoids padding to a common width
    aes(label = format(total, big.mark = ",", trim = TRUE)),
    # Center labels vertically in bars
    position = position_stack(vjust = 0.5),
    color = "black", size = 3.5
  ) +
  labs(title = "Total Expenses", x = "Expense", y = "Total") +
  theme_minimal() +
  theme(
    # Rotate x-axis labels for better readability
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Enhance y-axis labels readability
    axis.text.y = element_text(color = "gray20")
  )
```

## Pie Chart Distribution of Expenses

```{r}
# Calculate the percentages for the pie chart labels
expenses_df$percentage <- expenses_df$total / sum(expenses_df$total) * 100

# Create the pie chart: a single stacked bar of width 1, bent into a circle
# by mapping y to the angle in polar coordinates.
ggplot(expenses_df, aes(x = "", y = total, fill = expense)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  # Percentage labels centred inside each slice
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "white", size = 4
  ) +
  labs(
    title = "Distribution of Expenses",
    x = NULL, y = NULL,
    fill = "Expense Type"
  ) +
  theme_void() +  # Remove axis and grid lines
  theme(
    legend.position = "right",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
```


```{r}
# install.packages("randomForest")
```

```{r}
# Fit multiple linear regression model with every predictor spelled out.
# The names contain spaces and "-", so each one must be wrapped in backticks
# to be read as a single variable inside the formula.
# NOTE(review): `model_data` is not created anywhere in the visible notebook;
# confirm it exists (with these column names) before this chunk runs.
multiple_lm <- lm(
  `Net Income` ~ `Total Rental Income` + `Bright Star Credit Union` +
    `Bank Charges - Other` + `Total Commission` + `Filing Fees` +
    `Total Electricity` + `Total Utilities` + `Depreciation Expense`,
  data = model_data
)

# Print summary of the model
summary(multiple_lm)
```


## Multiple Linear Regression

```{r}
# Fit multiple linear regression model; `.` means "every other column of
# model_data" is used as a predictor.
# NOTE(review): `model_data` is not created anywhere in the visible notebook;
# confirm it is defined before this chunk runs.
multiple_lm <- lm(`Net Income` ~ ., data = model_data)
```

```{r}
# Print summary of the model
summary(multiple_lm)
```

```{r}
# Remove rows with missing values
# NOTE(review): `numeric_data` is not created anywhere in the visible
# notebook; confirm it is defined before this chunk runs.
numeric_data <- na.omit(numeric_data)

# Calculate the correlation matrix
correlation_matrix <- cor(numeric_data)

# Print the correlation matrix
# print(correlation_matrix)
```


```{r}
# Check for missing values in the relevant columns: one NA count per column.
# vapply() pins the result to an integer vector, whereas sapply() changes its
# return type on empty input.
# NOTE(review): `correlation_data` is not created anywhere in the visible
# notebook; confirm it is defined before this chunk runs.
missing_values <- vapply(
  correlation_data,
  function(x) sum(is.na(x)),
  integer(1)
)

# Print the number of missing values for each column
# print(missing_values)

```

R Notebook