# Load necessary libraries
pacman::p_load(pacman, readr, tidyr, forcats, dplyr, ggplot2, patchwork, reshape2, scales, gridExtra)
# Function to load and clean datasets
load_and_clean_data <- function(file_path) {
data <- read_csv(file_path) %>%
filter(!is.na(country))
return(data)
}
# Load the datasets from the CSV files
Companies_ranked_by_Revenue <- load_and_clean_data("Companies_ranked_by_Revenue.csv")
## Rows: 9912 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Name, Symbol, country
## dbl (3): Rank, revenue_ttm, price (GBP)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Companies_ranked_by_Earnings <- load_and_clean_data("Companies_ranked_by_Earnings.csv")
## Rows: 9912 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Name, Symbol, country
## dbl (3): Rank, earnings_ttm, price (GBP)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Companies_ranked_by_Market_Cap <- load_and_clean_data("Companies_ranked_by_Market_Cap.csv")
## Rows: 9912 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Name, Symbol, country
## dbl (3): Rank, marketcap, price (GBP)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Companies_ranked_by_P_E_ratio <- load_and_clean_data("Companies_ranked_by_P_E_ratio.csv")
## Rows: 9912 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Name, Symbol, country
## dbl (3): Rank, pe_ratio_ttm, price (GBP)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Companies_ranked_by_Dividend_Yield <- load_and_clean_data("Companies_ranked_by_Dividend_Yield.csv")
## Rows: 9912 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Name, Symbol, country
## dbl (3): Rank, dividend_yield_ttm, price (GBP)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Merge all datasets based on common columns: Name, Symbol, and country
combined_data <- Companies_ranked_by_Revenue %>%
inner_join(Companies_ranked_by_Earnings, by = c("Name", "Symbol", "country")) %>%
inner_join(Companies_ranked_by_Market_Cap, by = c("Name", "Symbol", "country")) %>%
inner_join(Companies_ranked_by_P_E_ratio, by = c("Name", "Symbol", "country")) %>%
inner_join(Companies_ranked_by_Dividend_Yield, by = c("Name", "Symbol", "country")) %>%
mutate(across(starts_with("ttm"), ~replace_na(., 0))) # Replace NA with 0 for relevant columns
# Filter the top 15 companies based on revenue rank for focused analysis
top_15_companies <- combined_data %>%
arrange(Rank.x) %>%
slice(1:15)
# Summary of the numerical columns for overall analysis
summary_stats <- combined_data %>%
summarize(
total_companies = n(),
avg_revenue = mean(revenue_ttm, na.rm = TRUE),
avg_earnings = mean(earnings_ttm, na.rm = TRUE),
avg_marketcap = mean(marketcap, na.rm = TRUE),
avg_pe_ratio = mean(pe_ratio_ttm, na.rm = TRUE),
avg_dividend_yield = mean(dividend_yield_ttm, na.rm = TRUE)
)
# Print summary statistics to the console
print(summary_stats)
## # A tibble: 1 × 6
## total_companies avg_revenue avg_earnings avg_marketcap avg_pe_ratio
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 9910 5707180612. 711244595. 8743628906. -3.26e14
## # ℹ 1 more variable: avg_dividend_yield <dbl>
# Visualise Top 15 Companies by Revenue, Earnings, Market Cap, P/E Ratio, and Dividend Yield
# Bar plot: Top 15 Companies by Revenue
p1 <- ggplot(top_15_companies, aes(x = reorder(Name, -revenue_ttm), y = revenue_ttm / 1e9)) +
geom_bar(stat = "identity", fill = "#3399ff") +
labs(title = "Top 15 Companies by Revenue", x = "Company", y = "Revenue (£B)") +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Bar plot: Top 15 Companies by Earnings
p2 <- ggplot(top_15_companies, aes(x = reorder(Name, -earnings_ttm), y = earnings_ttm / 1e9)) +
geom_bar(stat = "identity", fill = "#339900") +
labs(title = "Top 15 Companies by Earnings", x = "Company", y = "Earnings (£B)") +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Bar plot: Top 15 Companies by Market Cap
p3 <- ggplot(top_15_companies, aes(x = reorder(Name, -marketcap), y = marketcap / 1e9)) +
geom_bar(stat = "identity", fill = "#993300") +
labs(title = "Top 15 Companies by Market Cap", x = "Company", y = "Market Cap (£B)") +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Bar plot: Top 15 Companies by P/E Ratio
p4 <- ggplot(top_15_companies, aes(x = reorder(Name, -pe_ratio_ttm), y = pe_ratio_ttm)) +
geom_bar(stat = "identity", fill = "#cc6600") +
labs(title = "Top 15 Companies by P/E Ratio", x = "Company", y = "P/E Ratio") +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Bar plot: Top 15 Companies by Dividend Yield
p5 <- ggplot(top_15_companies, aes(x = reorder(Name, -dividend_yield_ttm), y = dividend_yield_ttm)) +
geom_bar(stat = "identity", fill = "#ff6699") +
labs(title = "Top 15 Companies by Dividend Yield", x = "Company", y = "Dividend Yield (%)") +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Correlation Analysis: Heatmap
correlation_matrix <- cor(top_15_companies %>% select(revenue_ttm, earnings_ttm, marketcap, pe_ratio_ttm, dividend_yield_ttm), use = "complete.obs")
melted_correlation <- melt(correlation_matrix)
p6 <- ggplot(melted_correlation, aes(Var1, Var2, fill = value)) +
geom_tile() +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab", name="Correlation") +
theme_minimal() +
labs(title = "Correlation Heatmap of Financial Metrics") +
theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Arrange the plots in a grid layout
grid.arrange(p1, p2, p3, p4, p5, p6, nrow = 2, ncol = 3)

# Cross-Analysis: Scatter plots comparing the metrics
# Scatter plot: Revenue vs P/E Ratio
scatter1 <- ggplot(top_15_companies, aes(x = revenue_ttm / 1e9, y = pe_ratio_ttm, color = country)) +
geom_point(size = 3) +
labs(title = "Revenue vs P/E Ratio (Top 15 Companies)", x = "Revenue (£B)", y = "P/E Ratio") +
theme()
# Scatter plot: Earnings vs Dividend Yield
scatter2 <- ggplot(top_15_companies, aes(x = earnings_ttm / 1e9, y = dividend_yield_ttm, color = country)) +
geom_point(size = 3) +
labs(title = "Earnings vs Dividend Yield (Top 15 Companies)", x = "Earnings (£B)", y = "Dividend Yield (%)") +
theme()
# Scatter plot: Revenue vs Earnings
scatter3 <- ggplot(top_15_companies, aes(x = revenue_ttm / 1e9, y = earnings_ttm / 1e9, color = country)) +
geom_point(size = 3) +
labs(title = "Revenue vs Earnings (Top 15 Companies)", x = "Revenue (£B)", y = "Earnings (£B)") +
theme()
# Scatter plot: Revenue vs Market Cap
scatter4 <- ggplot(top_15_companies, aes(x = revenue_ttm / 1e9, y = marketcap / 1e9, color = country)) +
geom_point(size = 3) +
labs(title = "Revenue vs Market Cap (Top 15 Companies)", x = "Revenue (£B)", y = "Market Cap (£B)") +
theme()
# Arrange the 4 scatter plots into a 2x2 grid
grid.arrange(scatter1, scatter2, scatter3, scatter4, nrow = 2)

# Country-Level Analysis: Average Revenue, P/E Ratio, and Dividend Yield by Country
country_summary <- combined_data %>%
group_by(country) %>%
summarize(
avg_revenue = mean(revenue_ttm, na.rm = TRUE),
avg_earnings = mean(earnings_ttm, na.rm = TRUE),
avg_marketcap = mean(marketcap, na.rm = TRUE),
avg_pe_ratio = mean(pe_ratio_ttm, na.rm = TRUE),
avg_dividend_yield = mean(dividend_yield_ttm, na.rm = TRUE),
company_count = n()
) %>%
arrange(desc(avg_revenue))
# Bar Plot: Top 10 Countries by Average Dividend Yield
p7 <- ggplot(country_summary %>% top_n(10, avg_dividend_yield), aes(x = fct_reorder(country, avg_dividend_yield), y = avg_dividend_yield)) +
geom_bar(stat = "identity", fill = "#ff6699") +
labs(title = "Top 10 Countries by Average Dividend Yield", x = "Country", y = "Average Dividend Yield (%)") +
theme() +
coord_flip() +
scale_y_continuous(labels = scales::comma)
# Country-Level Analysis: Average Revenue, Earnings, and Market Cap by Country
country_summary <- combined_data %>%
group_by(country) %>%
summarize(
avg_revenue = mean(revenue_ttm, na.rm = TRUE),
avg_earnings = mean(earnings_ttm, na.rm = TRUE),
avg_marketcap = mean(marketcap, na.rm = TRUE),
company_count = n()
) %>%
arrange(desc(avg_revenue))
# Bar Plot: Top 10 Countries by Average Revenue, Earnings, and Market Cap
# Top 10 Countries by Revenue
p8 <- ggplot(country_summary %>% top_n(10, avg_revenue), aes(x = fct_reorder(country, avg_revenue), y = avg_revenue / 1e9)) +
geom_bar(stat = "identity", fill = "#3399ff") +
labs(title = "Top 10 Countries by Average Revenue", x = "Country", y = "Average Revenue (£B)") +
theme() +
coord_flip()
# Top 10 Countries by Earnings
p9 <- ggplot(country_summary %>% top_n(10, avg_earnings), aes(x = fct_reorder(country, avg_earnings), y = avg_earnings / 1e9)) +
geom_bar(stat = "identity", fill = "#339900") +
labs(title = "Top 10 Countries by Average Earnings", x = "Country", y = "Average Earnings (£B)") +
theme() +
coord_flip()
# Top 10 Countries by Market Cap
p10 <- ggplot(country_summary %>% top_n(10, avg_marketcap), aes(x = fct_reorder(country, avg_marketcap), y = avg_marketcap / 1e9)) +
geom_bar(stat = "identity", fill = "#993300") +
labs(title = "Top 10 Countries by Average Market Cap", x = "Country", y = "Average Market Cap (£B)") +
theme() +
coord_flip()
# Arrange the plots in a grid layout
grid.arrange(p7, p8, p9, p10, nrow = 2, ncol = 2)

# Correlation Analysis: Revenue, Earnings, and Market Cap
correlation_revenue_earnings <- cor(top_15_companies$revenue_ttm, top_15_companies$earnings_ttm, use = "complete.obs")
correlation_revenue_marketcap <- cor(top_15_companies$revenue_ttm, top_15_companies$marketcap, use = "complete.obs")
print(paste("Correlation between Revenue and Earnings: ", round(correlation_revenue_earnings, 2)))
## [1] "Correlation between Revenue and Earnings: 0.17"
print(paste("Correlation between Revenue and Market Cap: ", round(correlation_revenue_marketcap, 2)))
## [1] "Correlation between Revenue and Market Cap: 0.25"
# Summarise total revenue by country and calculate percentages
revenue_by_country <- combined_data %>%
group_by(country) %>%
summarize(total_revenue = sum(revenue_ttm, na.rm = TRUE)) %>%
arrange(desc(total_revenue)) %>%
slice(1:10) %>%
mutate(percentage = total_revenue / sum(total_revenue) * 100)
# Pie chart with percentage labels
p11 <- ggplot(revenue_by_country, aes(x = "", y = total_revenue, fill = country)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0) +
labs(title = "Revenue Distribution by Country (Top 10)", y = "", x = "") +
theme_void() +
theme(legend.title = element_blank()) +
scale_fill_brewer(palette = "Set3") +
geom_text(aes(label = paste0(round(percentage, 1), "%")),
position = position_stack(vjust = 0.5)) # Add percentage labels
# Calculate percentages for top 10 companies by market cap
top_10_marketcap <- combined_data %>%
arrange(desc(marketcap)) %>%
slice(1:10) %>%
mutate(percentage = marketcap / sum(marketcap) * 100)
# Pie chart with percentage labels
p12 <- ggplot(top_10_marketcap, aes(x = "", y = marketcap, fill = Name)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0) +
labs(title = "Market Capitalisation Distribution (Top 10 Companies)", y = "", x = "") +
theme_void() +
theme(legend.title = element_blank()) +
scale_fill_brewer(palette = "Set3") +
geom_text(aes(label = paste0(round(percentage, 1), "%")),
position = position_stack(vjust = 0.5)) # Add percentage labels
# Arrange the plots in a grid layout
grid.arrange(p11, p12, ncol = 2)
