# Load the raw PDM.csv export into a data frame.
# NOTE(review): this is an absolute, user-specific Windows path, so the script
# only runs on a machine where this exact file exists.
PDM <- read.csv(
  file = "C:/Users/mohamedabdirahman.is/Desktop/R Training in MoG/PDM.csv"
)

# Load necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Normalise the two location columns so that values differing only in letter
# case or surrounding whitespace end up with the same spelling.
PDM_data <- PDM %>%
  mutate(
    across(
      c(Region, District),
      ~ tolower(trimws(.x))  # strip leading/trailing whitespace, then lowercase
    )
  )

# Mean household size per region.
# Rows with a missing household-size answer are left out of each region's mean.
average_family_size <- PDM_data %>%
  group_by(Region) %>%
  summarise(
    Average_Family_Size = mean(
      How.many.people.are.currently.living.in.your.household.,
      na.rm = TRUE
    )
  )

# Show the per-region table under a plain-text heading.
cat("Average Family Sizes by Region:\n")
## Average Family Sizes by Region:
print(average_family_size)
## # A tibble: 5 × 2
##   Region Average_Family_Size
##   <chr>                <dbl>
## 1 awdal                 6.63
## 2 bakool                6   
## 3 bari                  7.64
## 4 bay                   6.97
## 5 nugaal                7.07
# Bar chart of the per-region means; bar heights are taken directly from the
# Average_Family_Size column rather than from row counts.
ggplot(
  data = average_family_size,
  mapping = aes(x = Region, y = Average_Family_Size)
) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Average Family Sizes by Region",
    x = "Region",
    y = "Average Family Size"
  ) +
  theme_minimal()

# Compare the average food expenses of the districts.
# Rows with a missing Food value are left out of each district's mean.
average_food_expenses <- PDM_data %>%
  group_by(District) %>%
  summarise(Average_Food_Expenses = mean(Food, na.rm = TRUE))

# Print the results.
# cat() writes the heading as plain text; print() on a character string would
# render it as a vector, with a leading `[1]` index and surrounding quotes.
cat("Average Food Expenses by District:\n")
## Average Food Expenses by District:
print(average_food_expenses)
## # A tibble: 10 × 2
##    District     Average_Food_Expenses
##    <chr>                        <dbl>
##  1 bander bayla                  49.3
##  2 burhakaba                     38.5
##  3 dangorayo                     64.0
##  4 diinsoor                      69.0
##  5 eyl                           66.6
##  6 iskushuban                    57.8
##  7 lughaya                       88.4
##  8 qardho                        62.4
##  9 yeed                         100  
## 10 zeila                         88.9
# Bar chart of the per-district means; bar heights are taken directly from the
# Average_Food_Expenses column rather than from row counts.
ggplot(
  data = average_food_expenses,
  mapping = aes(x = District, y = Average_Food_Expenses)
) +
  geom_col(fill = "lightgreen") +
  labs(
    title = "Average Food Expenses by District",
    x = "District",
    y = "Average Food Expenses"
  ) +
  theme_minimal()