# Raw observations: three numeric vectors of equal length, matched by position
infections <- c(
  245, 215, 2076, 5023, 189, 195, 123, 116, 3298, 430,
  502, 126, 112, 67, 52, 39, 54, 2356, 6781, 120,
  2389, 279, 257, 290, 234, 5689, 261, 672, 205
)
ufo2010 <- c(
  2, 6, 2, 59, 0, 1, 1, 0, 115, 0,
  0, 0, 0, 0, 0, 0, 6, 4, 2, 7,
  2, 9, 2, 29, 10, 169, 1, 40, 16
)
pop <- c(
  25101, 61912, 33341, 409061, 7481, 18675, 25581, 22286, 459598, 3915,
  67197, 34365, 3911, 32122, 31459, 2311, 28350, 101482, 19005, 20679,
  36745, 162812, 15927, 251417, 153920, 1554720, 16148, 305455, 37276
)

# Bind the vectors column-wise into one data frame and preview the first rows
df <- data.frame(infections = infections, ufo2010 = ufo2010, pop = pop)
head(df)
##   infections ufo2010    pop
## 1        245       2  25101
## 2        215       6  61912
## 3       2076       2  33341
## 4       5023      59 409061
## 5        189       0   7481
## 6        195       1  18675


library(ggplot2)
library(dplyr)
 # Bar Graph: Comparing Infections and UFO Sightings
# Reshape to long format: one row per data point per variable. Dodging only
# separates bars that belong to the same layer, so drawing each variable in
# its own bar layer would stack the bars on top of each other instead of
# placing them side by side.
bar_data <- data.frame(
  index = rep(seq_len(nrow(df)), times = 2),
  variable = rep(c("Infections", "UFO Sightings (2010)"), each = nrow(df)),
  count = c(df$infections, df$ufo2010)
)

# One dodged column layer: for every index, the infection bar and the UFO bar
# sit next to each other and share the y axis.
ggplot(bar_data, aes(x = index, y = count, fill = variable)) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    name = "Variables",
    values = c("Infections" = "#008080", "UFO Sightings (2010)" = "yellow")
  ) +
  labs(
    x = "Data Point Index",
    y = "Count",
    title = "Comparison of Infections and UFO Sightings"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, face = "bold")
  )


This bar graph compares the number of infections and UFO sightings for each data point. The scale of infections is significantly higher than that of UFO sightings in most cases. Although most data points (20 of 29) report at least one UFO sighting, these counts are low relative to the infection numbers.

# Line Chart: Trends in Infections and Population
# The x position is each row's index. seq_along() on a data column is used
# instead of 1:nrow(df): it yields an empty sequence (not c(1, 0)) when there
# are no rows, and it is resolved from the plot's data rather than from a
# global object.
ggplot(df, aes(x = seq_along(infections))) +
  geom_line(aes(y = infections, color = "Infections"), linewidth = 1) +
  geom_line(
    aes(y = pop, color = "Population"),
    linewidth = 1,
    linetype = "dashed"
  ) +
  scale_color_manual(
    name = "Variables",
    values = c("Infections" = "salmon", "Population" = "purple")
  ) +
  labs(
    x = "Data Point Index",
    y = "Count",
    title = "Trends in Infections and Population"
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, face = "bold")
  )


This line chart shows the trends of infections and population across the data points. The population values are on a much larger scale than infection counts, making it difficult to observe detailed changes in infections on the same plot. However, we can see the overall fluctuations of both variables.

# Scatter plot of infections against population, overlaid with a straight
# linear-model fit drawn in red without its confidence band
ggplot(data = df, mapping = aes(x = pop, y = infections)) +
  geom_point(alpha = 0.6, color = "blue") +
  stat_smooth(method = "lm", color = "red", se = FALSE) +
  labs(
    title = "Relationship between Population and Number of Infections",
    x = "Population",
    y = "Number of Infections"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))


This scatter plot explores the relationship between population size and the number of infections. There doesn’t appear to be a strong linear correlation. While some high-population areas have high infection counts, this is not consistently the case.

# Box plot of the infections column (a single box; no grouping variable)
ggplot(data = df, mapping = aes(y = infections)) +
  geom_boxplot(fill = "blue") +
  labs(
    title = "Distribution of Number of Infections",
    y = "Number of Infections"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))


This box plot summarizes the distribution of the ‘infections’ variable. It shows the median, quartiles, and potential outliers. The plot indicates that the majority of infection counts are relatively low, with some higher values identified as outliers.

# Histogram of UFO sighting counts, using bins that are 5 sightings wide
ggplot(data = df, mapping = aes(x = ufo2010)) +
  geom_histogram(
    binwidth = 5,
    color = "black",
    fill = "orange",
    alpha = 0.7
  ) +
  labs(
    title = "Frequency Distribution of UFO Sightings (2010)",
    x = "Number of UFO Sightings (2010)",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))


This histogram shows the frequency distribution of UFO sightings in 2010. The distribution is heavily right-skewed, with most values concentrated near zero, indicating that most data points have very few or no reported UFO sightings.


# Scatter plot of UFO sighting counts against population, drawn with
# enlarged, semi-transparent purple markers
ggplot(data = df, mapping = aes(x = pop, y = ufo2010)) +
  geom_point(size = 3, alpha = 0.6, color = "purple") +
  labs(
    title = "Relationship between Population and UFO Sightings (2010)",
    x = "Population",
    y = "Number of UFO Sightings (2010)"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))


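This scatter plot examines the relationship between population size and the number of UFO sightings. Sightings tend to increase with population: the five most populous data points also report the five highest sighting counts. However, most points are clustered at low populations with few sightings, and the single largest population lies far from the rest, so the overall trend should be interpreted with caution.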

# Bubble chart: infections against UFO sightings, with each marker's size
# mapped to the population column
ggplot(
  data = df,
  mapping = aes(x = ufo2010, y = infections, size = pop)
) +
  geom_point(color = "firebrick", alpha = 0.9) +
  scale_size_continuous(name = "Population Size") +
  labs(
    title = "Infections vs. UFO Sightings, Size by Population",
    x = "Number of UFO Sightings (2010)",
    y = "Number of Infections"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))


This scatter plot shows the relationship between infections and UFO sightings, with the size of each point representing the population size. It helps to visualize if areas with higher infections or UFO sightings also tend to have larger populations. No strong pattern is immediately apparent.


# Pair Plot: Overview of Relationships
# GGally supplies ggpairs(), which draws a matrix of panels covering every
# pairing of the data frame's columns
library(GGally)
ggpairs(data = df) +
  ggtitle(label = "Pair Plot of Infections, UFO Sightings, and Population") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))


The pair plot provides a matrix with scatter plots for each pair of variables below the diagonal, density plots for the distribution of each individual variable on the diagonal, and pairwise correlation coefficients above the diagonal. This gives a quick overview of potential linear relationships and the shape of the distributions. The distributions of infections and UFO sightings appear skewed, and the scatter plots reiterate the lack of strong linear correlations observed in the individual plots.