#install.packages("GGally")
# Raw observations: the three vectors are parallel, with one entry per
# data point (29 in total).
infections <- c(
  245, 215, 2076, 5023, 189, 195, 123, 116, 3298, 430,
  502, 126, 112, 67, 52, 39, 54, 2356, 6781, 120,
  2389, 279, 257, 290, 234, 5689, 261, 672, 205
)
ufo2010 <- c(
  2, 6, 2, 59, 0, 1, 1, 0, 115, 0,
  0, 0, 0, 0, 0, 0, 6, 4, 2, 7,
  2, 9, 2, 29, 10, 169, 1, 40, 16
)
pop <- c(
  25101, 61912, 33341, 409061, 7481, 18675, 25581, 22286, 459598, 3915,
  67197, 34365, 3911, 32122, 31459, 2311, 28350, 101482, 19005, 20679,
  36745, 162812, 15927, 251417, 153920, 1554720, 16148, 305455, 37276
)

# Combine the vectors into one data frame; columns keep the vector names.
df <- data.frame(
  infections = infections,
  ufo2010 = ufo2010,
  pop = pop
)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Grouped bar chart comparing infections and UFO sightings per data point.
# Both series are combined into one long-format data frame so that a single
# layer can place them side by side: position = "dodge" only separates groups
# drawn by the same layer, so two separate bar layers would simply overplot.
# seq_len() is used for the index so an empty data frame yields no rows.
ggplot(
  data.frame(
    index = rep(seq_len(nrow(df)), times = 2),
    variable = rep(c("Infections", "UFO Sightings (2010)"), each = nrow(df)),
    count = c(df$infections, df$ufo2010)
  ),
  aes(x = index, y = count, fill = variable)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(
    "Variables",
    values = c("Infections" = "skyblue", "UFO Sightings (2010)" = "salmon")
  ) +
  labs(
    x = "Data Point Index",
    y = "Count",
    title = "Comparison of Infections and UFO Sightings"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

#This creates a bar chart that shows infections and UFO sightings together for each data point in the data set. There are a lot more infections than UFO sightings. From this chart alone, it does not appear that UFO sightings have a major effect on infections.
# Line chart of infections (solid) and population (dashed) per data point.
# The x axis is the row index; seq_len() is used instead of 1:nrow(df) so an
# empty data frame gives an empty index rather than c(1, 0).
ggplot(df, aes(x = seq_len(nrow(df)))) +
  geom_line(aes(y = infections, color = "Infections"), linewidth = 1) +
  geom_line(
    aes(y = pop, color = "Population"),
    linewidth = 1,
    linetype = "dashed"
  ) +
  scale_color_manual(
    "Variables",
    values = c("Infections" = "green", "Population" = "purple")
  ) +
  labs(
    x = "Data Point Index",
    y = "Count",
    title = "Trends in Infections and Population"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

#This line chart shows infections and population for each data point. Because the population values are so much larger, it is difficult to see any change in infections relative to population, but the infections line is not flat. There are major fluctuations in population, as seen in the dashed purple line.
# Scatter plot of infection counts against population, one point per row.
ggplot(data = df, mapping = aes(x = pop, y = infections)) +
  geom_point(alpha = 0.6, color = "blue") +
  ggtitle("Relationship between Population and Number of Infections") +
  xlab("Population") +
  ylab("Number of Infections") +
  theme_minimal()

#This is a scatter plot to show population vs. the number of infections. A large chunk of the data points are closer to the start of the graph near (0,0). There are some data points with a lower number of infections with high populations, and vice versa. It is difficult to see any trends on this plot. 
# Box plot of the infection counts.
ggplot(data = df) +
  geom_boxplot(mapping = aes(y = infections), fill = "lightcoral") +
  ggtitle("Distribution of Number of Infections") +
  ylab("Number of Infections") +
  theme_minimal()

#This is a box plot showing the distribution of the number of infections. As the red box shows, most of the infection values sit near the bottom of the plot, but there are some much higher outliers as well.
# Histogram of UFO sighting counts, using bins that are 5 sightings wide.
ggplot(data = df) +
  geom_histogram(
    mapping = aes(x = ufo2010),
    binwidth = 5,
    color = "black",
    fill = "orange",
    alpha = 0.7
  ) +
  ggtitle("Frequency Distribution of UFO Sightings (2010)") +
  xlab("Number of UFO Sightings (2010)") +
  ylab("Frequency") +
  theme_minimal()

#This is a frequency distribution histogram that shows the number of UFO sightings in the data set. As seen in the histogram, most of the observations are at or close to the 0 mark, meaning that many areas have had few or no UFO sightings. There are a couple of outliers with more, though.
# Scatter plot of UFO sighting counts against population, one point per row.
ggplot(data = df, mapping = aes(x = pop, y = ufo2010)) +
  geom_point(alpha = 0.6, color = "purple") +
  ggtitle("Relationship between Population and UFO Sightings (2010)") +
  xlab("Population") +
  ylab("Number of UFO Sightings (2010)") +
  theme_minimal()

#This is a scatter plot that shows the relationship between population and UFO sightings. Most of the data points are near (0,0), but the few outliers seem to follow a positive linear path. No clear trend can be seen here, but the outliers suggest that a correlation might appear in a bigger data set.
# Bubble scatter plot: infections against UFO sightings, with each point's
# size mapped to the population of that row.
ggplot(data = df, mapping = aes(x = ufo2010, y = infections)) +
  geom_point(mapping = aes(size = pop), color = "maroon", alpha = 0.6) +
  scale_size_continuous(name = "Population Size") +
  ggtitle("Infections vs. UFO Sightings, Size by Population") +
  xlab("Number of UFO Sightings (2010)") +
  ylab("Number of Infections") +
  theme_minimal()

#This is a scatter plot that scales the size of the circle by the population size. It shows the UFO sightings vs infections. The population size scale helps give easy visual context to each data point. There is no trend seen. 
library(GGally)
# Pair-plot matrix over every column of df (infections, ufo2010, pop).
ggpairs(data = df) +
  labs(title = "Pair Plot of Infections, UFO Sightings, and Population") +
  theme_minimal()

#This is a pair plot: a matrix of all the pairwise relationships between the three variables. Most of the observations are close to the 0 marks in every panel of the matrix, making it difficult to draw any conclusions supporting that UFO sightings or population impact the number of infections.