#install.packages("ggplot2")
#install.packages("dplyr")
#install.packages("GGally")
# Infection Data Visualizations in R

# Build the data set. Each position across the three vectors is one data
# point: its infection count, its 2010 UFO sighting count, and its population.
infections <- c(
  245, 215, 2076, 5023, 189, 195, 123, 116, 3298, 430,
  502, 126, 112, 67, 52, 39, 54, 2356, 6781, 120,
  2389, 279, 257, 290, 234, 5689, 261, 672, 205
)
ufo2010 <- c(
  2, 6, 2, 59, 0, 1, 1, 0, 115, 0,
  0, 0, 0, 0, 0, 0, 6, 4, 2, 7,
  2, 9, 2, 29, 10, 169, 1, 40, 16
)
pop <- c(
  25101, 61912, 33341, 409061, 7481, 18675, 25581, 22286, 459598, 3915,
  67197, 34365, 3911, 32122, 31459, 2311, 28350, 101482, 19005, 20679,
  36745, 162812, 15927, 251417, 153920, 1554720, 16148, 305455, 37276
)
# Combine the vectors column-wise; column names match the vector names.
df <- data.frame(infections = infections, ufo2010 = ufo2010, pop = pop)

# Load necessary libraries
library(ggplot2)
library(dplyr)

# --- 1. Bar Graph: Comparing Infections and UFO Sightings ---
# position = "dodge" only separates bars that belong to the same layer, so two
# independent bar layers end up drawn on top of each other. Stack both series
# into one long data frame and draw a single dodged layer so the bars for each
# data point sit side by side.
bar_long <- data.frame(
  # seq_len() rather than 1:nrow(df): the latter yields c(1, 0) for zero rows.
  index = rep(seq_len(nrow(df)), times = 2),
  variable = rep(c("Infections", "UFO Sightings (2010)"), each = nrow(df)),
  count = c(df$infections, df$ufo2010)
)
ggplot(bar_long, aes(x = index, y = count, fill = variable)) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    "Variables",
    values = c("Infections" = "skyblue", "UFO Sightings (2010)" = "salmon")
  ) +
  labs(
    x = "Data Point Index",
    y = "Count",
    title = "Comparison of Infections and UFO Sightings"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Observation: This bar graph compares the number of infections and UFO sightings for each
# data point. The scale of infections is significantly higher than UFO sightings in most cases.
# UFO sightings are non-zero for most data points (20 of 29), but their counts are low relative
# to the infection numbers.

# --- 2. Line Chart: Trends in Infections and Population ---
# Draws infections (solid) and population (dashed) against the row index of df
# on a shared y axis. The index is built with seq_len() because 1:nrow(df)
# would yield c(1, 0) for a data frame with zero rows.
ggplot(df, aes(x = seq_len(nrow(df)))) +
  geom_line(aes(y = infections, color = "Infections"), linewidth = 1) +
  geom_line(
    aes(y = pop, color = "Population"),
    linewidth = 1,
    linetype = "dashed"
  ) +
  # The constant strings mapped to color above become the legend keys.
  scale_color_manual(
    "Variables",
    values = c("Infections" = "green", "Population" = "purple")
  ) +
  labs(
    x = "Data Point Index",
    y = "Count",
    title = "Trends in Infections and Population"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Observation: This line chart shows the trends of infections and population across the data points.
# The population values are on a much larger scale than infection counts, making it difficult to
# observe detailed changes in infections on the same plot. However, we can see the overall
# fluctuations of both variables.

# --- 3. Scatter Plot: Relationship between Population and Infections ---
# One semi-transparent blue point per row of df: population on the x axis,
# infection count on the y axis.
ggplot(data = df, mapping = aes(x = pop, y = infections)) +
  geom_point(alpha = 0.6, color = "blue") +
  xlab("Population") +
  ylab("Number of Infections") +
  ggtitle("Relationship between Population and Number of Infections") +
  theme_minimal()

# Observation: This scatter plot explores the relationship between population size and the number
# of infections. There doesn't appear to be a strong linear correlation. While some high-population
# areas have high infection counts, this is not consistently the case.

# --- 4. Box Plot: Distribution of Infections ---
# A single box summarising the infections column; only the y aesthetic is
# mapped, so there is no grouping on the x axis.
ggplot(data = df, mapping = aes(y = infections)) +
  geom_boxplot(fill = "lightcoral") +
  ylab("Number of Infections") +
  ggtitle("Distribution of Number of Infections") +
  theme_minimal()

# Observation: This box plot summarizes the distribution of the 'infections' variable. It shows the
# median, quartiles, and potential outliers. The plot indicates that the majority of infection
# counts are relatively low, with some higher values identified as outliers.
# --- 5. Histogram: Frequency Distribution of UFO Sightings ---
# Counts how many data points fall into each 5-wide bin of UFO sightings.
# boundary = 0 puts the bin edges at 0, 5, 10, ...; without it the bins are
# centred on multiples of the bin width, so the first bar would span -2.5 to
# 2.5 even though sighting counts cannot be negative.
ggplot(df, aes(x = ufo2010)) +
  geom_histogram(
    binwidth = 5,
    boundary = 0,
    fill = "orange",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    x = "Number of UFO Sightings (2010)",
    y = "Frequency",
    title = "Frequency Distribution of UFO Sightings (2010)"
  ) +
  theme_minimal()

# Observation: This histogram shows the frequency distribution of UFO sightings in 2010. The
# distribution is heavily skewed towards zero, indicating that most data points have very few or
# no reported UFO sightings.
# --- 6. Scatter Plot: Relationship between Population and UFO Sightings ---
# One semi-transparent purple point per row of df: population on the x axis,
# 2010 UFO sighting count on the y axis.
ggplot(data = df, mapping = aes(x = pop, y = ufo2010)) +
  geom_point(alpha = 0.6, color = "purple") +
  xlab("Population") +
  ylab("Number of UFO Sightings (2010)") +
  ggtitle("Relationship between Population and UFO Sightings (2010)") +
  theme_minimal()

# Observation: This scatter plot examines the relationship between population size and the number
# of UFO sightings. Sightings rise with population: the largest populations report the most
# sightings, although the trend is driven largely by a handful of high-population points.
# --- 7. Scatter Plot: Infections vs. UFOs with Population Size ---
# Bubble chart: UFO sightings on x, infections on y, and the area of each
# maroon point scaled by the population of that row.
ggplot(data = df, mapping = aes(x = ufo2010, y = infections)) +
  geom_point(aes(size = pop), color = "maroon", alpha = 0.6) +
  # The scale name becomes the title of the size legend.
  scale_size_continuous(name = "Population Size") +
  xlab("Number of UFO Sightings (2010)") +
  ylab("Number of Infections") +
  ggtitle("Infections vs. UFO Sightings, Size by Population") +
  theme_minimal()

# Observation: This scatter plot shows the relationship between infections and UFO sightings, with
# the size of each point representing the population size. It helps to visualize if areas with higher
# infections or UFO sightings also tend to have larger populations. No strong pattern is immediately
# apparent.
# --- 8. Pair Plot: Overview of Relationships ---
# GGally supplies ggpairs(), which draws every pairwise combination of the
# columns of df in a single plot matrix.
library(GGally)
ggpairs(
  df,
  title = "Pair Plot of Infections, UFO Sightings, and Population"
) +
  theme_minimal()

# Observation: The pair plot provides a matrix of scatter plots for each pair of variables,
# density plots for the distribution of each individual variable, and the pairwise correlation
# coefficients. This gives a quick overview of potential linear relationships and the shape of
# the distributions. The distributions of infections and UFO sightings appear skewed. Of the
# three pairs, population and UFO sightings are the most strongly correlated; the relationships
# involving infections are weaker.
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQojaW5zdGFsbC5wYWNrYWdlcygiZ2dwbG90MiIpCmBgYAoKCmBgYHtyfQojaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKQpgYGAKCgpgYGB7cn0KI2luc3RhbGwucGFja2FnZXMoIkdHYWxseSIpCmBgYAoKCmBgYHtyfQojIEluZmVjdGlvbiBEYXRhIFZpc3VhbGl6YXRpb25zIGluIFIKCiMgQ3JlYXRlIHRoZSBkYXRhIGZyYW1lCmluZmVjdGlvbnMgPC0gYygyNDUsIDIxNSwgMjA3NiwgNTAyMywgMTg5LCAxOTUsIDEyMywgMTE2LCAzMjk4LCA0MzAsIDUwMiwgMTI2LCAxMTIsIDY3LCA1MiwgMzksIDU0LCAyMzU2LCA2NzgxLCAxMjAsIDIzODksIDI3OSwgMjU3LCAyOTAsIDIzNCwgNTY4OSwgMjYxLCA2NzIsIDIwNSkKdWZvMjAxMCA8LSBjKDIsIDYsIDIsIDU5LCAwLCAxLCAxLCAwLCAxMTUsIDAsIDAsIDAsIDAsIDAsIDAsIDAsIDYsIDQsIDIsIDcsIDIsIDksIDIsIDI5LCAxMCwgMTY5LCAxLCA0MCwgMTYpCnBvcCA8LSBjKDI1MTAxLCA2MTkxMiwgMzMzNDEsIDQwOTA2MSwgNzQ4MSwgMTg2NzUsIDI1NTgxLCAyMjI4NiwgNDU5NTk4LCAzOTE1LCA2NzE5NywgMzQzNjUsIDM5MTEsIDMyMTIyLCAzMTQ1OSwgMjMxMSwgMjgzNTAsIDEwMTQ4MiwgMTkwMDUsIDIwNjc5LCAzNjc0NSwgMTYyODEyLCAxNTkyNywgMjUxNDE3LCAxNTM5MjAsIDE1NTQ3MjAsIDE2MTQ4LCAzMDU0NTUsIDM3Mjc2KQpgYGAKCgoKYGBge3J9CmRmIDwtIGRhdGEuZnJhbWUoaW5mZWN0aW9ucywgdWZvMjAxMCwgcG9wKQoKIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJpZXMKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGRwbHlyKQoKIyAtLS0gMS4gQmFyIEdyYXBoOiBDb21wYXJpbmcgSW5mZWN0aW9ucyBhbmQgVUZPIFNpZ2h0aW5ncyAtLS0KZ2dwbG90KGRmLCBhZXMoeCA9IDE6bnJvdyhkZikpKSArCiAgZ2VvbV9iYXIoYWVzKHkgPSBpbmZlY3Rpb25zLCBmaWxsID0gIkluZmVjdGlvbnMiKSwgc3RhdCA9ICJpZGVudGl0eSIsIHBvc2l0aW9uID0gImRvZGdlIikgKwogIGdlb21fYmFyKGFlcyh5ID0gdWZvMjAxMCwgZmlsbCA9ICJVRk8gU2lnaHRpbmdzICgyMDEwKSIpLCBzdGF0ID0gImlkZW50aXR5IiwgcG9zaXRpb24gPSAiZG9kZ2UiLCBhbHBoYSA9IDAuNykgKwogIHNjYWxlX2ZpbGxfbWFudWFsKCJWYXJpYWJsZXMiLCB2YWx1ZXMgPSBjKCJJbmZlY3Rpb25zIiA9ICJza3libHVlIiwgIlVGTyBTaWdodGluZ3MgKDIwMTApIiA9ICJzYWxtb24iKSkgKwogIGxhYnMoeCA9ICJEYXRhIFBvaW50IEluZGV4IiwgeSA9ICJDb3VudCIsIHRpdGxlID0gIkNvbXBhcmlzb24gb2YgSW5mZWN0aW9ucyBhbmQgVUZPIFNpZ2h0aW5ncyIpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbiA9ICJ0b3AiKQojIE9ic2VydmF0aW9uOiBUaGlzIGJhciBncmFwaCBjb21wYXJlcyB0aGUgbnVtYmVyIG9mIGluZmVjdGlvbnMgYW5kIFVGTyBzaWdodGluZ3Mg
Zm9yIGVhY2gKIyBkYXRhIHBvaW50LiBUaGUgc2NhbGUgb2YgaW5mZWN0aW9ucyBpcyBzaWduaWZpY2FudGx5IGhpZ2hlciB0aGFuIFVGTyBzaWdodGluZ3MgaW4gbW9zdCBjYXNlcy4KIyBUaGVyZSBhcmUgZmV3IGluc3RhbmNlcyB3aGVyZSBVRk8gc2lnaHRpbmdzIGFyZSBub24temVybywgYnV0IHRoZWlyIGNvdW50cyBhcmUgbG93IHJlbGF0aXZlCiMgdG8gdGhlIGluZmVjdGlvbiBudW1iZXJzLgpgYGAKCgoKCmBgYHtyfQoKIyAtLS0gMi4gTGluZSBDaGFydDogVHJlbmRzIGluIEluZmVjdGlvbnMgYW5kIFBvcHVsYXRpb24gLS0tCmdncGxvdChkZiwgYWVzKHggPSAxOm5yb3coZGYpKSkgKwogIGdlb21fbGluZShhZXMoeSA9IGluZmVjdGlvbnMsIGNvbG9yID0gIkluZmVjdGlvbnMiKSwgbGluZXdpZHRoID0gMSkgKwogIGdlb21fbGluZShhZXMoeSA9IHBvcCwgY29sb3IgPSAiUG9wdWxhdGlvbiIpLCBsaW5ld2lkdGggPSAxLCBsaW5ldHlwZSA9ICJkYXNoZWQiKSArCiAgc2NhbGVfY29sb3JfbWFudWFsKCJWYXJpYWJsZXMiLCB2YWx1ZXMgPSBjKCJJbmZlY3Rpb25zIiA9ICJncmVlbiIsICJQb3B1bGF0aW9uIiA9ICJwdXJwbGUiKSkgKwogIGxhYnMoeCA9ICJEYXRhIFBvaW50IEluZGV4IiwgeSA9ICJDb3VudCIsIHRpdGxlID0gIlRyZW5kcyBpbiBJbmZlY3Rpb25zIGFuZCBQb3B1bGF0aW9uIikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gInRvcCIpCiMgT2JzZXJ2YXRpb246IFRoaXMgbGluZSBjaGFydCBzaG93cyB0aGUgdHJlbmRzIG9mIGluZmVjdGlvbnMgYW5kIHBvcHVsYXRpb24gYWNyb3NzIHRoZSBkYXRhIHBvaW50cy4KIyBUaGUgcG9wdWxhdGlvbiB2YWx1ZXMgYXJlIG9uIGEgbXVjaCBsYXJnZXIgc2NhbGUgdGhhbiBpbmZlY3Rpb24gY291bnRzLCBtYWtpbmcgaXQgZGlmZmljdWx0IHRvCiMgb2JzZXJ2ZSBkZXRhaWxlZCBjaGFuZ2VzIGluIGluZmVjdGlvbnMgb24gdGhlIHNhbWUgcGxvdC4gSG93ZXZlciwgd2UgY2FuIHNlZSB0aGUgb3ZlcmFsbAojIGZsdWN0dWF0aW9ucyBvZiBib3RoIHZhcmlhYmxlcy4KCiMgLS0tIDMuIFNjYXR0ZXIgUGxvdDogUmVsYXRpb25zaGlwIGJldHdlZW4gUG9wdWxhdGlvbiBhbmQgSW5mZWN0aW9ucyAtLS0KZ2dwbG90KGRmLCBhZXMoeCA9IHBvcCwgeSA9IGluZmVjdGlvbnMpKSArCiAgZ2VvbV9wb2ludChjb2xvciA9ICJibHVlIiwgYWxwaGEgPSAwLjYpICsKICBsYWJzKHggPSAiUG9wdWxhdGlvbiIsIHkgPSAiTnVtYmVyIG9mIEluZmVjdGlvbnMiLCB0aXRsZSA9ICJSZWxhdGlvbnNoaXAgYmV0d2VlbiBQb3B1bGF0aW9uIGFuZCBOdW1iZXIgb2YgSW5mZWN0aW9ucyIpICsKICB0aGVtZV9taW5pbWFsKCkKIyBPYnNlcnZhdGlvbjogVGhpcyBzY2F0dGVyIHBsb3QgZXhwbG9yZXMgdGhlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIHBvcHVsYXRpb24gc2l6ZSBhbmQgdGhlIG51bWJlcgojIG9mIGluZmVjdGlvbnMuIFRoZXJlIGRvZXNuJ3QgYXBwZWFy
IHRvIGJlIGEgc3Ryb25nIGxpbmVhciBjb3JyZWxhdGlvbi4gV2hpbGUgc29tZSBoaWdoLXBvcHVsYXRpb24KIyBhcmVhcyBoYXZlIGhpZ2ggaW5mZWN0aW9uIGNvdW50cywgdGhpcyBpcyBub3QgY29uc2lzdGVudGx5IHRoZSBjYXNlLgpgYGAKCgoKYGBge3J9CgojIC0tLSA0LiBCb3ggUGxvdDogRGlzdHJpYnV0aW9uIG9mIEluZmVjdGlvbnMgLS0tCmdncGxvdChkZiwgYWVzKHkgPSBpbmZlY3Rpb25zKSkgKwogIGdlb21fYm94cGxvdChmaWxsID0gImxpZ2h0Y29yYWwiKSArCiAgbGFicyh5ID0gIk51bWJlciBvZiBJbmZlY3Rpb25zIiwgdGl0bGUgPSAiRGlzdHJpYnV0aW9uIG9mIE51bWJlciBvZiBJbmZlY3Rpb25zIikgKwogIHRoZW1lX21pbmltYWwoKQojIE9ic2VydmF0aW9uOiBUaGlzIGJveCBwbG90IHN1bW1hcml6ZXMgdGhlIGRpc3RyaWJ1dGlvbiBvZiB0aGUgJ2luZmVjdGlvbnMnIHZhcmlhYmxlLiBJdCBzaG93cyB0aGUKIyBtZWRpYW4sIHF1YXJ0aWxlcywgYW5kIHBvdGVudGlhbCBvdXRsaWVycy4gVGhlIHBsb3QgaW5kaWNhdGVzIHRoYXQgdGhlIG1ham9yaXR5IG9mIGluZmVjdGlvbgojIGNvdW50cyBhcmUgcmVsYXRpdmVseSBsb3csIHdpdGggc29tZSBoaWdoZXIgdmFsdWVzIGlkZW50aWZpZWQgYXMgb3V0bGllcnMuCmBgYAoKCmBgYHtyfQojIC0tLSA1LiBIaXN0b2dyYW06IEZyZXF1ZW5jeSBEaXN0cmlidXRpb24gb2YgVUZPIFNpZ2h0aW5ncyAtLS0KZ2dwbG90KGRmLCBhZXMoeCA9IHVmbzIwMTApKSArCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSA1LCBmaWxsID0gIm9yYW5nZSIsIGNvbG9yID0gImJsYWNrIiwgYWxwaGEgPSAwLjcpICsKICBsYWJzKHggPSAiTnVtYmVyIG9mIFVGTyBTaWdodGluZ3MgKDIwMTApIiwgeSA9ICJGcmVxdWVuY3kiLCB0aXRsZSA9ICJGcmVxdWVuY3kgRGlzdHJpYnV0aW9uIG9mIFVGTyBTaWdodGluZ3MgKDIwMTApIikgKwogIHRoZW1lX21pbmltYWwoKQojIE9ic2VydmF0aW9uOiBUaGlzIGhpc3RvZ3JhbSBzaG93cyB0aGUgZnJlcXVlbmN5IGRpc3RyaWJ1dGlvbiBvZiBVRk8gc2lnaHRpbmdzIGluIDIwMTAuIFRoZQojIGRpc3RyaWJ1dGlvbiBpcyBoZWF2aWx5IHNrZXdlZCB0b3dhcmRzIHplcm8sIGluZGljYXRpbmcgdGhhdCBtb3N0IGRhdGEgcG9pbnRzIGhhdmUgdmVyeSBmZXcgb3IKIyBubyByZXBvcnRlZCBVRk8gc2lnaHRpbmdzLgpgYGAKCgpgYGB7cn0KIyAtLS0gNi4gU2NhdHRlciBQbG90OiBSZWxhdGlvbnNoaXAgYmV0d2VlbiBQb3B1bGF0aW9uIGFuZCBVRk8gU2lnaHRpbmdzIC0tLQpnZ3Bsb3QoZGYsIGFlcyh4ID0gcG9wLCB5ID0gdWZvMjAxMCkpICsKICBnZW9tX3BvaW50KGNvbG9yID0gInB1cnBsZSIsIGFscGhhID0gMC42KSArCiAgbGFicyh4ID0gIlBvcHVsYXRpb24iLCB5ID0gIk51bWJlciBvZiBVRk8gU2lnaHRpbmdzICgyMDEwKSIsIHRpdGxlID0gIlJlbGF0aW9uc2hpcCBiZXR3ZWVuIFBvcHVsYXRpb24gYW5kIFVGTyBTaWdodGluZ3MgKDIwMTApIikg
KwogIHRoZW1lX21pbmltYWwoKQojIE9ic2VydmF0aW9uOiBUaGlzIHNjYXR0ZXIgcGxvdCBleGFtaW5lcyB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gcG9wdWxhdGlvbiBzaXplIGFuZCB0aGUgbnVtYmVyCiMgb2YgVUZPIHNpZ2h0aW5ncy4gVGhlcmUgZG9lc24ndCBzZWVtIHRvIGJlIGEgY2xlYXIgbGluZWFyIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIHRoZXNlIHR3byB2YXJpYWJsZXMuCmBgYAoKCmBgYHtyfQojIC0tLSA3LiBTY2F0dGVyIFBsb3Q6IEluZmVjdGlvbnMgdnMuIFVGT3Mgd2l0aCBQb3B1bGF0aW9uIFNpemUgLS0tCmdncGxvdChkZiwgYWVzKHggPSB1Zm8yMDEwLCB5ID0gaW5mZWN0aW9ucywgc2l6ZSA9IHBvcCkpICsKICBnZW9tX3BvaW50KGFscGhhID0gMC42LCBjb2xvciA9ICJtYXJvb24iKSArCiAgc2NhbGVfc2l6ZV9jb250aW51b3VzKG5hbWUgPSAiUG9wdWxhdGlvbiBTaXplIikgKwogIGxhYnMoeCA9ICJOdW1iZXIgb2YgVUZPIFNpZ2h0aW5ncyAoMjAxMCkiLCB5ID0gIk51bWJlciBvZiBJbmZlY3Rpb25zIiwgdGl0bGUgPSAiSW5mZWN0aW9ucyB2cy4gVUZPIFNpZ2h0aW5ncywgU2l6ZSBieSBQb3B1bGF0aW9uIikgKwogIHRoZW1lX21pbmltYWwoKQojIE9ic2VydmF0aW9uOiBUaGlzIHNjYXR0ZXIgcGxvdCBzaG93cyB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gaW5mZWN0aW9ucyBhbmQgVUZPIHNpZ2h0aW5ncywgd2l0aAojIHRoZSBzaXplIG9mIGVhY2ggcG9pbnQgcmVwcmVzZW50aW5nIHRoZSBwb3B1bGF0aW9uIHNpemUuIEl0IGhlbHBzIHRvIHZpc3VhbGl6ZSBpZiBhcmVhcyB3aXRoIGhpZ2hlcgojIGluZmVjdGlvbnMgb3IgVUZPIHNpZ2h0aW5ncyBhbHNvIHRlbmQgdG8gaGF2ZSBsYXJnZXIgcG9wdWxhdGlvbnMuIE5vIHN0cm9uZyBwYXR0ZXJuIGlzIGltbWVkaWF0ZWx5CiMgYXBwYXJlbnQuCmBgYAoKCmBgYHtyfQojIC0tLSA4LiBQYWlyIFBsb3Q6IE92ZXJ2aWV3IG9mIFJlbGF0aW9uc2hpcHMgLS0tCmxpYnJhcnkoR0dhbGx5KQpnZ3BhaXJzKGRmKSArCiAgZ2d0aXRsZSgiUGFpciBQbG90IG9mIEluZmVjdGlvbnMsIFVGTyBTaWdodGluZ3MsIGFuZCBQb3B1bGF0aW9uIikgKwogIHRoZW1lX21pbmltYWwoKQojIE9ic2VydmF0aW9uOiBUaGUgcGFpciBwbG90IHByb3ZpZGVzIGEgbWF0cml4IG9mIHNjYXR0ZXIgcGxvdHMgZm9yIGVhY2ggcGFpciBvZiB2YXJpYWJsZXMgYW5kCiMgZGVuc2l0eSBwbG90cyBmb3IgdGhlIGRpc3RyaWJ1dGlvbiBvZiBlYWNoIGluZGl2aWR1YWwgdmFyaWFibGUuIFRoaXMgZ2l2ZXMgYSBxdWljayBvdmVydmlldyBvZgojIHBvdGVudGlhbCBsaW5lYXIgcmVsYXRpb25zaGlwcyBhbmQgdGhlIHNoYXBlIG9mIHRoZSBkaXN0cmlidXRpb25zLiBUaGUgZGlzdHJpYnV0aW9ucyBvZgojIGluZmVjdGlvbnMgYW5kIFVGTyBzaWdodGluZ3MgYXBwZWFyIHNrZXdlZCwgYW5kIHRoZSBzY2F0dGVyIHBsb3RzIHJlaXRlcmF0ZSB0aGUgbGFjayBvZiBzdHJvbmcKIyBsaW5lYXIgY29ycmVs
YXRpb25zIG9ic2VydmVkIGluIHRoZSBpbmRpdmlkdWFsIHBsb3RzLgpgYGAKCgoK