This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

plot(cars)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

#install.packages("ggplot2")
#install.packages("dplyr")
#install.packages("GGally")
# Infection Data Visualizations in R

# Create the data frame
infections <- c(245, 215, 2076, 5023, 189, 195, 123, 116, 3298, 430, 502, 126, 112, 67, 52, 39, 54, 2356, 6781, 120, 2389, 279, 257, 290, 234, 5689, 261, 672, 205)
ufo2010 <- c(2, 6, 2, 59, 0, 1, 1, 0, 115, 0, 0, 0, 0, 0, 0, 0, 6, 4, 2, 7, 2, 9, 2, 29, 10, 169, 1, 40, 16)
pop <- c(25101, 61912, 33341, 409061, 7481, 18675, 25581, 22286, 459598, 3915, 67197, 34365, 3911, 32122, 31459, 2311, 28350, 101482, 19005, 20679, 36745, 162812, 15927, 251417, 153920, 1554720, 16148, 305455, 37276)
df <- data.frame(infections, ufo2010, pop)

# Load necessary libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# --- 1. Bar Graph: Comparing Infections and UFO Sightings ---
ggplot(df, aes(x = 1:nrow(df))) +
  geom_bar(aes(y = infections, fill = "Infections"), stat = "identity", position = "dodge") +
  geom_bar(aes(y = ufo2010, fill = "UFO Sightings (2010)"), stat = "identity", position = "dodge", alpha = 0.7) +
  scale_fill_manual("Variables", values = c("Infections" = "skyblue", "UFO Sightings (2010)" = "salmon")) +
  labs(x = "Data Point Index", y = "Count", title = "Comparison of Infections and UFO Sightings") +
  theme_minimal() +
  theme(legend.position = "top")

# Observation: This bar graph compares the number of infections and UFO sightings for each # data point. The scale of infections is significantly higher than UFO sightings in most cases. # There are few instances where UFO sightings are non-zero, but their counts are low relative # to the infection numbers.

# --- 2. Line Chart: Trends in Infections and Population ---
ggplot(df, aes(x = 1:nrow(df))) +
  geom_line(aes(y = infections, color = "Infections"), linewidth = 1) +
  geom_line(aes(y = pop, color = "Population"), linewidth = 1, linetype = "dashed") +
  scale_color_manual("Variables", values = c("Infections" = "green", "Population" = "purple")) +
  labs(x = "Data Point Index", y = "Count", title = "Trends in Infections and Population") +
  theme_minimal() +
  theme(legend.position = "top")

# Observation: This line chart shows the trends of infections and population across the data points. # The population values are on a much larger scale than infection counts, making it difficult to # observe detailed changes in infections on the same plot. However, we can see the overall # fluctuations of both variables.

# --- 3. Scatter Plot: Relationship between Population and Infections ---
ggplot(df, aes(x = pop, y = infections)) +
  geom_point(color = "blue", alpha = 0.6) +
  labs(x = "Population", y = "Number of Infections", title = "Relationship between Population and Number of Infections") +
  theme_minimal()

# Observation: This scatter plot explores the relationship between population size and the number # of infections. There doesn’t appear to be a strong linear correlation. While some high-population # areas have high infection counts, this is not consistently the case.

# --- 4. Box Plot: Distribution of Infections ---
ggplot(df, aes(y = infections)) +
  geom_boxplot(fill = "lightcoral") +
  labs(y = "Number of Infections", title = "Distribution of Number of Infections") +
  theme_minimal()

# Observation: This box plot summarizes the distribution of the ‘infections’ variable. It shows the # median, quartiles, and potential outliers. The plot indicates that the majority of infection # counts are relatively low, with some higher values identified as outliers.

# --- 5. Histogram: Frequency Distribution of UFO Sightings ---
ggplot(df, aes(x = ufo2010)) +
  geom_histogram(binwidth = 5, fill = "orange", color = "black", alpha = 0.7) +
  labs(x = "Number of UFO Sightings (2010)", y = "Frequency", title = "Frequency Distribution of UFO Sightings (2010)") +
  theme_minimal()

# Observation: This histogram shows the frequency distribution of UFO sightings in 2010. The # distribution is heavily skewed towards zero, indicating that most data points have very few or # no reported UFO sightings.

# --- 6. Scatter Plot: Relationship between Population and UFO Sightings ---
ggplot(df, aes(x = pop, y = ufo2010)) +
  geom_point(color = "purple", alpha = 0.6) +
  labs(x = "Population", y = "Number of UFO Sightings (2010)", title = "Relationship between Population and UFO Sightings (2010)") +
  theme_minimal()

# Observation: This scatter plot examines the relationship between population size and the number # of UFO sightings. There doesn’t seem to be a clear linear relationship between these two variables.

# --- 7. Scatter Plot: Infections vs. UFOs with Population Size ---
ggplot(df, aes(x = ufo2010, y = infections, size = pop)) +
  geom_point(alpha = 0.6, color = "maroon") +
  scale_size_continuous(name = "Population Size") +
  labs(x = "Number of UFO Sightings (2010)", y = "Number of Infections", title = "Infections vs. UFO Sightings, Size by Population") +
  theme_minimal()

# Observation: This scatter plot shows the relationship between infections and UFO sightings, with # the size of each point representing the population size. It helps to visualize if areas with higher # infections or UFO sightings also tend to have larger populations. No strong pattern is immediately # apparent.

# --- 8. Pair Plot: Overview of Relationships ---
library(GGally)
ggpairs(df) +
  ggtitle("Pair Plot of Infections, UFO Sightings, and Population") +
  theme_minimal()

# Observation: The pair plot provides a matrix of scatter plots for each pair of variables and # density plots for the distribution of each individual variable. This gives a quick overview of # potential linear relationships and the shape of the distributions. The distributions of # infections and UFO sightings appear skewed, and the scatter plots reiterate the lack of strong # linear correlations observed in the individual plots.