# Install the packages this script uses, skipping any that are already
# available so that re-running the script does not download and reinstall
# them every time.
for (pkg in c("ggplot2", "dplyr", "GGally")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# Infection Data Visualizations
# Raw data: three parallel numeric vectors with one entry per observation.
# Infection counts.
infections <- c(
  245, 215, 2076, 5023, 189, 195, 123, 116, 3298, 430,
  502, 126, 112, 67, 52, 39, 54, 2356, 6781, 120,
  2389, 279, 257, 290, 234, 5689, 261, 672, 205
)
# UFO sighting counts (2010).
ufo2010 <- c(
  2, 6, 2, 59, 0, 1, 1, 0, 115, 0,
  0, 0, 0, 0, 0, 0, 6, 4, 2, 7,
  2, 9, 2, 29, 10, 169, 1, 40, 16
)
# Population.
pop <- c(
  25101, 61912, 33341, 409061, 7481, 18675, 25581, 22286, 459598, 3915,
  67197, 34365, 3911, 32122, 31459, 2311, 28350, 101482, 19005, 20679,
  36745, 162812, 15927, 251417, 153920, 1554720, 16148, 305455, 37276
)
# Install rlang only when it is not already available, so the script does not
# reinstall it on every run. dependencies = TRUE is kept from the original
# call so that rlang's dependencies are installed along with it.
# NOTE(review): if this install was meant to force an upgrade of an outdated
# rlang, run install.packages("rlang") manually once instead.
if (!requireNamespace("rlang", quietly = TRUE)) {
  install.packages("rlang", dependencies = TRUE)
}
# Combine the three vectors into one data frame, one row per observation.
df <- data.frame(
  infections = infections,
  ufo2010 = ufo2010,
  pop = pop
)
# Load necessary libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# 1. Bar Graph: Comparing Infections and UFO Sightings
# position = "dodge" only separates bars that belong to the same layer, so
# two separate bar layers end up drawn on top of each other. The two series
# are therefore stacked into one long data frame and drawn by a single
# geom_col() layer, which places the bars for each index side by side.
# seq_len() is used for the index so an empty data frame yields an empty
# sequence rather than c(1, 0).
ggplot(
  data.frame(
    index = rep(seq_len(nrow(df)), times = 2),
    variable = rep(
      c("Infections", "UFO Sightings (2010)"),
      each = nrow(df)
    ),
    count = c(df$infections, df$ufo2010)
  ),
  aes(x = index, y = count, fill = variable)
) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    "Variables",
    values = c("Infections" = "skyblue", "UFO Sightings (2010)" = "salmon")
  ) +
  labs(
    x = "Data Point Index",
    y = "Count",
    title = "Comparison of Infections and UFO Sightings"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Observation:
# This bar chart compares infections and UFO sightings for each data point index.
# One thing that stands out immediately is the large difference in scale —
# infections are much higher than UFO sightings in almost every case.
# Because of this, the UFO bars appear very small and are harder to visually
# compare.
# I also notice that UFO sightings are zero or very low for many observations,
# while infections vary widely across the dataset. This suggests that infections
# have much greater variability, while UFO sightings are relatively rare events.
#
# Overall, this chart shows that these two variables are not directly comparable
# in magnitude, and using the same axis makes the differences even more
# noticeable.
# 2. Line Chart: Trends in Infections and Population
# Both series are drawn against the row index of df on a shared y axis.
# seq_len() is used for the index so an empty data frame yields an empty
# sequence rather than c(1, 0).
ggplot(df, aes(x = seq_len(nrow(df)))) +
  geom_line(aes(y = infections, color = "Infections"), linewidth = 1) +
  geom_line(
    aes(y = pop, color = "Population"),
    linewidth = 1,
    linetype = "dashed"
  ) +
  scale_color_manual(
    "Variables",
    values = c("Infections" = "green", "Population" = "purple")
  ) +
  labs(
    x = "Data Point Index",
    y = "Count",
    title = "Trends in Infections and Population"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

# Observation:
# This line chart shows how infections and population change across the data
# points.
# The population line is consistently much higher than infections, which makes
# the infection trend harder to distinguish clearly.
#
# I notice that both variables fluctuate, but population has much larger jumps.
# There are some points where infections increase, but this does not always match
# a similar increase in population.
#
# This suggests that while population might influence infections, it is not the
# only factor affecting infection counts.
# 3. Scatter Plot: Relationship between Population and Infections
# One point per row of df: population on x, infection count on y.
ggplot(data = df, mapping = aes(x = pop, y = infections)) +
  geom_point(colour = "blue", alpha = 0.6) +
  xlab("Population") +
  ylab("Number of Infections") +
  ggtitle("Relationship between Population and Number of Infections") +
  theme_minimal()

# Observation:
# This scatter plot explores whether there is a relationship between population
# size and infection counts.
#
# I expected to see a clear upward trend (higher population => more infections),
# but the points are quite spread out. While some high-population areas do have
# high infections, there are also cases where large populations have relatively
# low infection counts.
#
# This indicates that the relationship is weak or inconsistent, and other factors
# besides population likely play an important role here.
# 4. Box Plot: Distribution of Infections
# A single box summarising the infections column of df.
ggplot(data = df, mapping = aes(y = infections)) +
  geom_boxplot(fill = "lightcoral") +
  ylab("Number of Infections") +
  ggtitle("Distribution of Number of Infections") +
  theme_minimal()

# Observation:
# The box plot summarizes the distribution of infection values.
#
# I can see that most of the data is concentrated at lower infection values,
# while a few points are much higher. These appear as outliers.
#
# This kind of distribution suggests that extreme cases exist but are not common.
# 5. Histogram: Frequency Distribution of UFO Sightings
# Counts of the ufo2010 column are grouped into bins 5 sightings wide.
ggplot(data = df, mapping = aes(x = ufo2010)) +
  geom_histogram(
    binwidth = 5,
    fill = "orange",
    colour = "black",
    alpha = 0.7
  ) +
  xlab("Number of UFO Sightings (2010)") +
  ylab("Frequency") +
  ggtitle("Frequency Distribution of UFO Sightings (2010)") +
  theme_minimal()

# Observation:
# This histogram shows how frequently different values of UFO sightings occur.
#
# Most of the values are clustered near zero, meaning that many locations have
# very few or no reported sightings. Only a small number of observations have
# higher values.
#
# This creates a strong right-skewed distribution, indicating that UFO sightings
# are rare events with occasional spikes.
# 6. Scatter Plot: Relationship between Population and UFO Sightings
# One point per row of df: population on x, UFO sighting count on y.
ggplot(data = df, mapping = aes(x = pop, y = ufo2010)) +
  geom_point(colour = "purple", alpha = 0.6) +
  xlab("Population") +
  ylab("Number of UFO Sightings (2010)") +
  ggtitle("Relationship between Population and UFO Sightings (2010)") +
  theme_minimal()

# Observation:
# This scatter plot examines the relationship between population and UFO
# sightings.
#
# The points are widely scattered without a clear upward or downward trend.
# This suggests that population size does not strongly determine the number of
# UFO sightings.
#
# Even areas with large populations sometimes have very few sightings, which
# reinforces the idea that sightings are unpredictable or influenced by other
# factors.
# 7. Scatter Plot: Infections vs. UFOs with Population Size
# Three variables at once: UFO sightings on x, infections on y, and
# population mapped to point size.
ggplot(
  data = df,
  mapping = aes(x = ufo2010, y = infections, size = pop)
) +
  geom_point(colour = "maroon", alpha = 0.6) +
  scale_size_continuous("Population Size") +
  xlab("Number of UFO Sightings (2010)") +
  ylab("Number of Infections") +
  ggtitle("Infections vs. UFO Sightings, Size by Population") +
  theme_minimal()

# Observation:
# This plot combines three variables: infections, UFO sightings, and population.
#
# The size of each point represents population, which helps visualize whether
# larger populations correspond to higher infections or sightings.
#
# I notice that larger points (higher population) are spread across different
# levels of infections and UFO sightings, rather than forming a clear pattern.
#
# This again suggests that population alone does not explain the variation in
# infections or UFO sightings.
# 8. Pair Plot: Overview of Relationships
# ggpairs() comes from GGally; it draws a matrix of plots covering every
# pair of columns in df.
library(GGally)
ggpairs(data = df) +
  labs(title = "Pair Plot of Infections, UFO Sightings, and Population") +
  theme_minimal()

# Observation:
# The pair plot provides a complete overview of relationships between all
# variables.
#
# From the diagonal plots, I can see that infections and UFO sightings are both
# skewed distributions, with most values concentrated at the lower end.
#
# The scatter plots confirm earlier observations: there are no strong linear
# relationships between the variables. The points are scattered rather than
# forming clear lines.
#
# Overall, this reinforces the conclusion that these variables do not have
# strong direct correlations with each other.