This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
#install.packages("ggplot2")
#install.packages("dplyr")
#install.packages("GGally")
# Create the data frame
infections <- c(245, 215, 2076, 5023, 189, 195, 123, 116, 3298, 430, 502, 126, 112, 67, 52, 39, 54, 2356, 6781, 120, 2389, 279, 257, 290, 234, 5689, 261, 672, 205)
ufo2010 <- c(2, 6, 2, 59, 0, 1, 1, 0, 115, 0, 0, 0, 0, 0, 0, 0, 6, 4, 2, 7, 2, 9, 2, 29, 10, 169, 1, 40, 16)
pop <- c(25101, 61912, 33341, 409061, 7481, 18675, 25581, 22286, 459598, 3915, 67197, 34365, 3911, 32122, 31459, 2311, 28350, 101482, 19005, 20679, 36745, 162812, 15927, 251417, 153920, 1554720, 16148, 305455, 37276)
df <- data.frame(infections, ufo2010, pop)
# Load necessary libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# --- 1. Bar Graph: Comparing Infections and UFO Sightings ---
ggplot(df, aes(x = 1:nrow(df))) +
geom_bar(aes(y = infections, fill = "Infections"), stat = "identity", position = "dodge") +
geom_bar(aes(y = ufo2010, fill = "UFO Sightings (2010)"), stat = "identity", position = "dodge", alpha = 0.7) +
scale_fill_manual("Variables", values = c("Infections" = "skyblue", "UFO Sightings (2010)" = "salmon")) +
labs(x = "Data Point Index", y = "Count", title = "Comparison of Infections and UFO Sightings") +
theme_minimal() +
theme(legend.position = "top")
# Observation: This bar graph compares the number of infections and UFO sightings for each
# data point. The scale of infections is significantly higher than UFO sightings in most cases.
# There are few instances where UFO sightings are non-zero, but their counts are low relative
# to the infection numbers.
# --- 2. Line Chart: Trends in Infections and Population ---
ggplot(df, aes(x = 1:nrow(df))) +
geom_line(aes(y = infections, color = "Infections"), linewidth = 1) +
geom_line(aes(y = pop, color = "Population"), linewidth = 1, linetype = "dashed") +
scale_color_manual("Variables", values = c("Infections" = "green", "Population" = "purple")) +
labs(x = "Data Point Index", y = "Count", title = "Trends in Infections and Population") +
theme_minimal() +
theme(legend.position = "top")
# Observation: This line chart shows the trends of infections and population across the data points.
# The population values are on a much larger scale than infection counts, making it difficult to
# observe detailed changes in infections on the same plot. However, we can see the overall
# fluctuations of both variables.
# --- 3. Scatter Plot: Relationship between Population and Infections ---
ggplot(df, aes(x = pop, y = infections)) +
geom_point(color = "blue", alpha = 0.6) +
labs(x = "Population", y = "Number of Infections", title = "Relationship between Population and Number of Infections") +
theme_minimal()
# Observation: This scatter plot explores the relationship between population size and the number
# of infections. There doesn't appear to be a strong linear correlation. While some high-population
# areas have high infection counts, this is not consistently the case.
# --- 4. Box Plot: Distribution of Infections ---
ggplot(df, aes(y = infections)) +
geom_boxplot(fill = "lightcoral") +
labs(y = "Number of Infections", title = "Distribution of Number of Infections") +
theme_minimal()
# Observation: This box plot summarizes the distribution of the 'infections' variable. It shows the
# median, quartiles, and potential outliers. The plot indicates that the majority of infection
# counts are relatively low, with some higher values identified as outliers.
# --- 5. Histogram: Frequency Distribution of UFO Sightings ---
ggplot(df, aes(x = ufo2010)) +
geom_histogram(binwidth = 5, fill = "orange", color = "black", alpha = 0.7) +
labs(x = "Number of UFO Sightings (2010)", y = "Frequency", title = "Frequency Distribution of UFO Sightings (2010)") +
theme_minimal()
# Observation: This histogram shows the frequency distribution of UFO sightings in 2010. The
# distribution is heavily skewed towards zero, indicating that most data points have very few or
# no reported UFO sightings.
# --- 6. Scatter Plot: Relationship between Population and UFO Sightings ---
ggplot(df, aes(x = pop, y = ufo2010)) +
geom_point(color = "purple", alpha = 0.6) +
labs(x = "Population", y = "Number of UFO Sightings (2010)", title = "Relationship between Population and UFO Sightings (2010)") +
theme_minimal()
# Observation: This scatter plot examines the relationship between population size and the number
# of UFO sightings. There doesn't seem to be a clear linear relationship between these two variables.
# --- 7. Scatter Plot: Infections vs. UFOs with Population Size ---
ggplot(df, aes(x = ufo2010, y = infections, size = pop)) +
geom_point(alpha = 0.6, color = "maroon") +
scale_size_continuous(name = "Population Size") +
labs(x = "Number of UFO Sightings (2010)", y = "Number of Infections", title = "Infections vs. UFO Sightings, Size by Population") +
theme_minimal()
# Observation: This scatter plot shows the relationship between infections and UFO sightings, with
# the size of each point representing the population size. It helps to visualize if areas with higher
# infections or UFO sightings also tend to have larger populations. No strong pattern is immediately
# apparent.
# --- 8. Pair Plot: Overview of Relationships ---
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
ggpairs(df) +
ggtitle("Pair Plot of Infections, UFO Sightings, and Population") +
theme_minimal()
# Observation: The pair plot provides a matrix of scatter plots for each pair of variables and
# density plots for the distribution of each individual variable. This gives a quick overview of
# potential linear relationships and the shape of the distributions. The distributions of
# infections and UFO sightings appear skewed, and the scatter plots reiterate the lack of strong
# linear correlations observed in the individual plots.