# Load required libraries
library(ggplot2)
library(dplyr)

# Read the data.
# read.table() with its default separator splits fields on whitespace;
# header = TRUE takes the column names from the first line of the file.
# The path is relative to the current working directory.
df <- read.table("infections1.txt", header = TRUE)

# Preview the first rows to check that the columns were parsed as expected
head(df)

# Create a bar plot by binning infection counts.
# cut() builds right-closed intervals such as (100, 500]. include.lowest = TRUE
# also closes the first interval on the left, so a count of exactly 0 lands in
# "0-100" instead of becoming NA.
# NOTE(review): counts above 7000 (or below 0) still map to NA and would show
# up as an "NA" bar -- confirm the data never exceeds the last break.
df$infection_bin <- cut(
  df$infections,
  breaks = c(0, 100, 500, 1000, 3000, 7000),
  labels = c("0-100", "101-500", "501-1000", "1001-3000", "3001-7000"),
  include.lowest = TRUE
)

# One bar per bin; geom_bar() counts the rows that fall in each bin.
ggplot(df, aes(x = infection_bin)) +
  geom_bar(fill = "steelblue") +
  labs(
    title = "Bar Plot of Infection Count Ranges",
    x = "Infection Range",
    y = "Frequency"
  ) +
  theme_minimal()


# Observation: Most values fall within the lower range of infections (0-1000),
# but there are some outliers in the 3000-7000 range.
# Line chart of infections against population.
# Rows are reordered by ascending population first, to simulate a
# time-series-like progression along the x axis.
df_sorted <- df[order(df[["pop"]]), ]

ggplot(data = df_sorted, mapping = aes(x = pop, y = infections)) +
  geom_line(colour = "darkgreen") +
  ggtitle("Line Chart of Infections Over Increasing Population") +
  xlab("Population") +
  ylab("Infections") +
  theme_minimal()


# Observation: Infections generally increase with population,
# but the trend is not strictly linear.
# Scatter plot of infections against population.
# Points are semi-transparent (alpha = 0.7) so overlapping observations
# remain distinguishable.
ggplot(data = df, mapping = aes(x = pop, y = infections)) +
  geom_point(colour = "tomato", alpha = 0.7) +
  ggtitle("Scatter Plot of Infections vs Population") +
  xlab("Population") +
  ylab("Infections") +
  theme_minimal()


# Observation: There appears to be a weak positive relationship between
# population size and number of infections, with several influential outliers.
# Boxplot of all numeric variables.
# By this point df also carries the derived factor column `infection_bin`;
# passing the whole data frame would draw that factor's integer codes as an
# extra box. Filter(is.numeric, df) keeps only the numeric columns.
# NOTE(review): the three colours assume three numeric columns (presumably
# infections, pop and ufo2010) -- confirm against the data file.
boxplot(
  Filter(is.numeric, df),
  main = "Boxplots of Variables",
  col = c("skyblue", "orange", "green")
)


# Observation: The 'infections' and 'pop' variables have significant outliers.
# 'ufo2010' has a smaller range with a few extreme values.
# Histogram of infections.
# The counts are split into 10 equal-width bins; the black outline separates
# adjacent bars.
ggplot(df, aes(x = infections)) +
  geom_histogram(fill = "purple", bins = 10, color = "black") +
  labs(title = "Histogram of Infections", x = "Infections", y = "Count") +
  theme_minimal()


# Observation: The distribution is right-skewed, with a majority of cases
# having lower infection counts.