# Load the tidyverse before first use: ggplot() is called right below and
# %>% / mutate() shortly after, so the packages must be attached up front.
library(tidyverse)

# Import the spreadsheet export. read.csv() sanitises column names
# (check.names = TRUE), which is why later code refers to names such as
# `Land.Area.Km2` and `World.`.
data <- read.csv("Worldmap - Sheet1.csv")

# Bar chart: number of rows per value of `Continents`.
ggplot(data = data) +
  geom_bar(mapping = aes(x = Continents))
# Distribution of the `X.` column, drawn as a histogram with 0.02-wide bins.
# NOTE(review): `X.` is presumably a header that read.csv() renamed - confirm.
ggplot(data, aes(x = X.)) +
  geom_histogram(binwidth = 0.02)
# Convert Population and Land.Area.Km2 from text to numbers.
#
# as.numeric() was used here before and produced "NAs introduced by coercion"
# for both columns: read.csv() keeps them as character because the values are
# not plain digits (most likely thousands separators - TODO confirm against
# the CSV). readr::parse_number() ignores grouping marks and surrounding
# non-numeric characters, so formatted values are kept instead of becoming NA.
data <- data %>%
  mutate(
    Population = readr::parse_number(Population),
    Land.Area.Km2 = readr::parse_number(Land.Area.Km2)
  )
# Frequency polygon of Population using bins of 50 million.
ggplot(data, aes(x = Population)) +
  # The argument is `binwidth`; it was misspelled `bindwith`, so ggplot2
  # ignored it and silently fell back to the default of 30 bins.
  geom_freqpoly(binwidth = 50000000, color = "steelblue", linewidth = 1) +
  labs(
    title = "Frequency Polygon of Population",
    x = "Population",
    y = "Count"
  )
## Warning: Removed 232 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Boxplot of land area; outliers are enlarged and coloured blue so unusual
# values stand out.
ggplot(data, aes(y = Land.Area.Km2)) +
  geom_boxplot(outlier.color = "blue", outlier.size = 3) +
  labs(
    title = "Unusual Land Area Values",
    # The label previously read "Land.Area.Km2)" with a stray closing
    # parenthesis; use the same wording as the other land-area axes.
    y = "Land Area (km²)"
  )
## Warning: Removed 183 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Count the NA entries of every column, then reshape the one-row summary into
# a long table with one row per column (`variable`) and its NA count
# (`missing`).
missing_tbl <- data %>%
  summarise(across(everything(), function(col) sum(is.na(col)))) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "missing"
  )

# Column chart of the NA count per variable.
ggplot(missing_tbl, aes(variable, missing)) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  labs(
    title = "Missing Values per Variable",
    x = "Variable",
    y = "Number of Missing Values"
  )
# Overwrite `X.` with each row's population rank.
# rank() ranks in ascending order, so rank(Population) gave 1 to the SMALLEST
# population and contradicted the axis label below. Ranking the negated value
# makes 1 the largest population, as the label states.
data_ranked <- data %>%
  mutate(X. = rank(-Population))

# Population rank of each row, grouped and coloured by continent.
ggplot(data_ranked, aes(x = Continents, y = X., color = Continents)) +
  geom_point(size = 3, alpha = 0.7) +
  labs(
    title = "Covariation: Country Population Rank by Continent",
    y = "Population Rank (1 = largest)"
  )
# Load packages
library(tidyverse)
# Build a cleaned copy of `data` for the per-continent comparison:
#   * `World.` is parsed from text to a number,
#   * rows without a positive `World.` value or without a continent are dropped,
#   * rows whose continent text contains "/" or "," are dropped,
#   * `Continents` becomes a factor.
data_clean <- data %>%
  mutate(
    Continents = as.character(Continents),
    World. = readr::parse_number(World.)
  ) %>%
  filter(!is.na(World.), World. > 0) %>%
  filter(
    !is.na(Continents),
    str_detect(Continents, "/", negate = TRUE),
    str_detect(Continents, ",", negate = TRUE)
  ) %>%
  # factor() on a character vector only creates levels that are present, so
  # no separate droplevels() step is needed.
  mutate(Continents = factor(Continents))

# Horizontal boxplots of `World.` for each continent.
ggplot(data_clean, aes(Continents, World., fill = Continents)) +
  geom_boxplot(alpha = 0.7) +
  coord_flip() +
  labs(
    title = "Covariation: World % by Continent (Cleaned)",
    x = "Continent",
    y = "World % of World Population"
  )
library(tidyverse)
# Re-read the sheet with readr, which keeps the original column names
# (hence the back-ticked `World%`).
world <- readr::read_csv("Worldmap - Sheet1.csv")

# Parse the percentage text to a number, then bucket it into four share bands.
# cut() uses right-closed intervals by default: (-Inf, 1], (1, 3], (3, 10],
# (10, Inf).
world2 <- world %>%
  mutate(World_num = readr::parse_number(`World%`)) %>%
  mutate(
    World_cat = cut(
      World_num,
      breaks = c(-Inf, 1, 3, 10, Inf),
      labels = c("<1%", "1–3%", "3–10%", ">10%")
    )
  )

# Side-by-side bars: row count per continent, split by share band.
ggplot(world2, aes(Continents, fill = World_cat)) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Countries by Continent and World Population Share Category",
    x = "Continent",
    y = "Number of Countries",
    fill = "World %"
  )
library(tidyverse)
# Re-read the sheet and add numeric copies of the two columns being compared.
world <- readr::read_csv("Worldmap - Sheet1.csv")

world2 <- world %>%
  mutate(Population_num = as.numeric(Population)) %>%
  mutate(LandArea_num = as.numeric(`Land Area Km2`))

# Scatter plot of population against land area, one semi-transparent point
# per row.
ggplot(world2, aes(LandArea_num, Population_num)) +
  geom_point(alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Population vs Land Area",
    x = "Land Area (km²)",
    y = "Population"
  )
## Patterns and models
library(tidyverse)
library(modelr)
library(scales)
# Re-read the sheet and keep only rows where both population and land area
# are present and strictly positive, so that log() is defined for every row
# passed to the model.
world <- readr::read_csv("Worldmap - Sheet1.csv")

world2 <- world %>%
  mutate(
    Population_num = as.numeric(Population),
    LandArea_num = as.numeric(`Land Area Km2`)
  ) %>%
  filter(!is.na(Population_num) & !is.na(LandArea_num)) %>%
  filter(Population_num > 0 & LandArea_num > 0)

# Log-log linear model of population on land area.
mod_world <- lm(log(Population_num) ~ log(LandArea_num), data = world2)

# add_residuals() appends the model residuals as column `resid`; resid_exp is
# the exponential of that log-scale residual.
world2 <- add_residuals(world2, mod_world) %>%
  mutate(resid_exp = exp(resid))
# g1: exponentiated residuals plotted against land area, with
# comma-formatted tick labels on both axes.
g1 <- ggplot(world2, aes(LandArea_num, resid_exp)) +
  geom_point(alpha = 0.7) +
  scale_x_continuous(labels = comma) +
  scale_y_continuous(labels = comma) +
  theme_minimal() +
  labs(
    title = "Residuals from log(Population) ~ log(Land Area)",
    x = "Land Area (km²)",
    y = "Residual (back-transformed)"
  )
# g2: distribution of the exponentiated residuals within each continent.
g2 <- ggplot(world2, aes(Continents, resid_exp)) +
  geom_boxplot() +
  scale_y_continuous(labels = comma) +
  theme_minimal() +
  labs(
    title = "Residuals by Continent",
    x = "Continent",
    y = "Residual (back-transformed)"
  )

# Print both plots, scatter first.
g1
g2