required library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(readxl)
library(haven)
Data
Data and data shummary
data(mtcars)
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
Basic Summary Stats
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
Skimr for more details of data summary
# install.packages("skimr")
library(skimr)
## Warning: package 'skimr' was built under R version 4.4.3
skim(mtcars)
Data summary
| Name |
mtcars |
| Number of rows |
32 |
| Number of columns |
11 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
11 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| mpg |
0 |
1 |
20.09 |
6.03 |
10.40 |
15.43 |
19.20 |
22.80 |
33.90 |
▃▇▅▁▂ |
| cyl |
0 |
1 |
6.19 |
1.79 |
4.00 |
4.00 |
6.00 |
8.00 |
8.00 |
▆▁▃▁▇ |
| disp |
0 |
1 |
230.72 |
123.94 |
71.10 |
120.83 |
196.30 |
326.00 |
472.00 |
▇▃▃▃▂ |
| hp |
0 |
1 |
146.69 |
68.56 |
52.00 |
96.50 |
123.00 |
180.00 |
335.00 |
▇▇▆▃▁ |
| drat |
0 |
1 |
3.60 |
0.53 |
2.76 |
3.08 |
3.70 |
3.92 |
4.93 |
▇▃▇▅▁ |
| wt |
0 |
1 |
3.22 |
0.98 |
1.51 |
2.58 |
3.33 |
3.61 |
5.42 |
▃▃▇▁▂ |
| qsec |
0 |
1 |
17.85 |
1.79 |
14.50 |
16.89 |
17.71 |
18.90 |
22.90 |
▃▇▇▂▁ |
| vs |
0 |
1 |
0.44 |
0.50 |
0.00 |
0.00 |
0.00 |
1.00 |
1.00 |
▇▁▁▁▆ |
| am |
0 |
1 |
0.41 |
0.50 |
0.00 |
0.00 |
0.00 |
1.00 |
1.00 |
▇▁▁▁▆ |
| gear |
0 |
1 |
3.69 |
0.74 |
3.00 |
3.00 |
4.00 |
4.00 |
5.00 |
▇▁▆▁▂ |
| carb |
0 |
1 |
2.81 |
1.62 |
1.00 |
2.00 |
2.00 |
4.00 |
8.00 |
▇▂▅▁▁ |
Group-wise Summaries: dplyr
library(dplyr)
mtcars %>%
group_by(cyl) %>%
summarise(across(mpg:hp, list(mean = mean, sd = sd)))
## # A tibble: 3 × 7
## cyl mpg_mean mpg_sd disp_mean disp_sd hp_mean hp_sd
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 26.7 4.51 105. 26.9 82.6 20.9
## 2 6 19.7 1.45 183. 41.6 122. 24.3
## 3 8 15.1 2.56 353. 67.8 209. 51.0
Group-wise Summaries: dplyr
library(dplyr)
mtcars %>%
group_by(cyl, carb) %>%
summarise(across(mpg:hp, list(mean = mean, sd = sd)))
## `summarise()` has grouped output by 'cyl'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 8
## # Groups: cyl [3]
## cyl carb mpg_mean mpg_sd disp_mean disp_sd hp_mean hp_sd
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 1 27.6 5.55 91.4 21.4 77.4 16.1
## 2 4 2 25.9 3.81 117. 27.1 87 24.9
## 3 6 1 19.8 2.33 242. 23.3 108. 3.54
## 4 6 4 19.8 1.55 164. 4.39 116. 7.51
## 5 6 6 19.7 NA 145 NA 175 NA
## 6 8 2 17.2 2.09 346. 43.4 162. 14.4
## 7 8 3 16.3 1.05 276. 0 180 0
## 8 8 4 13.2 2.28 406. 57.8 234 21.7
## 9 8 8 15 NA 301 NA 335 NA
Basic Visualization: Base R
hist(mtcars$mpg, main="MPG Distribution")

Using ggplot2: Intro
library(ggplot2)
ggplot(mtcars, aes(x=mpg)) +
geom_histogram(bins=10, fill="skyblue", color="black") + theme_dark()

Boxplots for Group Comparisons
ggplot(mtcars, aes(x=factor(cyl), y=mpg)) +
geom_boxplot(fill="steelblue") +
labs(x="Cylinders", y="Miles Per Gallon") + theme_bw()

Scatter Plot with Trend Line
ggplot(mtcars, aes(x=wt, y=mpg)) +
geom_point() +
geom_smooth(method="lm") + theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Density Plots
ggplot(mtcars, aes(x=mpg, fill=factor(cyl))) +
geom_density(alpha=0.5) + theme_classic()

Bar plot
ggplot(mtcars, aes(x=factor(gear))) +
geom_bar(fill="darkblue") + theme_light()

Violin Plots
ggplot(mtcars, aes(x=factor(cyl), y=mpg)) +
geom_violin(fill="lightgreen") + theme_classic()

Faceting: Multi-panel Plots
ggplot(mtcars, aes(x=mpg)) +
geom_histogram(fill="blue") +
facet_wrap(~cyl) + theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Themes and Customizations
ggplot(mtcars, aes(x=wt, y=mpg)) +
geom_point() +
theme_minimal()

Interactive Plots: plotly
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p <- ggplot(mtcars, aes(x=wt, y=mpg, color = factor(cyl))) + geom_point() + theme_classic()
ggplotly(p)
Linear Model Forest Plot
# Linear Regression Example
data(mtcars)
model_lm <- lm(mpg ~ wt + hp + cyl, data = mtcars)
# Extract coefficients and confidence intervals
coef_df <- as.data.frame(summary(model_lm)$coefficients)
coef_df$term <- rownames(coef_df)
confint_df <- confint(model_lm)
coef_df$conf.low <- confint_df[,1]
coef_df$conf.high <- confint_df[,2]
# Forest Plot with ggplot2
library(ggplot2)
ggplot(coef_df[-1,], aes(x=term, y=Estimate)) + # Remove intercept
geom_point() +
geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width=0.2) +
coord_flip() +
labs(title="Linear Model - Forest Plot", y="Coefficient Estimate", x="Variable") +
theme_minimal()

Logistic Regression Forest Plot
# Logistic Regression Example using mtcars
mtcars$am <- factor(mtcars$am)
model_logit <- glm(am ~ wt + hp + cyl, data = mtcars, family = "binomial")
# Tidy output using broom
library(broom)
logit_df <- tidy(model_logit, conf.int = TRUE, exponentiate = TRUE) # ORs
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Forest Plot
ggplot(logit_df[-1,], aes(x=term, y=estimate)) + # Exclude intercept
geom_point() +
geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width=0.2) +
coord_flip() +
labs(title="Logistic Regression - Odds Ratios", y="Odds Ratio", x="Variable") +
theme_minimal()

Cox Proportional Hazards Model Forest Plot
# Survival Analysis Example using survival package
library(survival)
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
# Simulate survival data
set.seed(123)
n <- 100
surv_data <- data.frame(
time = rexp(n, 0.1),
status = sample(0:1, n, replace = TRUE),
age = rnorm(n, 50, 10),
sex = factor(sample(c("Male", "Female"), n, replace = TRUE))
)
cox_model <- coxph(Surv(time, status) ~ age + sex, data = surv_data)
library(forestmodel)
## Warning: package 'forestmodel' was built under R version 4.4.3
forest_model(cox_model)

The easiest way is with the usmap package to plot US Map and your
results
Basic Maps
library(usmap)
## Warning: package 'usmap' was built under R version 4.4.3
plot_usmap()

Color by Population in 2022
plot_usmap(data = statepop, values = "pop_2022") + theme(legend.position = "right")

MS map by county
plot_usmap(include = "MS",
regions = "county")

Deep SOuth
plot_usmap(include = c("MS", "AL", "GA", "LA", "SC", "FL"),
regions = "county")

Other plot with differnt GIS
# Output is ggplot object so it can be extended # with any number of ggplot layers
library(ggplot2)
plot_usmap(include = c("CA", "NV", "ID", "OR", "WA")) + labs(title = "Western States")

# Color maps with data
plot_usmap(data = statepop, values = "pop_2022")

# Include labels on map (e.g. state abbreviations)
plot_usmap(data = statepop, values = "pop_2022", labels = TRUE)

# Choose color for labels
plot_usmap(data = statepop, values = "pop_2022", labels = TRUE, label_color = "white")

Some random sample USA Map
library(usmap)
library(ggplot2)
# Sample data: Population by state
data <- data.frame(
state = state.name,
prevalence = sample(10:100, 10) # Random values for demonstration
)
# Create the map
plot_usmap(data = data, values = "prevalence", color = "white") +
scale_fill_continuous(
low = "lightblue", high = "darkblue", name = "prevalence", label = scales::comma
) +
labs(title = "Sample Data by State", subtitle = "Random values across USA") +
theme(legend.position = "right")
