required library

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(readxl)
library(haven)

Data

Data and data shummary

data(mtcars)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Basic Summary Stats

summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000

Skimr for more details of data summary

# install.packages("skimr")
library(skimr)
## Warning: package 'skimr' was built under R version 4.4.3
skim(mtcars)
Data summary
Name mtcars
Number of rows 32
Number of columns 11
_______________________
Column type frequency:
numeric 11
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
mpg 0 1 20.09 6.03 10.40 15.43 19.20 22.80 33.90 ▃▇▅▁▂
cyl 0 1 6.19 1.79 4.00 4.00 6.00 8.00 8.00 ▆▁▃▁▇
disp 0 1 230.72 123.94 71.10 120.83 196.30 326.00 472.00 ▇▃▃▃▂
hp 0 1 146.69 68.56 52.00 96.50 123.00 180.00 335.00 ▇▇▆▃▁
drat 0 1 3.60 0.53 2.76 3.08 3.70 3.92 4.93 ▇▃▇▅▁
wt 0 1 3.22 0.98 1.51 2.58 3.33 3.61 5.42 ▃▃▇▁▂
qsec 0 1 17.85 1.79 14.50 16.89 17.71 18.90 22.90 ▃▇▇▂▁
vs 0 1 0.44 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
am 0 1 0.41 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
gear 0 1 3.69 0.74 3.00 3.00 4.00 4.00 5.00 ▇▁▆▁▂
carb 0 1 2.81 1.62 1.00 2.00 2.00 4.00 8.00 ▇▂▅▁▁

Group-wise Summaries: dplyr

library(dplyr)

mtcars %>%
  group_by(cyl) %>%
  summarise(across(mpg:hp, list(mean = mean, sd = sd)))
## # A tibble: 3 × 7
##     cyl mpg_mean mpg_sd disp_mean disp_sd hp_mean hp_sd
##   <dbl>    <dbl>  <dbl>     <dbl>   <dbl>   <dbl> <dbl>
## 1     4     26.7   4.51      105.    26.9    82.6  20.9
## 2     6     19.7   1.45      183.    41.6   122.   24.3
## 3     8     15.1   2.56      353.    67.8   209.   51.0

Group-wise Summaries: dplyr

library(dplyr)

mtcars %>%
  group_by(cyl, carb) %>%
  summarise(across(mpg:hp, list(mean = mean, sd = sd)))
## `summarise()` has grouped output by 'cyl'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 8
## # Groups:   cyl [3]
##     cyl  carb mpg_mean mpg_sd disp_mean disp_sd hp_mean hp_sd
##   <dbl> <dbl>    <dbl>  <dbl>     <dbl>   <dbl>   <dbl> <dbl>
## 1     4     1     27.6   5.55      91.4   21.4     77.4 16.1 
## 2     4     2     25.9   3.81     117.    27.1     87   24.9 
## 3     6     1     19.8   2.33     242.    23.3    108.   3.54
## 4     6     4     19.8   1.55     164.     4.39   116.   7.51
## 5     6     6     19.7  NA        145     NA      175   NA   
## 6     8     2     17.2   2.09     346.    43.4    162.  14.4 
## 7     8     3     16.3   1.05     276.     0      180    0   
## 8     8     4     13.2   2.28     406.    57.8    234   21.7 
## 9     8     8     15    NA        301     NA      335   NA

Basic Visualization: Base R

hist(mtcars$mpg, main="MPG Distribution")

Using ggplot2: Intro

library(ggplot2)

ggplot(mtcars, aes(x=mpg)) +
  geom_histogram(bins=10, fill="skyblue", color="black") + theme_dark()

Boxplots for Group Comparisons

ggplot(mtcars, aes(x=factor(cyl), y=mpg)) +
  geom_boxplot(fill="steelblue") +
  labs(x="Cylinders", y="Miles Per Gallon") + theme_bw()

Scatter Plot with Trend Line

ggplot(mtcars, aes(x=wt, y=mpg)) +
  geom_point() +
  geom_smooth(method="lm") + theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Density Plots

ggplot(mtcars, aes(x=mpg, fill=factor(cyl))) +
  geom_density(alpha=0.5) + theme_classic()

Bar plot

ggplot(mtcars, aes(x=factor(gear))) +
  geom_bar(fill="darkblue") + theme_light()

Violin Plots

ggplot(mtcars, aes(x=factor(cyl), y=mpg)) +
  geom_violin(fill="lightgreen") + theme_classic()

Faceting: Multi-panel Plots

ggplot(mtcars, aes(x=mpg)) +
  geom_histogram(fill="blue") +
  facet_wrap(~cyl) + theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Themes and Customizations

ggplot(mtcars, aes(x=wt, y=mpg)) +
  geom_point() +
  theme_minimal()

Interactive Plots: plotly

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
p <- ggplot(mtcars, aes(x=wt, y=mpg, color = factor(cyl))) + geom_point() + theme_classic()
ggplotly(p)

Linear Model Forest Plot

# Linear Regression Example
data(mtcars)
model_lm <- lm(mpg ~ wt + hp + cyl, data = mtcars)

# Extract coefficients and confidence intervals
coef_df <- as.data.frame(summary(model_lm)$coefficients)
coef_df$term <- rownames(coef_df)
confint_df <- confint(model_lm)
coef_df$conf.low <- confint_df[,1]
coef_df$conf.high <- confint_df[,2]

# Forest Plot with ggplot2
library(ggplot2)
ggplot(coef_df[-1,], aes(x=term, y=Estimate)) +  # Remove intercept
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width=0.2) +
  coord_flip() +
  labs(title="Linear Model - Forest Plot", y="Coefficient Estimate", x="Variable") +
  theme_minimal()

Logistic Regression Forest Plot

# Logistic Regression Example using mtcars
mtcars$am <- factor(mtcars$am)
model_logit <- glm(am ~ wt + hp + cyl, data = mtcars, family = "binomial")

# Tidy output using broom
library(broom)
logit_df <- tidy(model_logit, conf.int = TRUE, exponentiate = TRUE)  # ORs
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Forest Plot
ggplot(logit_df[-1,], aes(x=term, y=estimate)) +  # Exclude intercept
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width=0.2) +
  coord_flip() +
  labs(title="Logistic Regression - Odds Ratios", y="Odds Ratio", x="Variable") +
  theme_minimal()

Cox Proportional Hazards Model Forest Plot

# Survival Analysis Example using survival package
library(survival)
library(survminer)
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
# Simulate survival data
set.seed(123)
n <- 100
surv_data <- data.frame(
  time = rexp(n, 0.1),
  status = sample(0:1, n, replace = TRUE),
  age = rnorm(n, 50, 10),
  sex = factor(sample(c("Male", "Female"), n, replace = TRUE))
)

cox_model <- coxph(Surv(time, status) ~ age + sex, data = surv_data)

library(forestmodel)
## Warning: package 'forestmodel' was built under R version 4.4.3
forest_model(cox_model)

The easiest way is with the usmap package to plot US Map and your results

Basic Maps

library(usmap)
## Warning: package 'usmap' was built under R version 4.4.3
plot_usmap()

Color by Population in 2022

plot_usmap(data = statepop, values = "pop_2022") + theme(legend.position = "right")

MS map by county

plot_usmap(include = "MS",
           regions = "county") 

Deep SOuth

plot_usmap(include = c("MS", "AL", "GA", "LA", "SC", "FL"),
           regions = "county") 

Other plot with differnt GIS

# Output is ggplot object so it can be extended # with any number of ggplot layers
library(ggplot2) 
plot_usmap(include = c("CA", "NV", "ID", "OR", "WA")) + labs(title = "Western States") 

# Color maps with data 
plot_usmap(data = statepop, values = "pop_2022") 

# Include labels on map (e.g. state abbreviations) 
plot_usmap(data = statepop, values = "pop_2022", labels = TRUE) 

# Choose color for labels 
plot_usmap(data = statepop, values = "pop_2022", labels = TRUE, label_color = "white")

Some random sample USA Map

library(usmap)
library(ggplot2)

# Sample data: Population by state
data <- data.frame(
  state = state.name,
  prevalence = sample(10:100, 10)  # Random values for demonstration
)

# Create the map
plot_usmap(data = data, values = "prevalence", color = "white") +
  scale_fill_continuous(
    low = "lightblue", high = "darkblue", name = "prevalence", label = scales::comma
  ) +
  labs(title = "Sample Data by State", subtitle = "Random values across USA") +
  theme(legend.position = "right")