library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ dplyr 1.0.7
## ✓ tibble 3.1.5 ✓ stringr 1.4.0
## ✓ tidyr 1.1.4 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(haven)
library(ggplot2)
# Load the NHANES 2011-2012 demographics file (an .xpt file) #
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere #
health <- read_xpt("/Users/yunis/Desktop/NHANES-2011-2012-Demo.xpt")

# Size of the dataset: number of rows and number of columns #
nrow(health)
## [1] 9756
ncol(health)
## [1] 48

# Pull the two variables of interest out as plain numeric vectors (not data frames) #
# DMDHRAGE: person's age in years #
age <- health$DMDHRAGE
# INDFMPIR: ratio of family income to poverty #
income <- health$INDFMPIR

# Distribution of each variable; INDFMPIR contains missing values #
summary(age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 33.00 43.00 45.39 56.00 80.00
summary(income)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.860 1.630 2.205 3.580 5.000 840

# Confirm that both variables are numeric before plotting them #
class(age)
## [1] "numeric"
class(income)
## [1] "numeric"

# Base R scatter plot of age against the family income-to-poverty ratio #
# The y label names the ratio explicitly, since INDFMPIR is not a raw income #
plot(
  age, income,
  main = "2011-2012 National Health and Nutrition Examination Survey",
  xlab = "age",
  ylab = "family income to poverty ratio",
  col = "pink",
  pch = 8
)

# this is a base R scatter plot #
# Most people aged roughly 20-40 (upper bound inferred from the original "20-4's"; verify against the plot) have a family income ratio clustered around 0-1 #
# ggplot2 scatter plot of age against the family income-to-poverty ratio #
# aes() maps columns of `health` directly instead of the global `age`/`income` vectors, #
# so the plot always reflects whatever data frame is passed to ggplot() #
ggplot(health, aes(x = DMDHRAGE, y = INDFMPIR)) +
  geom_point(size = 1.5, color = "#b47dc9") +
  labs(
    title = "2011-2012 National Health and Nutrition Examination Survey",
    x = "Age",
    y = "Family Income to Poverty Ratio"
  )
## Warning: Removed 840 rows containing missing values (geom_point).

# this is a ggplot version of scatter plot #
# ggplot2 scatter plot of age against the family income-to-poverty ratio, #
# with points coloured by the RIDRETH3 code treated as a categorical variable #
# aes() maps columns of `health` directly instead of the global `age`/`income` vectors #
ggplot(health, aes(x = DMDHRAGE, y = INDFMPIR)) +
  geom_point(size = 1.5, aes(color = factor(RIDRETH3))) +
  labs(
    title = "2011-2012 National Health and Nutrition Examination Survey",
    x = "Age",
    y = "Family Income to Poverty Ratio",
    # Explicit legend title instead of the raw expression "factor(RIDRETH3)" #
    color = "RIDRETH3"
  )
## Warning: Removed 840 rows containing missing values (geom_point).

# this is a ggplot version of scatter plot, with factor RIDRETH3 #
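# Points for RIDRETH3 = Non-Hispanic White (named twice in the original note; the second group was probably meant to be a different category - TODO confirm) occupy most of the plot and gather around ages of roughly 20-40 with a family income ratio of 0-1 #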