Asthma Presence

Author

Crows: Artemas Souder, Danielle Clark, Kensley House

Introduction

Knowing the factors that influence Asthma can be integral to determining what might influence Asthma severity. This data set provides plenty of general healthcare information with which to explore this topic.

Research Questions

  • What factors are associated with the presence of Asthma?

  • How does BMI interact with Asthma Risk and Asthma Severity?

The purpose of this research is to gain a better understanding of what influences Asthma Risk and Severity. Our aim is to provide adequate analysis and to understand how certain personal factors might prove important for people with Asthma.

Data

Data was collected from Kaggle: Asthma Risk & Severity Dataset

# Packages used throughout this report.
library(readxl)
library(ggplot2)
library(tidyverse)
library(scales)
library(ggrepel)
library(patchwork)
library(gridExtra)
library(gganimate)
library(plotly)
library(knitr)

# Location of the asthma CSV. It defaults to the original download location
# and can be overridden with the ASTHMA_DATA_PATH environment variable so the
# report can be run on another machine. No setwd() call is needed because the
# file is always read through this explicit path.
asthma_path <- Sys.getenv(
  "ASTHMA_DATA_PATH",
  unset = "C:\\Users\\dccla\\Downloads\\synthetic_asthma_dataset.csv"
)

# Fail early with a readable message instead of a bare read.csv() error.
if (!file.exists(asthma_path)) {
  stop("Asthma data file not found: ", asthma_path, call. = FALSE)
}

asthma <- read.csv(asthma_path)


# Render the column names of the raw data as a one-column captioned table.
knitr::kable(
  data.frame(Variable_Names = names(asthma)),
  caption = "Variable Names in Asthma Risk Dataset"
)
Variable Names in Asthma Risk Dataset
Variable_Names
Patient_ID
Age
Gender
BMI
Smoking_Status
Family_History
Allergies
Air_Pollution_Level
Physical_Activity_Level
Occupation_Type
Comorbidities
Medication_Adherence
Number_of_ER_Visits
Peak_Expiratory_Flow
FeNO_Level
Has_Asthma
Asthma_Control_Level

The table above lists the variables present in the Asthma Risk & Severity data set. Only certain variables will be kept for further analysis.

BMI and Age will be turned into categorical groups for general comparison purposes based on broad categories affecting Asthma Levels.

# Derive categorical age and BMI bands, then keep only the columns used in
# the analysis below.
asthma <- asthma |>
  mutate(
    # case_when() tests conditions in order, so each band only needs its
    # upper bound; rows with a missing Age or BMI get NA.
    Age_groups = case_when(
      Age < 26 ~ "25 & Under",
      Age < 51 ~ "26 - 50",
      Age < 76 ~ "51 - 75",
      Age >= 76 ~ "76+"
    ),
    # Cut at 25 and 30 rather than 24.9 and 29.9 so that values lying between
    # the one-decimal cut-offs (e.g. 24.95) are not pushed up a band.
    BMI_groups = case_when(
      BMI < 18.5 ~ "Underweight",
      BMI < 25 ~ "Normal weight",
      BMI < 30 ~ "Overweight",
      BMI >= 30 ~ "Obese"
    )
  ) |>
  select(
    Age, BMI, Gender, Allergies, Physical_Activity_Level, Occupation_Type,
    Comorbidities, Has_Asthma, Age_groups, BMI_groups, Medication_Adherence
  )

# The rest of the report works from this name.
asthma_clean <- asthma

This table is interactive, allowing you to explore the data set using the search box.

# Interactive view of the cleaned data.
library(DT)
asthma_clean |>
  datatable()

The table below highlights the differences for BMI groups when also looking at Age groups.

# Cross-tabulate age band (rows) against BMI band (columns).
hold <- table(
  asthma_clean[["Age_groups"]],
  asthma_clean[["BMI_groups"]]
)

# Print the contingency table with a caption.
knitr::kable(x = hold, caption = "BMI and Age Categories Table")
BMI and Age Categories Table
Normal weight Obese Overweight Underweight
25 & Under 1133 467 990 257
26 - 50 1112 441 969 245
51 - 75 1158 443 945 272
76+ 632 241 546 149

Results

# Stacked bar chart of BMI band counts, filled by age band, with one panel
# per asthma status.
asthma_clean |>
  ggplot(aes(x = BMI_groups, fill = Age_groups)) +
  geom_bar() +
  # Has_Asthma is used as a 0/1 flag elsewhere in this report; label the
  # panels so readers do not have to decode it.
  facet_wrap(
    ~Has_Asthma,
    labeller = as_labeller(
      c(`0` = "Does Not Have Asthma", `1` = "Has Asthma")
    )
  ) +
  labs(
    x = "Asthma BMI Levels",
    title = "Barcharts for BMI and Age Groups",
    # The panels split on asthma presence, not on occupation.
    subtitle = "By asthma presence"
  )

# Counts per age band, split by gender. `fill` shades the bars; `color`
# would only draw their outlines.
Age_distribution_by_group <- ggplot(
  asthma_clean,
  aes(x = Age_groups, fill = Gender)
) +
  geom_bar() +
  labs(title = "Bar chart of Age Distribution by Group") +
  theme_minimal()

# Histogram of raw ages, split by gender. An explicit 5-year bin width
# replaces the implicit default of 30 bins.
Age_distribution <- ggplot(asthma_clean, aes(x = Age, fill = Gender)) +
  geom_histogram(binwidth = 5) +
  labs(title = "Histogram of Age Distribution") +
  theme_minimal()

## combined chart to show how things differ
# `+` on two ggplot objects places them side by side (patchwork).
Age_distribution + Age_distribution_by_group

# Share of people with asthma, computed from the data so the annotation text
# cannot drift away from the bars it describes.
pct_has_asthma <- 100 * mean(asthma_clean$Has_Asthma == 1, na.rm = TRUE)
has_asthma_label <- sprintf("%.2f%% Have Asthma", pct_has_asthma)
no_asthma_label <- sprintf(
  "%.2f%% Do Not Have Asthma",
  100 - pct_has_asthma
)

# Bar chart of asthma presence, stacked by gender, with call-out labels.
ggplot(asthma_clean, aes(x = Has_Asthma, fill = Gender)) +
  geom_bar() +
  # Has_Asthma is a numeric 0/1 flag, so the axis needs a continuous scale
  # for the breaks and labels at 0 and 1 to apply.
  scale_x_continuous(
    name = "Asthma Presence",
    breaks = c(0, 1),
    labels = c("No Asthma", "Has Asthma")
  ) +
  # Call-out for the "has asthma" bar (centred on x = 1).
  annotate(
    geom = "label", x = 1.5, y = 4100,
    label = has_asthma_label,
    hjust = "center", vjust = "bottom",
    color = "red"
  ) +
  annotate(
    geom = "segment", x = 1.5, y = 4100,
    xend = 1.1, yend = 2500,
    color = "blue",
    arrow = arrow(type = "closed")
  ) +
  # Call-out for the "no asthma" bar (centred on x = 0).
  annotate(
    geom = "label", x = .75, y = 5500,
    label = no_asthma_label,
    hjust = "left",
    color = "red"
  ) +
  annotate(
    geom = "segment", x = .75, y = 5500,
    xend = 0.5, yend = 5000,
    color = "blue",
    arrow = arrow(type = "closed")
  ) +
  labs(
    subtitle = "For both groups there are approximately equal numbers of males and females at 48%",
    title = "Barchart For Presence of Asthma by Gender"
  )

# Bar chart of BMI band counts, filled by asthma status, with one panel per
# occupation type.
ggplot(
  asthma_clean,
  aes(x = BMI_groups, fill = as.character(Has_Asthma))
) +
  geom_bar() +
  facet_wrap(~Occupation_Type) +
  # Labels are matched to the fill values in order.
  scale_fill_discrete(labels = c("Does Not Have Asthma", "Has Asthma")) +
  labs(
    title = "Faceted Bar Chart for BMI by Asthma Presence and Occupation Location",
    fill = "Asthma Presence"
  )

Interactive Elements

# Interactive box plot of BMI.
asthma_clean |>
  plot_ly(x = ~BMI, type = "box") |>
  layout(xaxis = list(title = "BMI"))

# Interactive histogram of BMI.
asthma_clean |>
  plot_ly(x = ~BMI, type = "histogram") |>
  layout(
    xaxis = list(title = "BMI"),
    yaxis = list(title = "Number of People")
  )

Analysis

# Density ridges of BMI: one ridge row per asthma status, with the ridges
# filled by occupation type.
library(ggridges)
ggplot(
  asthma_clean,
  aes(
    x = BMI,
    y = as.character(Has_Asthma),
    fill = Occupation_Type,
    color = as.character(Has_Asthma)
  )
) +
  # Keep the fill legend: without it the occupation colours cannot be read.
  geom_density_ridges(alpha = .4) +
  # The outline colour repeats what the y axis already shows, so hide only
  # that legend.
  guides(color = "none") +
  labs(
    x = "BMI",
    y = "Has Asthma (0 = No, 1 = Yes)",
    fill = "Occupation Type",
    title = "BMI Density by Asthma Presence and Occupation Type"
  )

# Medication adherence against BMI for people who have asthma. Filtering
# once, before ggplot(), keeps the points and the smoother on the same rows;
# previously the smoother was fitted on everyone.
asthma_clean |>
  filter(Has_Asthma == 1) |>
  ggplot(aes(x = BMI, y = Medication_Adherence)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Scatter plot of Medication Adherence by BMI",
    subtitle = "Individuals with Asthma"
  )

 ## medication adherence per activity level
 # Restrict to people with asthma so the data matches the subtitle.
 asthma_clean |>
   filter(Has_Asthma == 1) |>
   # `fill` shades the stacked bars by gender; `color` would only outline them.
   ggplot(aes(x = Medication_Adherence, fill = Gender)) +
   # bins = 30 is the value geom_histogram() would otherwise pick implicitly.
   geom_histogram(bins = 30) +
   facet_wrap(~ Physical_Activity_Level) +
   labs(
     title = "Stacked histogram of Medication Adherence by Activity Level and Gender",
     subtitle = "Individuals with Asthma"
   )

 ##look into prop chart or side/side bar vs stacked
 
 
 ## medication adherence of men who work outdoors
 # Filter once, before ggplot(), so every layer and the facets use the same
 # subgroup.
 asthma_clean |>
   filter(Gender == "Male" & Occupation_Type == "Outdoor") |>
   ggplot(aes(x = Medication_Adherence)) +
   geom_boxplot() +
   facet_wrap(~ Physical_Activity_Level) +
   labs(
     title = "Box plot of Medication Adherence by Activity Level",
     # Name the subgroup so the plot is not read as covering everyone.
     subtitle = "Men with outdoor occupations"
   )

  # all graphs are the same 
  
 
 ## Is there a difference in medication adherence between genders?
 # Count of each adherence value, one column of panels per gender.
 ggplot(data = asthma_clean, mapping = aes(x = Medication_Adherence)) +
   facet_grid(cols = vars(Gender)) +
   geom_bar()

 #maybe?
 ## change to scatter plot
 
 # BMI distribution per gender. BMI is continuous, so bin it with a histogram
 # (1-unit bins) instead of counting every distinct value with geom_bar().
 ggplot(asthma_clean, aes(x = BMI)) +
   geom_histogram(binwidth = 1) +
   facet_grid(~ Gender) +
   labs(x = "BMI", y = "Number of People")

Conclusion

Contact Information

  • xsouder@students.kennesaw.edu

  • dclar175@students.kennesaw.edu

  • khouse15@students.kennesaw.edu