Asthma Presence

Author

Crows: Artemas Souder, Danielle Clark, Kensley House

Table of Contents

  • Introduction

  • Data

    • Interactive Elements
  • Results

Introduction

Data

The data were obtained from Kaggle (Asthma Dataset). The file is a synthetic dataset containing 10,000 patient records with 17 variables.

# NOTE(review): this sets the working directory to an absolute,
# machine-specific path, so the script only runs unchanged on the author's
# computer. The CSV further down is read via its own absolute path, so this
# call may be unnecessary -- confirm before removing.
setwd("C:\\Users\\star-\\Data Science\\R stuff")
library(readxl)
Warning: package 'readxl' was built under R version 4.4.2
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.4.3
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.4.2
Warning: package 'tibble' was built under R version 4.4.1
Warning: package 'tidyr' was built under R version 4.4.1
Warning: package 'readr' was built under R version 4.4.2
Warning: package 'purrr' was built under R version 4.4.2
Warning: package 'dplyr' was built under R version 4.4.2
Warning: package 'stringr' was built under R version 4.4.1
Warning: package 'forcats' was built under R version 4.4.1
Warning: package 'lubridate' was built under R version 4.4.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.2.1
✔ purrr     1.0.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scales)
Warning: package 'scales' was built under R version 4.4.3

Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor
library(ggrepel)
Warning: package 'ggrepel' was built under R version 4.4.2
library(patchwork)
Warning: package 'patchwork' was built under R version 4.4.3
library(gridExtra)
Warning: package 'gridExtra' was built under R version 4.4.2

Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine
library(gganimate)
Warning: package 'gganimate' was built under R version 4.4.3
library(plotly)
Warning: package 'plotly' was built under R version 4.4.2

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
# Read the synthetic asthma dataset from CSV and print a column-wise preview.
# NOTE(review): the path is absolute and specific to the author's machine.
asthma <- read.csv(
  file = "C:\\Users\\star-\\Data Science\\R stuff\\synthetic_asthma_dataset.csv"
)
asthma |> glimpse()
Rows: 10,000
Columns: 17
$ Patient_ID              <chr> "ASTH100000", "ASTH100001", "ASTH100002", "AST…
$ Age                     <int> 52, 15, 72, 61, 21, 83, 87, 75, 75, 88, 24, 3,…
$ Gender                  <chr> "Female", "Male", "Female", "Male", "Male", "O…
$ BMI                     <dbl> 27.6, 24.6, 17.6, 16.8, 30.2, 27.8, 32.3, 29.7…
$ Smoking_Status          <chr> "Former", "Former", "Never", "Never", "Never",…
$ Family_History          <int> 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
$ Allergies               <chr> "None", "Dust", "None", "Multiple", "None", "P…
$ Air_Pollution_Level     <chr> "Moderate", "Low", "Moderate", "High", "Modera…
$ Physical_Activity_Level <chr> "Sedentary", "Moderate", "Moderate", "Sedentar…
$ Occupation_Type         <chr> "Outdoor", "Indoor", "Indoor", "Outdoor", "Ind…
$ Comorbidities           <chr> "Diabetes", "Both", "None", "Both", "None", "N…
$ Medication_Adherence    <dbl> 0.38, 0.60, 0.38, 0.60, 0.82, 0.18, 0.18, 0.53…
$ Number_of_ER_Visits     <int> 0, 2, 0, 1, 3, 2, 0, 0, 2, 3, 1, 0, 0, 0, 0, 0…
$ Peak_Expiratory_Flow    <dbl> 421.0, 297.6, 303.3, 438.0, 535.0, 232.9, 370.…
$ FeNO_Level              <dbl> 46.0, 22.9, 15.3, 40.1, 27.7, 45.1, 14.1, 17.6…
$ Has_Asthma              <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
$ Asthma_Control_Level    <chr> "N/A", "N/A", "N/A", "Poorly Controlled", "N/A…
# Bin Age and BMI into categorical groups used by the plots below.
#
# The groups are stored as factors with an explicit level order. As plain
# character columns ggplot2 sorts them alphabetically, which puts the BMI
# groups in the order First, Fourth, Second, Third on axes and in legends.
# The label text is unchanged (including the leading space in " 25 & Under"),
# so comparisons such as `Age_groups == "51 - 75"` keep working.
#
# NOTE(review): the BMI cut points (21.6, 25, 28.4) are presumably the
# quartiles of BMI in this dataset -- confirm; if so, "Quartile" would be the
# more accurate label than "Quantile".
age_levels <- c(" 25 & Under", "26 - 50", "51 - 75", "76+")
bmi_levels <- c(
  "First Quantile", "Second Quantile", "Third Quantile", "Fourth Quantile"
)

asthma <- asthma |>
  mutate(
    Age_groups = factor(
      case_when(
        Age < 26 ~ age_levels[1],
        Age >= 26 & Age < 51 ~ age_levels[2],
        Age >= 51 & Age < 76 ~ age_levels[3],
        Age >= 76 ~ age_levels[4]
      ),
      levels = age_levels
    ),
    BMI_groups = factor(
      case_when(
        BMI < 21.6 ~ bmi_levels[1],
        BMI >= 21.6 & BMI < 25 ~ bmi_levels[2],
        BMI >= 25 & BMI < 28.4 ~ bmi_levels[3],
        BMI >= 28.4 ~ bmi_levels[4]
      ),
      levels = bmi_levels
    )
  )

# Second handle on the same data frame; later chunks refer to `Asthma`.
Asthma <- asthma

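Age and BMI are binned into categorical groups: four age bands (25 and under, 26–50, 51–75, 76+) and four BMI groups with cut points at 21.6, 25, and 28.4.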

Results

# Stacked bar chart: number of patients per BMI group, filled by age group,
# with one panel per asthma status.
#
# The panels are split by Has_Asthma (0/1), so the strips are given readable
# labels and the subtitle describes that split (it previously referred to
# occupation locations, which this chart does not show).
asthma |>
  ggplot(aes(x = BMI_groups, fill = Age_groups)) +
  geom_bar() +
  facet_wrap(
    ~Has_Asthma,
    labeller = as_labeller(c(`0` = "No Asthma", `1` = "Has Asthma"))
  ) +
  labs(
    x = "Asthma BMI Levels",
    title = "Barcharts of BMI and Age Groups",
    subtitle = "By asthma presence"
  )

# Confirm the two derived grouping columns are present.
glimpse(asthma)
Rows: 10,000
Columns: 19
$ Patient_ID              <chr> "ASTH100000", "ASTH100001", "ASTH100002", "AST…
$ Age                     <int> 52, 15, 72, 61, 21, 83, 87, 75, 75, 88, 24, 3,…
$ Gender                  <chr> "Female", "Male", "Female", "Male", "Male", "O…
$ BMI                     <dbl> 27.6, 24.6, 17.6, 16.8, 30.2, 27.8, 32.3, 29.7…
$ Smoking_Status          <chr> "Former", "Former", "Never", "Never", "Never",…
$ Family_History          <int> 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
$ Allergies               <chr> "None", "Dust", "None", "Multiple", "None", "P…
$ Air_Pollution_Level     <chr> "Moderate", "Low", "Moderate", "High", "Modera…
$ Physical_Activity_Level <chr> "Sedentary", "Moderate", "Moderate", "Sedentar…
$ Occupation_Type         <chr> "Outdoor", "Indoor", "Indoor", "Outdoor", "Ind…
$ Comorbidities           <chr> "Diabetes", "Both", "None", "Both", "None", "N…
$ Medication_Adherence    <dbl> 0.38, 0.60, 0.38, 0.60, 0.82, 0.18, 0.18, 0.53…
$ Number_of_ER_Visits     <int> 0, 2, 0, 1, 3, 2, 0, 0, 2, 3, 1, 0, 0, 0, 0, 0…
$ Peak_Expiratory_Flow    <dbl> 421.0, 297.6, 303.3, 438.0, 535.0, 232.9, 370.…
$ FeNO_Level              <dbl> 46.0, 22.9, 15.3, 40.1, 27.7, 45.1, 14.1, 17.6…
$ Has_Asthma              <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
$ Asthma_Control_Level    <chr> "N/A", "N/A", "N/A", "Poorly Controlled", "N/A…
$ Age_groups              <chr> "51 - 75", " 25 & Under", "51 - 75", "51 - 75"…
$ BMI_groups              <chr> "Third Quantile", "Second Quantile", "First Qu…
# Age distribution shown two ways, split by gender.
#
# `fill` (not `color`) is mapped to Gender: for bars, `color` only changes the
# outline, leaving every bar the same grey so the genders cannot be told apart.

# Counts per derived age group.
Age_distribution_by_group <- ggplot(
  asthma,
  aes(x = Age_groups, fill = Gender)
) +
  geom_bar() +
  labs(title = "Bar chart of Age Distribution by Group") +
  theme_minimal()

# Histogram of raw ages. An explicit 5-year bin width replaces the default of
# 30 bins, which ggplot2 flags with a "Pick better value" message.
Age_distribution <- ggplot(asthma, aes(x = Age, fill = Gender)) +
  geom_histogram(binwidth = 5, boundary = 0) +
  labs(title = "Histogram of Age Distribution") +
  theme_minimal()

## combined chart to show how things differ
Age_distribution + Age_distribution_by_group
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# Stacked bar chart of asthma presence, filled by gender, with annotated
# percentages.
#
# Has_Asthma is an integer 0/1 column, so it is converted to a factor before
# being placed on a discrete x scale. The two bars then sit at discrete
# positions 1 ("0") and 2 ("1"), which is what the annotation coordinates
# below assume, and the breaks/labels on the scale take effect.
#
# NOTE(review): the percentages in the labels and subtitle are hard-coded;
# they must be updated by hand if the data changes.
ggplot(asthma, aes(x = factor(Has_Asthma), fill = Gender)) +
  geom_bar() +
  scale_x_discrete(
    name = "Asthma Presence",
    breaks = c("0", "1"),
    labels = c("No Asthma", "Has Asthma")
  ) +
  # Label and arrow pointing at the "Has Asthma" bar (position 2).
  annotate(
    geom = "label", x = 2.2, y = 4100,
    label = "24.33% Have Asthma",
    hjust = "center", vjust = "bottom",
    color = "red"
  ) +
  annotate(
    geom = "segment", x = 2.2, y = 4100,
    xend = 2, yend = 2500,
    color = "blue",
    arrow = arrow(type = "closed")
  ) +
  # Label and arrow pointing at the "No Asthma" bar (position 1).
  annotate(
    geom = "label", x = 1.6, y = 5500,
    label = "75.67% Do Not Have Asthma",
    hjust = "left",
    color = "red"
  ) +
  annotate(
    geom = "segment", x = 1.6, y = 5500,
    xend = 1.45, yend = 4100,
    color = "blue",
    arrow = arrow(type = "closed")
  ) +
  labs(
    subtitle = paste0(
      "For both groups there are approximately equal numbers ",
      "of males and females at 48%"
    ),
    title = "Barchart For Presence of Asthma by Gender"
  )

#ggplot(data = asthma,aes(x=Age, y = BMI, color = Has_Asthma))+
#  geom_point()+geom_smooth(data = asthma|>
 #                  filter(Has_Asthma ==0), color = "red3")+
  #geom_smooth(data = asthma|>
   #             filter(Has_Asthma ==1), color = "blue3")+
  #scale_color_discrete(labels = c('Does Not Have Asthma', "Has Asthma"))+
 # labs(title = "Scatterplot for BMI by Age when Looking at Presence of Asthma",
  #     subtitle = "Students with Asthma", color = "Has Asthma")
# Histogram of BMI, one panel per occupation type, stacked by asthma status.
#
# Has_Asthma is an integer column; mapping it to `fill` directly makes ggplot2
# drop the aesthetic ("aesthetics were dropped during statistical
# transformation: fill"), so the bars are not split at all. Converting it to a
# factor restores the grouping. `bins = 30` keeps the binning ggplot2 was
# already defaulting to, stated explicitly to silence the stat_bin() message.
ggplot(data = asthma, aes(x = BMI, fill = factor(Has_Asthma))) +
  facet_wrap(~Occupation_Type) +
  geom_histogram(bins = 30) +
  scale_fill_discrete(
    name = "Asthma Presence",
    labels = c("Does Not Have Asthma", "Has Asthma")
  ) +
  labs(
    title = paste(
      "Faceted Histogram for BMI by Asthma Presence",
      "and Occupation Location"
    )
  )
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.
Warning: The following aesthetics were dropped during statistical transformation: fill.
ℹ This can happen when ggplot fails to infer the correct grouping structure in
  the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
  variable into a factor?
The following aesthetics were dropped during statistical transformation: fill.
ℹ This can happen when ggplot fails to infer the correct grouping structure in
  the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
  variable into a factor?

Interactive Elements

# Interactive box plot of BMI (mapped to the x-axis) for all patients.
plot_ly(asthma, x = ~BMI, type = "box") |>
  layout(xaxis = list(title = "BMI"))
# Interactive histogram of BMI for all patients, with labelled axes.
plot_ly(asthma, x = ~BMI, type = "histogram") |>
  layout(
    yaxis = list(title = "Number of People"),
    xaxis = list(title = "BMI")
  )

Analysis

#density ridges plot
#library(ggridges)
#ggplot(asthma, aes(x=BMI, y=Has_Asthma,
 #                  fill = Occupation_Type,
  #                 color = Has_Asthma))+
  #geom_density_ridges(alpha = .4,show.legend = FALSE)
# Boxplots of BMI for each asthma control level, filled by gender, with one
# panel per occupation type. Rows whose control level is "N/A" are excluded.
ggplot(
  data = filter(asthma, Asthma_Control_Level != "N/A"),
  mapping = aes(x = Asthma_Control_Level, y = BMI, fill = Gender)
) +
  facet_wrap(vars(Occupation_Type)) +
  geom_boxplot() +
  labs(
    title = "Boxplots for Asthma Control Levels and BMI",
    subtitle = "For differnt occupation locations",
    x = "Asthma Control Level"
  )

# Scatter plot of medication adherence against BMI with a smoothed trend line,
# restricted to patients who have asthma.
#
# The filter is applied to the plot's data rather than to the point layer
# only; otherwise geom_smooth() is fitted to all patients while the points and
# the subtitle describe asthma patients alone.
Asthma |>
  filter(Has_Asthma == 1) |>
  ggplot(aes(x = BMI, y = Medication_Adherence)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Scatter plot of Medication Adherence by BMI",
    subtitle = "Individuals with Asthma"
  )
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

 ## Medication adherence per activity level, stacked by gender.
 ##
 ## - Rows are limited to patients with asthma so the data matches the
 ##   subtitle.
 ## - Gender is mapped to `fill`; `color` would only change the bar outlines.
 ## - The title uses an explicit "\n" so the source indentation does not end
 ##   up inside the rendered title.
 ## - `bins = 30` states the binning explicitly (silences the stat_bin()
 ##   message).
 Asthma |>
   filter(Has_Asthma == 1) |>
   ggplot(aes(x = Medication_Adherence, fill = Gender)) +
   geom_histogram(bins = 30) +
   facet_wrap(~Physical_Activity_Level) +
   labs(
     title = paste0(
       "Stacked histogram of Medication Adherence\n",
       "by Activity Level and Gender"
     ),
     subtitle = "Individuals with Asthma"
   )
`stat_bin()` using `bins = 30`. Pick better value `binwidth`.

 ##look into prop chart or side/side bar vs stacked
 
 
 ## Medication adherence of men who work outdoors, one panel per physical
 ## activity level.
 ##
 ## The subset is applied to the plot data and stated in the subtitle, and the
 ## title uses an explicit "\n" so the source indentation does not end up
 ## inside the rendered title.
 Asthma |>
   filter(Gender == "Male" & Occupation_Type == "Outdoor") |>
   ggplot(aes(x = Medication_Adherence)) +
   geom_boxplot() +
   facet_wrap(~Physical_Activity_Level) +
   labs(
     title = "Box plot of Medication Adherence\nby Activity Level",
     subtitle = "Males with outdoor occupations"
   )

  # all graphs are the same 
  
 
 ## Does medication adherence differ between genders?
 ## Draws one bar per distinct adherence value, in one column of panels per
 ## gender.
 ggplot(data = Asthma, mapping = aes(x = Medication_Adherence)) +
   facet_grid(cols = vars(Gender)) +
   geom_bar()

 #maybe?
 ## change to scatter plot
 
 # Count of patients per distinct BMI value, in one column of panels per
 # gender.
 ggplot(data = Asthma, mapping = aes(x = BMI)) +
   facet_grid(cols = vars(Gender)) +
   geom_bar()