# Point the R session at the project folder. The path is absolute and tied to
# one Windows user profile; setwd() errors if the folder does not exist.
setwd("C:\\Users\\star-\\Data Science\\R stuff")
library(readxl)
Warning: package 'readxl' was built under R version 4.4.2
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.4.3
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.4.2
Warning: package 'tibble' was built under R version 4.4.1
Warning: package 'tidyr' was built under R version 4.4.1
Warning: package 'readr' was built under R version 4.4.2
Warning: package 'purrr' was built under R version 4.4.2
Warning: package 'dplyr' was built under R version 4.4.2
Warning: package 'stringr' was built under R version 4.4.1
Warning: package 'forcats' was built under R version 4.4.1
Warning: package 'lubridate' was built under R version 4.4.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ lubridate 1.9.4 ✔ tibble 3.2.1
✔ purrr 1.0.4 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scales)
Warning: package 'scales' was built under R version 4.4.3
Attaching package: 'scales'
The following object is masked from 'package:purrr':
discard
The following object is masked from 'package:readr':
col_factor
library(ggrepel)
Warning: package 'ggrepel' was built under R version 4.4.2
library(patchwork)
Warning: package 'patchwork' was built under R version 4.4.3
library(gridExtra)
Warning: package 'gridExtra' was built under R version 4.4.2
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
library(gganimate)
Warning: package 'gganimate' was built under R version 4.4.3
library(plotly)
Warning: package 'plotly' was built under R version 4.4.2
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Read the synthetic asthma dataset from CSV into the `asthma` data frame.
# The path is absolute and repeats the folder already passed to setwd() above.
asthma <- read.csv("C:\\Users\\star-\\Data Science\\R stuff\\synthetic_asthma_dataset.csv")
glimpse(asthma)
Rows: 10,000
Columns: 17
$ Patient_ID <chr> "ASTH100000", "ASTH100001", "ASTH100002", "AST…
$ Age <int> 52, 15, 72, 61, 21, 83, 87, 75, 75, 88, 24, 3,…
$ Gender <chr> "Female", "Male", "Female", "Male", "Male", "O…
$ BMI <dbl> 27.6, 24.6, 17.6, 16.8, 30.2, 27.8, 32.3, 29.7…
$ Smoking_Status <chr> "Former", "Former", "Never", "Never", "Never",…
$ Family_History <int> 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0…
$ Allergies <chr> "None", "Dust", "None", "Multiple", "None", "P…
$ Air_Pollution_Level <chr> "Moderate", "Low", "Moderate", "High", "Modera…
$ Physical_Activity_Level <chr> "Sedentary", "Moderate", "Moderate", "Sedentar…
$ Occupation_Type <chr> "Outdoor", "Indoor", "Indoor", "Outdoor", "Ind…
$ Comorbidities <chr> "Diabetes", "Both", "None", "Both", "None", "N…
$ Medication_Adherence <dbl> 0.38, 0.60, 0.38, 0.60, 0.82, 0.18, 0.18, 0.53…
$ Number_of_ER_Visits <int> 0, 2, 0, 1, 3, 2, 0, 0, 2, 3, 1, 0, 0, 0, 0, 0…
$ Peak_Expiratory_Flow <dbl> 421.0, 297.6, 303.3, 438.0, 535.0, 232.9, 370.…
$ FeNO_Level <dbl> 46.0, 22.9, 15.3, 40.1, 27.7, 45.1, 14.1, 17.6…
$ Has_Asthma <int> 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0…
$ Asthma_Control_Level <chr> "N/A", "N/A", "N/A", "Poorly Controlled", "N/A…
# Add two character columns that bin Age and BMI into labelled bands.
# findInterval() counts how many cut points are <= the value, i.e. it uses
# left-closed bands [lower, upper); adding 1 turns that count into an index
# into the label vector. NA inputs give an NA index and therefore an NA label.
asthma <- mutate(
  asthma,
  # Age bands: < 26, 26-50, 51-75, 76 and over.
  # NOTE(review): the first label starts with a space; it is kept as-is
  # because other code may match on the exact string -- confirm before
  # changing.
  Age_groups = c(" 25 & Under", "26 - 50", "51 - 75", "76+")[
    findInterval(Age, c(26, 51, 76)) + 1L
  ],
  # BMI bands split at 21.6, 25 and 28.4.
  # NOTE(review): these look like quartile cut points of BMI, but they are
  # hard-coded rather than computed here -- verify against the data.
  BMI_groups = c(
    "First Quantile",
    "Second Quantile",
    "Third Quantile",
    "Fourth Quantile"
  )[findInterval(BMI, c(21.6, 25, 28.4)) + 1L]
)

# Capitalised alias bound to the same augmented data frame.
Asthma <- asthma