This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(viridis)
## Loading required package: viridisLite
library(shiny)
library(flexdashboard)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
library(tidyr)
# Load dataset
# The path defaults to the original local location; set the
# SMOKING_DATA_PATH environment variable to knit on another machine.
data_path <- Sys.getenv(
  "SMOKING_DATA_PATH",
  unset = "C:/Users/Admin/OneDrive/Desktop/Notes/Semester_2/Sem_2_Assignments/Data Visualisation and Communication/Assignment-3/smoking_health_data_final.csv"
)
# Declare the seven-column schema explicitly (same types readr previously
# guessed) so parsing does not depend on type guessing and the
# column-specification message is no longer printed.
data <- read_csv(
  data_path,
  col_types = cols(
    age = col_double(),
    sex = col_character(),
    current_smoker = col_character(),
    heart_rate = col_double(),
    blood_pressure = col_character(),
    cigs_per_day = col_double(),
    chol = col_double()
  )
)
library(tidyr)
# Split the "/"-delimited blood_pressure string into systolic and diastolic
# columns (convert = TRUE lets tidyr re-type the pieces), then recode sex and
# smoking status as factors with an explicit level order.
data <- data %>%
  separate(
    col = blood_pressure,
    into = c("systolic", "diastolic"),
    sep = "/",
    convert = TRUE
  ) %>%
  mutate(
    sex = factor(sex, levels = c("male", "female")),
    current_smoker = factor(current_smoker, levels = c("yes", "no"))
  )
# Mean imputation: in every numeric column, swap each NA for that column's
# mean, computed over the non-missing values.
data <- mutate(
  data,
  across(
    where(is.numeric),
    function(column) ifelse(is.na(column), mean(column, na.rm = TRUE), column)
  )
)
# Per-column count of the missing values that remain
data %>%
  is.na() %>%
  colSums()
## age sex current_smoker heart_rate systolic
## 0 0 0 0 0
## diastolic cigs_per_day chol
## 0 0 0
# Stacked bar plot of smoking status by age and sex.
# - `barmode` is a layout setting, not a bar-trace attribute, so it is set in
#   layout(); passing it to plot_ly() only produced a warning and was ignored.
# - count() returns an ungrouped summary, so no `.groups` message is emitted.
# - The explicit two-colour palette avoids RColorBrewer's "minimal value for
#   n is 3" warning for the two-level smoking-status factor.
# - Sex is part of the summary, so it is exposed in the hover text to make the
#   per-sex segments of each bar identifiable.
data %>%
  count(age, sex, current_smoker, name = "count") %>%
  plot_ly(
    x = ~age,
    y = ~count,
    color = ~current_smoker,
    colors = c("#66C2A5", "#8DA0CB"),
    hovertext = ~paste("Sex:", sex),
    type = "bar"
  ) %>%
  layout(
    title = "Smoking Status by Age and Sex",
    barmode = "stack",
    xaxis = list(title = "Age"),
    yaxis = list(title = "Count")
  )
# Scatter plot of heart rate against cholesterol, coloured by smoking status.
# The explicit two-colour palette stops plotly from requesting a two-colour
# Set2 palette from RColorBrewer, which warns that the minimum size is 3.
data %>%
  plot_ly(
    x = ~heart_rate,
    y = ~chol,
    color = ~current_smoker,
    colors = c("#66C2A5", "#8DA0CB"),
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    title = "Heart Rate vs Cholesterol by Smoking Status",
    xaxis = list(title = "Heart Rate"),
    yaxis = list(title = "Cholesterol")
  )
# Input controls for interactivity: smoking status, age range and gender.
# Choices and slider bounds are taken from the values present in `data`.
selectInput(
  inputId = "smoker",
  label = "Select Smoking Status:",
  choices = unique(data$current_smoker)
)
sliderInput(
  inputId = "age_range",
  label = "Select Age Range:",
  min = min(data$age),
  max = max(data$age),
  value = range(data$age)
)
selectInput(
  inputId = "gender",
  label = "Select Gender:",
  choices = unique(data$sex)
)
renderPlotly({
  # Do not evaluate until every input has a value; otherwise the filter
  # below would compare against NULL.
  req(input$smoker, input$age_range, input$gender)

  # Filtered data based on inputs (age bounds are inclusive)
  filtered_data <- data %>%
    filter(
      current_smoker == input$smoker,
      between(age, input$age_range[1], input$age_range[2]),
      sex == input$gender
    )

  # Show a readable message instead of attempting to plot zero rows.
  validate(
    need(nrow(filtered_data) > 0, "No records match the selected filters.")
  )

  # Box plot of cigarettes per day at each age for the filtered rows
  plot_ly(
    filtered_data,
    x = ~age,
    y = ~cigs_per_day,
    type = "box",
    color = ~current_smoker
  ) %>%
    layout(
      title = "Cigarette Consumption by Age and Smoking Status",
      xaxis = list(title = "Age"),
      yaxis = list(title = "Cigarettes per Day")
    )
})