Import Data

# Load the semicolon-delimited CSV into `df` and preview its first six rows.
df <- utils::read.csv(file = "visualisasi (1).csv", sep = ";")
head(df, n = 6L)
##   PatientID Age Gender Symptoms Diagnosis
## 1         1  77   Male    Cough       Flu
## 2         2  56   Male Headache      Cold
## 3         3  59   Male  Fatigue   Allergy
## 4         4  23 Female    Fever   Allergy
## 5         5  98   Male    Cough  COVID-19
## 6         6  68   Male    Fever      Cold
  # Hitung frekuensi Diagnosis
# Count the number of records per diagnosis. table() returns the categories
# in sorted factor-level order, not in order of first appearance.
freq <- table(df$Diagnosis)

# Draw the bar chart. Labels are taken from names(freq) so that every bar is
# labelled with the category it actually counts; a hand-typed label vector
# silently mislabels the bars whenever its order differs from table()'s.
barplot(freq,
        # barplot() recycles these colours if there are more than four bars
        col = c("red", "blue", "green", "yellow"),
        main = "Distribusi Hasil",
        names.arg = names(freq),
        ylab = "Jumlah")

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Mean Age per Diagnosis (missing ages ignored), drawn as one bar per
# diagnosis with the x-axis labels tilted 45 degrees.
df %>%
  group_by(Diagnosis) %>%
  summarise(MeanAge = mean(Age, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = Diagnosis, y = MeanAge, fill = Diagnosis)) +
  geom_col() +
  ggtitle("Rata-rata Usia Berdasarkan Diagnosis") +
  xlab("Diagnosis") +
  ylab("Rata-rata Usia") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(ggplot2)

# Box plot of Age for each Diagnosis, with the x-axis labels tilted 45 degrees.
ggplot(data = df, mapping = aes(x = Diagnosis, y = Age, fill = Diagnosis)) +
  geom_boxplot() +
  ggtitle("Perbandingan Usia Berdasarkan Diagnosis") +
  xlab("Diagnosis") +
  ylab("Usia") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(ggplot2)

# Bar chart of the number of records for each Gender.
ggplot(data = df, mapping = aes(x = Gender, fill = Gender)) +
  geom_bar(stat = "count") +
  ggtitle("Gender Paling Rentan") +
  xlab("Gender") +
  ylab("Jumlah") +
  theme_minimal()

library(ggplot2)

# Encode Diagnosis as a factor once, so that the numeric codes, the axis
# breaks and the axis labels all come from the same set of levels.
# (unique() counts NA but levels() does not, so deriving breaks from one and
# labels from the other breaks the scale as soon as a Diagnosis is missing.)
diagnosis_factor <- factor(df$Diagnosis)
diagnosis_levels <- levels(diagnosis_factor)
df$Diagnosis_Num <- as.numeric(diagnosis_factor)

# Jittered scatter of Age against the numeric diagnosis code, coloured by
# Diagnosis, with a single linear fit drawn across all points.
# NOTE(review): the codes follow the sorted level order of a nominal
# variable, so the slope of the fitted line depends on that arbitrary
# ordering -- confirm that the trend line is really wanted here.
ggplot(df, aes(x = Diagnosis_Num, y = Age, color = factor(Diagnosis))) +
  geom_jitter(width = 0.2, height = 0, alpha = 0.7, size = 3) +
  geom_smooth(method = "lm", se = FALSE, color = "#FF4500", linewidth = 1.2) +
  scale_x_continuous(breaks = seq_along(diagnosis_levels),
                     labels = diagnosis_levels) +
  theme_minimal() +
  labs(title = "Scatter Plot Usia vs Diagnosis",
       x = "Diagnosis",
       y = "Age",
       color = "Diagnosis")  # legend title for the colour scale
## `geom_smooth()` using formula = 'y ~ x'