# ---- Package setup ----
# Packages attached with library() further down in this script. "tidyverse"
# is listed because it is attached below and supplies ggplot2. The empty
# string that used to end this vector was removed: it made install.packages()
# try to install a package named '' and emit a "not available" warning.
needed_packages <- c("readr", "dplyr", "sqldf", "RColorBrewer", "tidyverse")
# Keep only the packages that are not yet present in any local library.
not_installed <- needed_packages[
  !(needed_packages %in% installed.packages()[, "Package"])
]
# Install whatever is missing; nothing happens when everything is available.
if (length(not_installed) > 0) {
  install.packages(not_installed, repos = "http://cran.us.r-project.org")
}
## Installing package into 'C:/Users/dorot/OneDrive/Documents/R/win-library/4.0'
## (as 'lib' is unspecified)
## Warning: package '' is not available (for R version 4.0.2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(sqldf)
## Warning: package 'sqldf' was built under R version 4.0.3
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 4.0.3
## Loading required package: proto
## Warning: package 'proto' was built under R version 4.0.3
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 4.0.3
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.0
## v readr 1.3.1
## Warning: package 'ggplot2' was built under R version 4.0.3
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 4.0.3
# ---- Load and combine the two heart-disease data sets ----
# Cleveland file: it has no header row, so column names are assigned below.
url <- 'https://raw.githubusercontent.com/dorothymaiti/datavisualisation/48b5a3dcd7f0ec236d65208a85b54980448efa21/processed.cleveland.data'
# NOTE(review): the upstream Cleveland file is usually distributed with "?"
# as its missing-value marker; reading it as NA lets na.omit() drop those
# rows. Harmless if this copy contains no "?" -- confirm against the file.
clevelandData <- read.csv(url, header = FALSE, na.strings = c("NA", "?"))
df1 <- na.omit(clevelandData)
names(df1) <- c("age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
                "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target")

# Second file: it has a header row. It starts with a UTF-8 byte-order mark,
# which used to surface as a mangled first column name ("ï..age") whose exact
# spelling depends on the platform locale. Decoding with "UTF-8-BOM" strips
# the mark, so the column is simply called "age" everywhere and no
# locale-specific rename is needed.
url2 <- 'https://raw.githubusercontent.com/dorothymaiti/datavisualisation/main/heart.csv'
heart1 <- read.csv(url2, header = TRUE, fileEncoding = "UTF-8-BOM")
# Kept under its original name for any code that refers to `heart_df`.
heart_df <- heart1
heart <- na.omit(heart_df)

# Natural full join on every column the two tables share. The former
# `all = TRUE` was dropped: it is an argument of base::merge(), not of
# dplyr::full_join(), which already keeps unmatched rows from both sides.
df <- df1 %>% full_join(heart)
## Joining, by = c("age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target")
# Build HD1: a copy of `df` in which the coded columns are replaced by text
# labels (slope and ca become factors), with the recoded columns moved to the
# front and all remaining columns kept after them.
# The multi-level recodes start with an explicit NA branch so that a missing
# code stays missing instead of falling through to the default label.
HD1 <- df %>%
  mutate(
    sex = if_else(sex == 1, "MALE", "FEMALE"),
    fbs = if_else(fbs == 1, ">120", "<=120"),
    exang = if_else(exang == 1, "YES", "NO"),
    cp = case_when(
      is.na(cp) ~ NA_character_,
      cp == 1 ~ "TYPICAL ANGINA",
      cp == 2 ~ "ATYPICAL ANGINA",
      cp == 3 ~ "NON-ANGINAL PAIN",
      TRUE ~ "ASYMPTOMATIC"
    ),
    restecg = case_when(
      is.na(restecg) ~ NA_character_,
      restecg == 0 ~ "NORMAL",
      restecg == 1 ~ "ABNORMALITY",
      TRUE ~ "PROBABLE OR DEFINITE"
    ),
    slope = as.factor(slope),
    ca = as.factor(ca),
    thal = case_when(
      is.na(thal) ~ NA_character_,
      thal == 1 ~ "3",
      thal == 2 ~ "6",
      TRUE ~ "7"
    ),
    target = if_else(target == 0, "No", "Yes")
  ) %>%
  dplyr::select(
    target, sex, fbs, exang, cp, restecg, slope, ca, thal,
    everything()
  )
# Visualization 1: number of patients with heart disease per age and sex.
# The query groups by BOTH age and sex. Grouping by age alone (as before)
# left `sex` as a bare column, so SQLite returned the sex of one arbitrary
# row per age and the dodged, sex-filled bars did not reflect the data.
Sample1 <- sqldf("select age, sex, Count(age) as count from HD1 where target=='Yes' group by age, sex")
ggplot(Sample1, aes(x = age, y = count)) +
  geom_bar(
    aes(fill = sex),
    stat = "identity",
    colour = "black",
    position = position_dodge()
  ) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    x = 'Age',
    y = 'No. of people',
    title = 'Analysis of presence of heart disease for age and sex'
  )
# Visualization 2: generic assessment
# HaveDisease holds every HD1 row whose target is 'Yes', without the target
# column itself. It is reused by the queries further down.
HaveDisease <- sqldf("select age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal from HD1 where target == 'Yes'")
# Scatter of trestbps against age: point colour encodes cp, point size
# encodes chol, and a single linear fit (no confidence band) is overlaid.
# The axis label and title had spelling mistakes that are corrected here.
ggplot(HaveDisease, aes(x = age, y = trestbps)) +
  geom_point(aes(color = cp, size = chol)) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  scale_color_brewer(palette = "Paired") +
  scale_size(range = c(0.5, 10)) +
  labs(
    x = 'Age',
    y = 'Blood pressure (in mm Hg)',
    title = 'Relation of age with blood pressure and cholesterol for different chest pain types'
  )
# Tables with the maximum blood pressure and the maximum cholesterol among
# patients with heart disease.
# Each query uses a single Max() aggregate without GROUP BY, so it returns one
# row. SQLite (the sqldf backend loaded above) fills the bare columns (age,
# cp) from the row that supplies that maximum.
# View() needs an interactive session, so the tables are printed instead when
# the script is run non-interactively (e.g. via Rscript or when knitting).
MaxBp <- sqldf("select age, Max(trestbps), cp from HaveDisease")
if (interactive()) View(MaxBp) else print(MaxBp)

MaxChol <- sqldf("select age, Max(chol),cp from HaveDisease")
if (interactive()) View(MaxChol) else print(MaxChol)

# Number of patients with heart disease for each thal / sex combination.
Thalasemic <- sqldf("select sex , thal, Count(sex)as NumberofPeople from HaveDisease group by thal,sex ")
if (interactive()) View(Thalasemic) else print(Thalasemic)
# Average thalach for every thal / fbs combination among patients with heart
# disease, shown as dodged bars filled by fbs.
Heridity <- sqldf("select Avg(thalach) as MaxHeartRate,thal,fbs from HaveDisease group by thal, fbs")
ggplot(Heridity, aes(x = thal, y = MaxHeartRate)) +
  geom_bar(
    aes(fill = fbs),
    stat = "identity",
    colour = "black",
    position = position_dodge()
  ) +
  # Legend labels are matched to the fbs values by name. Positional labels
  # depended on the sort order of the values and would mislabel (or fail)
  # if one of the two values were absent from the data.
  scale_fill_discrete(
    name = "Fasting Blood Sugar",
    labels = c("<=120" = "Non-Diabetic", ">120" = "Diabetic")
  ) +
  labs(
    x = 'Types of thalassemia',
    y = 'Max Heart rate',
    title = 'Maximum heart rate for thalassemic and non-thalassemic patients'
  )
# Patients with heart disease whose restecg label is anything other than
# 'NORMAL'; oldpeak is exposed under the name ST_Depression.
ClinicalTest <- sqldf(
  "select age,oldpeak as ST_Depression,exang,ca from HaveDisease where restecg is not 'NORMAL'"
)
# Bubble plot of ST_Depression against ca: bubble size encodes age, colour
# encodes exang, and the points are half transparent.
ggplot(
  data = ClinicalTest,
  mapping = aes(x = ca, y = ST_Depression, size = age, colour = exang)
) +
  geom_point(alpha = 0.5) +
  labs(
    title = 'Value of ST Depression for types of Major vessel coloured',
    x = 'No. of Major Vessel Coloured',
    y = 'St Depression value'
  ) +
  scale_color_brewer(palette = "Set1") +
  scale_size(range = c(0.2, 8))