1. Create an Example. Using one or more TidyVerse packages, and any dataset from fivethirtyeight.com or Kaggle, create a programming sample “vignette” that demonstrates how to use one or more of the capabilities of the selected TidyVerse package with your selected dataset.

Load the library

library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0       v purrr   0.3.2  
## v tibble  2.1.1       v dplyr   0.8.0.1
## v tidyr   0.8.3       v stringr 1.4.0  
## v readr   1.3.1       v forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## -- Conflicts ------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Read the data using readr

# Load the heart-disease CSV into a tibble; readr guesses the column types.
disease <- readr::read_csv(file = "heart.csv")
## Parsed with column specification:
## cols(
##   age = col_double(),
##   sex = col_double(),
##   cp = col_double(),
##   trestbps = col_double(),
##   chol = col_double(),
##   fbs = col_double(),
##   restecg = col_double(),
##   thalach = col_double(),
##   exang = col_double(),
##   oldpeak = col_double(),
##   slope = col_double(),
##   ca = col_double(),
##   thal = col_double(),
##   target = col_double()
## )
# Preview the first six rows of the data.
disease %>% head(n = 6)
## # A tibble: 6 x 14
##     age   sex    cp trestbps  chol   fbs restecg thalach exang oldpeak
##   <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>   <dbl>   <dbl> <dbl>   <dbl>
## 1    63     1     3      145   233     1       0     150     0     2.3
## 2    37     1     2      130   250     0       1     187     0     3.5
## 3    41     0     1      130   204     0       0     172     0     1.4
## 4    56     1     1      120   236     0       1     178     0     0.8
## 5    57     0     0      120   354     0       1     163     1     0.6
## 6    57     1     0      140   192     0       1     148     0     0.4
## # ... with 4 more variables: slope <dbl>, ca <dbl>, thal <dbl>,
## #   target <dbl>

The package I selected is dplyr.

Capability 1.

filter capability tutorial

Description

Using filter we can select rows of the data frame matching conditions.

Usage

filter(data, …)

Example

To select the people who are older than 20 and younger than 65, we pass the data frame disease and the conditions age > 20 and age < 65 to filter(). It returns the matching rows of the heart disease data.

# Keep only the rows where age is strictly between 20 and 65;
# conditions separated by commas must all hold for a row to be kept.
disease %>%
  filter(age > 20, age < 65)
## # A tibble: 262 x 14
##      age   sex    cp trestbps  chol   fbs restecg thalach exang oldpeak
##    <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>   <dbl>   <dbl> <dbl>   <dbl>
##  1    63     1     3      145   233     1       0     150     0     2.3
##  2    37     1     2      130   250     0       1     187     0     3.5
##  3    41     0     1      130   204     0       0     172     0     1.4
##  4    56     1     1      120   236     0       1     178     0     0.8
##  5    57     0     0      120   354     0       1     163     1     0.6
##  6    57     1     0      140   192     0       1     148     0     0.4
##  7    56     0     1      140   294     0       0     153     0     1.3
##  8    44     1     1      120   263     0       1     173     0     0  
##  9    52     1     2      172   199     1       1     162     0     0.5
## 10    57     1     2      150   168     0       1     174     0     1.6
## # ... with 252 more rows, and 4 more variables: slope <dbl>, ca <dbl>,
## #   thal <dbl>, target <dbl>

Capability 2.

select capability tutorial

Description

Using select we can keep only the chosen variables (columns) of the data frame.

Usage

select(data, …)

Example

To keep only the age, sex, and cp variables, we pass the data frame disease and the column names age, sex, and cp to select().

# Keep just the age, sex and cp columns, then preview the result.
df <- disease %>%
  select(age, sex, cp)
head(df)
## # A tibble: 6 x 3
##     age   sex    cp
##   <dbl> <dbl> <dbl>
## 1    63     1     3
## 2    37     1     2
## 3    41     0     1
## 4    56     1     1
## 5    57     0     0
## 6    57     1     0

Part 2 Tidyverse - Arun Reddy

Part 2 of the Tidyverse recipe further cleans the data so that the data set can be analysed. The following steps are taken to meet that goal:

1. Change the column names
2. Change the data types of some of the columns
3. Analyze the data set

# Change the column names
# Work on a copy so that `disease` keeps its original column names and types.
chest_pain <- disease

# The labels are assigned by position, so they must stay in the same order as
# the 14 columns of `disease` (age, sex, cp, ..., thal, target).
# NOTE(review): " thal" has a leading space and some labels are misspelled
# ("Cholestoral", "Excercise", "flourosopy"). They are kept verbatim so that
# any code referring to these names keeps working -- confirm before renaming.
names(chest_pain) <- c(
  "Age",
  "Sex",
  "Chest Pain Type",
  "Resting Blood Pressure",
  "Serum Cholestoral",
  "Fasting Blood Sugar",
  "Resting CardioGraphic results",
  "Maximum Heart Rate",
  "Excercise Induced angina",
  "oldpeak",
  "the slope of the peak exercise ST segment",
  "number of major vessels (0-3) colored by flourosopy",
  " thal",
  "Target"
)

# Change the values and data type of the Sex column.
# Recoding through factor() with explicit levels and labels maps 0 -> "Female"
# and 1 -> "Male" in one step, without first writing strings into a numeric
# column and then comparing the coerced values against a number.
chest_pain$Sex <- factor(
  chest_pain$Sex,
  levels = c(0, 1),
  labels = c("Female", "Male")
)

# Target and chest pain type are categorical codes, so store them as factors.
chest_pain$Target <- factor(chest_pain$Target)
chest_pain$`Chest Pain Type` <- factor(chest_pain$`Chest Pain Type`)


# Data Analysis

# Distribution of Maximum heart rate
# The column is referenced by name inside aes() so ggplot2 looks it up in the
# `data` argument; this also labels the x axis with the plain column name.
# theme_classic() is a complete theme, so it is the only theme call needed.
ggplot(chest_pain, aes(x = `Maximum Heart Rate`)) +
  geom_histogram(fill = "dodgerblue1", alpha = 0.5) +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Smoothed density of the maximum heart rate.
# theme_classic() is a complete theme, so it is the only theme call needed.
ggplot(chest_pain, aes(x = `Maximum Heart Rate`)) +
  geom_density(fill = "red", alpha = 0.5) +
  theme_classic()

# Base-graphics notched box plot of the maximum heart rate.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
boxplot(
  chest_pain$`Maximum Heart Rate`,
  col = "lightblue",
  notch = TRUE,
  main = "boxplot of the maximum heart rate"
)

# Bar chart of the number of records per sex, with the count printed on each bar.
# geom_bar() counts rows by default; geom_label() reuses the same count stat so
# the label sits at the top of its bar.
ggplot(chest_pain, aes(x = Sex, fill = Sex)) +
  geom_bar() +
  geom_label(stat = "count", aes(label = ..count..), size = 7) +
  labs(x = "Gender", y = "Count", title = "# Heart ailments by Gender") +
  theme_grey(base_size = 20) +
  theme(panel.background = element_rect(fill = "white"))

# Bar chart of the number of records per Target value, labelled with the counts.
ggplot(chest_pain, aes(x = Target, fill = Target)) +
  geom_bar() +
  geom_label(stat = "count", aes(label = ..count..), size = 7) +
  labs(x = "Target", y = "Count", title = "Count of Target") +
  theme_grey(base_size = 20) +
  theme(panel.background = element_rect(fill = "white"))