Importação de dados
`%>%` <- magrittr::`%>%`
library(ggplot2)
library(patchwork)
# Read the raw heart dataset from its project-relative location, then print
# the type and leading values of each column.
raw_data <- utils::read.csv(file = here::here("data/raw/heart.csv"))
utils::str(object = raw_data)
'data.frame': 303 obs. of 14 variables:
$ age : int 63 37 41 56 57 57 56 44 52 57 ...
$ sex : int 1 1 0 1 0 1 0 1 1 1 ...
$ cp : int 3 2 1 1 0 0 1 1 2 2 ...
$ trestbps: int 145 130 130 120 120 140 140 120 172 150 ...
$ chol : int 233 250 204 236 354 192 294 263 199 168 ...
$ fbs : int 1 0 0 0 0 0 0 0 1 0 ...
$ restecg : int 0 1 0 1 1 1 0 1 1 1 ...
$ thalach : int 150 187 172 178 163 148 153 173 162 174 ...
$ exang : int 0 0 0 0 1 0 0 0 0 0 ...
$ oldpeak : num 2.3 3.5 1.4 0.8 0.6 0.4 1.3 0 0.5 1.6 ...
$ slope : int 0 0 2 2 2 1 1 2 2 2 ...
$ ca : int 0 0 0 0 0 0 0 0 0 0 ...
$ thal : int 1 2 2 2 2 1 2 3 3 2 ...
$ target : int 1 1 1 1 1 1 1 1 1 1 ...
# Return the distinct values found in a single column of a data frame.
#
# x:  column selector passed to `[[` (a column name or position).
# df: data frame (or list) to extract the column from.
#
# Returns the result of unique() on the extracted column, in order of first
# appearance.
get_unique_df <- function(x, df) {
  column_values <- df[[x]]
  unique(column_values)
}
# Build a list with one entry per column of raw_data, each holding that
# column's distinct values, keyed by the column name; then print it.
unique_df <- stats::setNames(
  lapply(
    colnames(raw_data),
    function(column_name) get_unique_df(column_name, df = raw_data)
  ),
  colnames(raw_data)
)
unique_df
$age
[1] 63 37 41 56 57 44 52 54 48 49 64 58 50 66 43 69 59 42 61 40 71 51 65 53 46 45 39 47
[29] 62 34 35 29 55 60 67 68 74 76 70 38 77
$sex
[1] 1 0
$cp
[1] 3 2 1 0
$trestbps
[1] 145 130 120 140 172 150 110 135 160 105 125 142 155 104 138 128 108 134 122 115 118
[22] 100 124 94 112 102 152 101 132 148 178 129 180 136 126 106 156 170 146 117 200 165
[43] 174 192 144 123 154 114 164
$chol
[1] 233 250 204 236 354 192 294 263 199 168 239 275 266 211 283 219 340 226 247 234 243
[22] 302 212 175 417 197 198 177 273 213 304 232 269 360 308 245 208 264 321 325 235 257
[43] 216 256 231 141 252 201 222 260 182 303 265 309 186 203 183 220 209 258 227 261 221
[64] 205 240 318 298 564 277 214 248 255 207 223 288 160 394 315 246 244 270 195 196 254
[85] 126 313 262 215 193 271 268 267 210 295 306 178 242 180 228 149 278 253 342 157 286
[106] 229 284 224 206 167 230 335 276 353 225 330 290 172 305 188 282 185 326 274 164 307
[127] 249 341 407 217 174 281 289 322 299 300 293 184 409 259 200 327 237 218 319 166 311
[148] 169 187 176 241 131
$fbs
[1] 1 0
$restecg
[1] 0 1 2
$thalach
[1] 150 187 172 178 163 148 153 173 162 174 160 139 171 144 158 114 151 161 179 137 157
[22] 123 152 168 140 188 125 170 165 142 180 143 182 156 115 149 146 175 186 185 159 130
[43] 190 132 147 154 202 166 164 184 122 169 138 111 145 194 131 133 155 167 192 121 96
[64] 126 105 181 116 108 129 120 112 128 109 113 99 177 141 136 97 127 103 124 88 195
[85] 106 95 117 71 118 134 90
$exang
[1] 0 1
$oldpeak
[1] 2.3 3.5 1.4 0.8 0.6 0.4 1.3 0.0 0.5 1.6 1.2 0.2 1.8 1.0 2.6 1.5 3.0 2.4 0.1 1.9 4.2
[22] 1.1 2.0 0.7 0.3 0.9 3.6 3.1 3.2 2.5 2.2 2.8 3.4 6.2 4.0 5.6 2.9 2.1 3.8 4.4
$slope
[1] 0 2 1
$ca
[1] 0 2 1 3 4
$thal
[1] 1 2 3 0
$target
[1] 1 0
# Bar chart of the number of observations per target level, with the target
# (converted to a factor) on the y axis.
raw_data %>%
  dplyr::mutate(target = as.factor(target)) %>%
  ggplot(aes(y = target)) +
  geom_bar() +
  ylab("Target")
# Bar charts of the value counts of every predictor, one facet per variable.
# The target column is dropped, the remaining columns are melted to long
# format (columns `variable` and `value`), and each facet gets its own free
# x and y scales.
raw_data %>%
  dplyr::select(-target) %>%
  reshape2::melt() %>%
  ggplot(mapping = aes(x = value)) +
  geom_bar() +
  # `scales` is spelled out in full; `scale =` only worked through partial
  # argument matching.
  facet_wrap(. ~ variable, scales = "free", nrow = 4)
No id variables; using all as measure variables
# Radar chart comparing the mean of each rescaled feature between the target
# groups. The summarised table is printed before being passed to ggradar.
#
# `across(everything(), ...)` (dplyr >= 1.0.0) skips the grouping column, so
# `target` is left untouched and no "ignored grouping variables" message is
# emitted. `rescale` is called through its namespace because `scales` is not
# attached by any library() call in this file.
#
# NOTE(review): because the data is grouped before rescaling, each feature is
# rescaled within its own target group rather than over the whole dataset.
# Confirm this is intended; rescaling before group_by() would put both groups
# on a common scale.
raw_data %>%
  dplyr::group_by(target) %>%
  dplyr::mutate(dplyr::across(dplyr::everything(), scales::rescale)) %>%
  dplyr::summarise(dplyr::across(dplyr::everything(), mean)) %>%
  print() %>%
  ggradar::ggradar(base.size = 1, legend.title = "target")
`mutate_all()` ignored the following grouping variables:
Column `target`
Use `mutate_at(df, vars(-group_cols()), myoperation)` to silence the message.
# Histogram of age (30 bins) filled by target level; kept in `p1` so it can be
# combined with the other age plots.
p1 <- raw_data %>%
  dplyr::select(target, age) %>%
  ggplot(mapping = aes(x = age, group = target, fill = as.factor(target))) +
  labs(fill = "Target") +
  geom_histogram(colour = "black", bins = 30)
# Density curves of age, one per target level, filled by target; kept in `p2`
# so it can be combined with the other age plots.
p2 <- raw_data %>%
  dplyr::select(target, age) %>%
  ggplot(mapping = aes(x = age, group = target, fill = as.factor(target))) +
  labs(fill = "Target") +
  geom_density(colour = "black")
# Box plots of age per target level, flipped so age runs along the horizontal
# axis; kept in `p3` so it can be combined with the other age plots.
p3 <- raw_data %>%
  dplyr::select(target, age) %>%
  ggplot(mapping = aes(y = age, group = target, fill = as.factor(target))) +
  geom_boxplot(colour = "black") +
  coord_flip() +
  labs(fill = "Target") +
  # Hide the ticks and labels of the y axis, which carries no variable here.
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  )
# Stack the three age plots vertically (patchwork `/` operator) and print the
# combined figure.
p1 / p2 / p3