### Question a
# Read Maraton.csv into a data frame. The file is semicolon-delimited, uses a
# decimal comma, and its first row holds the column names.
podatki <- read.table(
  file = "./Maraton.csv",
  sep = ";",
  dec = ",",
  header = TRUE
)
# Preview the first six rows (recorded console output follows).
head(podatki, n = 6L)
## ID Teža Višina Tlak Utrip Hemoglobin Hematokrit Holesterol Glukoza Spol
## 1 1 72 179.0 105 64 160 50 4.9 4.7 1
## 2 2 68 178.0 105 60 158 51 4.8 4.9 0
## 3 3 64 174.0 109 54 155 51 4.5 7.0 0
## 4 4 63 174.0 112 54 153 58 8.0 7.2 0
## 5 5 61 173.5 100 53 152 59 4.6 6.7 0
## 6 6 60 173.0 99 53 158 49 3.9 6.0 0
### Question c
# Arithmetic mean of the Višina column.
mean(podatki[["Višina"]])
## [1] 176.9571
# Sample standard deviation of the same column.
sd(podatki[["Višina"]])
## [1] 5.85156
### Question d
# Recode Spol from its 0/1 codes into a factor: 0 is labelled "Z", 1 is
# labelled "M". The column is overwritten in place.
podatki[["Spol"]] <- factor(
  podatki[["Spol"]],
  levels = c(0, 1),
  labels = c("Z", "M")
)
### Question e
### Question f
library(pastecs)
# Descriptive statistics for every column except ID and Spol, rounded to two
# decimals. The two columns are dropped by name.
round(stat.desc(subset(podatki, select = -c(ID, Spol))), digits = 2)
## Teža Višina Tlak Utrip Hemoglobin Hematokrit Holesterol
## nbr.val 35.00 35.00 35.00 35.00 35.00 35.00 35.00
## nbr.null 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## min 55.00 166.00 90.00 49.00 143.00 45.00 3.40
## max 81.00 189.00 135.00 64.00 183.00 69.00 8.00
## range 26.00 23.00 45.00 15.00 40.00 24.00 4.60
## sum 2375.00 6193.50 3838.00 1967.00 5445.00 1801.00 167.60
## median 68.00 177.00 108.00 55.00 157.00 51.00 4.70
## mean 67.86 176.96 109.66 56.20 155.57 51.46 4.79
## SE.mean 1.30 0.99 1.79 0.67 1.45 0.82 0.17
## CI.mean.0.95 2.64 2.01 3.64 1.37 2.94 1.66 0.34
## var 59.01 34.24 112.47 15.81 73.13 23.49 1.00
## std.dev 7.68 5.85 10.61 3.98 8.55 4.85 1.00
## coef.var 0.11 0.03 0.10 0.07 0.05 0.09 0.21
## Glukoza
## nbr.val 35.00
## nbr.null 0.00
## nbr.na 0.00
## min 3.80
## max 7.20
## range 3.40
## sum 178.65
## median 4.80
## mean 5.10
## SE.mean 0.18
## CI.mean.0.95 0.36
## var 1.12
## std.dev 1.06
## coef.var 0.21
# Alternative way to obtain the same table: keep every column whose name is
# not "ID" or "Spol". Selecting by name instead of by position (previously
# columns 1 and 10) keeps the result correct if the column order of the input
# file ever changes.
round(stat.desc(podatki[, setdiff(names(podatki), c("ID", "Spol"))]), 2)
## Teža Višina Tlak Utrip Hemoglobin Hematokrit Holesterol
## nbr.val 35.00 35.00 35.00 35.00 35.00 35.00 35.00
## nbr.null 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## min 55.00 166.00 90.00 49.00 143.00 45.00 3.40
## max 81.00 189.00 135.00 64.00 183.00 69.00 8.00
## range 26.00 23.00 45.00 15.00 40.00 24.00 4.60
## sum 2375.00 6193.50 3838.00 1967.00 5445.00 1801.00 167.60
## median 68.00 177.00 108.00 55.00 157.00 51.00 4.70
## mean 67.86 176.96 109.66 56.20 155.57 51.46 4.79
## SE.mean 1.30 0.99 1.79 0.67 1.45 0.82 0.17
## CI.mean.0.95 2.64 2.01 3.64 1.37 2.94 1.66 0.34
## var 59.01 34.24 112.47 15.81 73.13 23.49 1.00
## std.dev 7.68 5.85 10.61 3.98 8.55 4.85 1.00
## coef.var 0.11 0.03 0.10 0.07 0.05 0.09 0.21
## Glukoza
## nbr.val 35.00
## nbr.null 0.00
## nbr.na 0.00
## min 3.80
## max 7.20
## range 3.40
## sum 178.65
## median 4.80
## mean 5.10
## SE.mean 0.18
## CI.mean.0.95 0.36
## var 1.12
## std.dev 1.06
## coef.var 0.21
### Question g
# Base-graphics histogram of Hematokrit with class boundaries every 2 units
# from 40 to 80.
hist(
  podatki[["Hematokrit"]],
  breaks = seq(from = 40, to = 80, by = 2),
  col = "lightblue",
  border = "black",
  main = "Frekvenčna porazdelitev za hematokrit",
  xlab = "Hematokrit",
  ylab = "Število maratonovcev"
)
library(ggplot2)
# ggplot2 version of the Hematokrit histogram.
# - Explicit bin edges (40, 42, ..., 80) are used instead of `binwidth = 2`:
#   with only a bin width, ggplot2 centres the bins, so the bars would not
#   represent the same classes as the base hist() call with the same breaks.
# - The visible x range is set with coord_cartesian() rather than scale
#   limits, because scale limits discard bars that extend past them and emit
#   a "Removed rows" warning.
# - Axis ticks are placed every 4 units so each tick falls on a bin edge.
ggplot(podatki, aes(x = Hematokrit)) +
  geom_histogram(
    breaks = seq(40, 80, 2),
    fill = "lightblue",
    colour = "black"
  ) +
  ylab("Frekvenca") +
  scale_x_continuous(breaks = seq(40, 80, 4)) +
  coord_cartesian(xlim = c(40, 80)) +
  labs(title = "Frekvenčna porazdelitev za hematokrit")
library(ggplot2)
# Box plots of Glukoza for each level of Spol, filled by Spol.
# The fill colours are named by factor level ("Z", "M") so each colour stays
# attached to its group regardless of the order of the factor levels.
ggplot(podatki, aes(x = Spol, y = Glukoza, fill = Spol)) +
  geom_boxplot() +
  scale_fill_manual(values = c(Z = "pink", M = "lightblue"))
# Median of Glukoza restricted to the rows where Spol equals "Z".
with(podatki, median(Glukoza[Spol == "Z"]))
## [1] 5.8