# Run this script in a fresh R session (for example Session > Restart R in
# RStudio) instead of calling rm(list = ls()): that call only deletes objects
# from the global environment, while attached packages and changed options
# stay as they are, so it does not give a clean starting state.
#inladen packages
library(readr)
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(boot)
library(modelr)
library(ggplot2)
library(ggplot2)
# Exercise 0a - cross table of Plant by Treatment for the CO2 data set
kruistabel <- table(CO2[["Plant"]], CO2[["Treatment"]])
# The same counts with dplyr; this assignment replaces the table made above.
kruistabel <- tally(group_by(CO2, Plant, Treatment))
# 0c - Titanic data - read the semicolon-separated text file first.
# NOTE(review): the name `Titanic` is also used by a data set that ships with
# base R (datasets::Titanic); this assignment masks it in the workspace.
Titanic <- read_delim(
  here::here("data", "Titanic.txt"),
  delim = ";",
  col_names = TRUE
)
##
## -- Column specification --------------------------------------------------------
## cols(
## name = col_character(),
## survived = col_character(),
## age = col_double(),
## passengerClass = col_character(),
## sex = col_character()
## )
Titanic
## # A tibble: 1,313 x 5
## name survived age passengerClass sex
## <chr> <chr> <dbl> <chr> <chr>
## 1 Allen, Miss Elisabeth Walton yes 29 1st fema~
## 2 Allison, Miss Helen Loraine no 2 1st fema~
## 3 Allison, Mr Hudson Joshua Creighton no 30 1st male
## 4 Allison, Mrs Hudson J.C. (Bessie Waldo ~ no 25 1st fema~
## 5 Allison, Master Hudson Trevor yes 0.917 1st male
## 6 Anderson, Mr Harry yes 47 1st male
## 7 Andrews, Miss Kornelia Theodosia yes 63 1st fema~
## 8 Andrews, Mr Thomas, jr no 39 1st male
## 9 Appleton, Mrs Edward Dale (Charlotte La~ yes 58 1st fema~
## 10 Artagaveytia, Mr Ramon no 71 1st male
## # ... with 1,303 more rows
# Cross table of passenger class (rows) versus survival (columns).
kruistabel_Titanic <- table(Titanic$passengerClass, Titanic$survived)
# 1a
# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script runs in batch mode.
if (interactive()) {
  View(swiss)
}
# Exercise 2
# 2a
if (interactive()) {
  View(chickwts)
}
# Number of chicks per feed type. This used summary(n = n()), which ignores
# the grouping and returns an overall summary table; summarise() is the dplyr
# verb that yields one row per feed group.
chickenwts <- chickwts %>%
  group_by(feed) %>%
  summarise(n = n())
# The rm(list = ls()) call that used to follow here deleted chickenwts right
# after it was computed, so it has been dropped.
chickwts
## weight feed
## 1 179 horsebean
## 2 160 horsebean
## 3 136 horsebean
## 4 227 horsebean
## 5 217 horsebean
## 6 168 horsebean
## 7 108 horsebean
## 8 124 horsebean
## 9 143 horsebean
## 10 140 horsebean
## 11 309 linseed
## 12 229 linseed
## 13 181 linseed
## 14 141 linseed
## 15 260 linseed
## 16 203 linseed
## 17 148 linseed
## 18 169 linseed
## 19 213 linseed
## 20 257 linseed
## 21 244 linseed
## 22 271 linseed
## 23 243 soybean
## 24 230 soybean
## 25 248 soybean
## 26 327 soybean
## 27 329 soybean
## 28 250 soybean
## 29 193 soybean
## 30 271 soybean
## 31 316 soybean
## 32 267 soybean
## 33 199 soybean
## 34 171 soybean
## 35 158 soybean
## 36 248 soybean
## 37 423 sunflower
## 38 340 sunflower
## 39 392 sunflower
## 40 339 sunflower
## 41 341 sunflower
## 42 226 sunflower
## 43 320 sunflower
## 44 295 sunflower
## 45 334 sunflower
## 46 322 sunflower
## 47 297 sunflower
## 48 318 sunflower
## 49 325 meatmeal
## 50 257 meatmeal
## 51 303 meatmeal
## 52 315 meatmeal
## 53 380 meatmeal
## 54 153 meatmeal
## 55 263 meatmeal
## 56 242 meatmeal
## 57 206 meatmeal
## 58 344 meatmeal
## 59 258 meatmeal
## 60 368 casein
## 61 390 casein
## 62 379 casein
## 63 260 casein
## 64 404 casein
## 65 318 casein
## 66 352 casein
## 67 359 casein
## 68 216 casein
## 69 222 casein
## 70 283 casein
## 71 332 casein
#niet dezelfde verdeling van mediaan en IQR uit boxplot
# 2b - mean and standard deviation of weight for each of the 6 feed groups
chickwts_summary <- summarise(
  group_by(chickwts, feed),
  gemiddelde = mean(weight),
  sd = sd(weight)
)
# 2c - two selections: (1) linseed and (2) horsebean
linseed <- chickwts %>% filter(feed == "linseed")
horsebean <- chickwts %>% filter(feed == "horsebean")
# t-test on the two selections - one- or two-sided? Two-sided, as far as I
# can tell (this is also the default alternative of t.test()).
# Result: demonstrable deviation from mu, since p < 0.01.
t.test(x = linseed$weight, y = horsebean$weight, mu = 0)
##
## Welch Two Sample t-test
##
## data: linseed$weight and horsebean$weight
## t = 3.0172, df = 19.769, p-value = 0.006869
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 18.0403 99.0597
## sample estimates:
## mean of x mean of y
## 218.75 160.20
# t-test on the linseed and horsebean selections once more - one- or
# two-sided? Two-sided, as far as I can tell.
# Result: demonstrable deviation from mu, since p < 0.01.
t.test(x = linseed$weight, y = horsebean$weight, mu = 0)
##
## Welch Two Sample t-test
##
## data: linseed$weight and horsebean$weight
## t = 3.0172, df = 19.769, p-value = 0.006869
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 18.0403 99.0597
## sample estimates:
## mean of x mean of y
## 218.75 160.20
# 2d - now compare linseed with meatmeal; build the meatmeal selection first
meatmeal <- chickwts %>% filter(feed == "meatmeal")
# Result: less significant, but the p-value is still below alpha = 5%.
t.test(x = linseed$weight, y = meatmeal$weight, mu = 0)
##
## Welch Two Sample t-test
##
## data: linseed$weight and meatmeal$weight
## t = -2.3542, df = 19.236, p-value = 0.02933
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -109.822679 -6.495503
## sample estimates:
## mean of x mean of y
## 218.7500 276.9091
#2e - experiment van 8 groepen, hoeveel t-toetsen dan uitvoeren?
#nu 6 groepen, betekent 15 opties en 8 groepen is 28 opties
# Exercise 4
# 4a
# No rm(list = ls()) here: wiping the global environment halfway through a
# script hides dependencies between exercises and does not reset attached
# packages or options. Every object used below is (re)created explicitly.
library("readr")
# Read the semicolon-separated passenger file; the first row holds the
# column names.
titanic <- read_delim(
  file = here::here("data", "Titanic.txt"),
  delim = ";",
  col_names = TRUE)
##
## -- Column specification --------------------------------------------------------
## cols(
## name = col_character(),
## survived = col_character(),
## age = col_double(),
## passengerClass = col_character(),
## sex = col_character()
## )
titanic
## # A tibble: 1,313 x 5
## name survived age passengerClass sex
## <chr> <chr> <dbl> <chr> <chr>
## 1 Allen, Miss Elisabeth Walton yes 29 1st fema~
## 2 Allison, Miss Helen Loraine no 2 1st fema~
## 3 Allison, Mr Hudson Joshua Creighton no 30 1st male
## 4 Allison, Mrs Hudson J.C. (Bessie Waldo ~ no 25 1st fema~
## 5 Allison, Master Hudson Trevor yes 0.917 1st male
## 6 Anderson, Mr Harry yes 47 1st male
## 7 Andrews, Miss Kornelia Theodosia yes 63 1st fema~
## 8 Andrews, Mr Thomas, jr no 39 1st male
## 9 Appleton, Mrs Edward Dale (Charlotte La~ yes 58 1st fema~
## 10 Artagaveytia, Mr Ramon no 71 1st male
## # ... with 1,303 more rows
# Number of passengers for every combination of class and survival status.
# .groups = "drop" returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() no longer prints its regrouping message.
titanic %>%
  group_by(passengerClass, survived) %>%
  summarise(aantal = n(), .groups = "drop")
## # A tibble: 6 x 3
## passengerClass survived aantal
## <chr> <chr> <int>
## 1 1st no 129
## 2 1st yes 193
## 3 2nd no 161
## 4 2nd yes 119
## 5 3rd no 574
## 6 3rd yes 137
# 4b - frequency counts
# Cross table of passenger class (rows) versus survival (columns).
kruistabel <- table(titanic[["passengerClass"]], titanic[["survived"]])
# Overall number of survivors and non-survivors in a one-row tibble.
titanic_telling <- titanic %>%
  summarise(
    Overleefd = sum(survived == "yes"),
    NietOverleefd = sum(survived == "no")
  )
# 4c - estimated survival probability via mutate()
titanic_telling <- titanic_telling %>%
  mutate(p = Overleefd / (Overleefd + NietOverleefd))
# 4d - chi-squared test
# Keep only the two count columns before testing; this drops p again.
titanic_telling <- titanic_telling %>%
  select(Overleefd, NietOverleefd)
chisq.test(titanic_telling)
##
## Chi-squared test for given probabilities
##
## data: titanic_telling
## X-squared = 131.17, df = 1, p-value < 2.2e-16
chisq.test(kruistabel)
##
## Pearson's Chi-squared test
##
## data: kruistabel
## X-squared = 173.81, df = 2, p-value < 2.2e-16
titanic_telling
## # A tibble: 1 x 2
## Overleefd NietOverleefd
## <int> <int>
## 1 449 864
kruistabel
##
## no yes
## 1st 129 193
## 2nd 161 119
## 3rd 574 137
# 5a - create age groups of width 10 with cut_width(); center = 5 centres one
# bin on 5. NOTE(review): the original author wondered whether -5 would be
# needed because of a problem with values below 0 - confirm.
# Inside mutate() the column is referenced as `age` (data masking) rather
# than `titanic$age`, so the step keeps working on whatever data is piped in.
titanic <- titanic %>%
  mutate(age_class = cut_width(age, 10, center = 5))
# Number of passengers per age category, split by survival.
kruistabel_lftdscat <- table(titanic$survived, titanic$age_class)
# The same via count() from dplyr - the large number of NAs stands out; they
# are not visible in the cross table above.
count_titanic_ageclass <- titanic %>%
  count(age_class)
# 5c - difference between useNA = "always" and useNA = "ifany".
# Each assignment replaces the previous table, so inspect them one at a time.
# "always": NA is shown both as a column and as a row.
kruistabel_lftdscat <- table(titanic$survived, titanic$age_class, useNA = "always")
# "ifany": NA is shown only as a column.
kruistabel_lftdscat <- table(titanic$survived, titanic$age_class, useNA = "ifany")
# 6a - word counts per novel, entered column by column
library(tibble)
telling <- tibble(
  Woord = c("a", "an", "this", "that", "with", "without"),
  Sense_and_Sensibility = c(147, 25, 32, 94, 59, 18),
  Emma = c(186, 26, 39, 105, 74, 10),
  Sandition = c(101, 11, 15, 37, 28, 10)
)
# 6b - per word, the percentage of the column total, rounded to one decimal.
# Observation: yes, the distributions look fairly similar.
telling <- telling %>%
  mutate(
    pSense = round(
      (Sense_and_Sensibility / sum(Sense_and_Sensibility) * 100), 1
    ),
    pEmma = round((Emma / sum(Emma) * 100), 1),
    pSandition = round((Sandition / sum(Sandition) * 100), 1)
  )
# 6c - chi-squared test on the counts; only the three count columns are kept,
# which drops the percentage columns again.
telling <- telling %>%
  select(Sense_and_Sensibility, Emma, Sandition)
# Result: not significant - no demonstrable deviation.
chisq.test(telling)
##
## Pearson's Chi-squared test
##
## data: telling
## X-squared = 12.271, df = 10, p-value = 0.2673
# Exercise 7 - loops & functions. Order: (1) preparation/input, (2) body,
# (3) result.
# 7a - use a for loop to compute the new price after a 5% discount
vector <- c(1:10)
# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script runs in batch mode.
if (interactive()) {
  View(vector)
}
# Every element is a price; with a 5% discount, 95% of the price remains.
# body
for (i in seq_along(vector)) {
  vector[i] <- vector[i] * 0.95
}
# result
vector
## [1] 0.95 1.90 2.85 3.80 4.75 5.70 6.65 7.60 8.55 9.50
# The same computation, writing into a separate preallocated result vector.
prijzen <- c(1:10)
kortingsprijzen <- rep(NA, length(prijzen))
for (i in seq_along(prijzen)) {
  kortingsprijzen[i] <- 0.95 * prijzen[i]
}
kortingsprijzen
## [1] 0.95 1.90 2.85 3.80 4.75 5.70 6.65 7.60 8.55 9.50
# The shop has just announced that the 5% discount only applies to products
# whose original price was 5 euros or more. Use a for loop and the ifelse
# function to build the new vector of discounted prices.
# Start again from the original prices: `vector` still holds the prices that
# were already discounted in 7a, so testing and discounting those would
# compare the threshold against discounted prices and apply the discount twice.
vector <- prijzen
# body with the filter on >= 5; ifelse() returns the value and the
# assignment happens once, outside the call
for (i in seq_along(vector)) {
  vector[i] <- ifelse(prijzen[i] >= 5, prijzen[i] * 0.95, prijzen[i])
}
# result
vector
## [1] 1.00 2.00 3.00 4.00 4.75 5.70 6.65 7.60 8.55 9.50
# manual check
5*0.95
## [1] 4.75
# Exercise 8 - reading in the 5 respondents.txt files
# Number of respondents - set by hand.
aantal_respondenten <- 5
# Placeholder; nothing is stored in it yet.
respondenten <- NULL
library(stringr)
# Single use of str_c(), the string-concatenation function.
str_c(aantal_respondenten, sep = ";")
## [1] "5"
# ----- fin -----