This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
norsjo <- read_csv("norsjo86.csv")
## New names:
## Rows: 260 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): agegrp, sex, smoker dbl (8): ...1, health, height, weight, sbp, dbp,
## cholesterol, bmi
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
table(norsjo$sex)
##
## man woman
## 128 132
#5. Frequency table
library(summarytools)
##
## Attaching package: 'summarytools'
##
## The following object is masked from 'package:tibble':
##
## view
freq(norsjo$sex)
## Frequencies
## norsjo$sex
## Type: Character
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## man 128 49.23 49.23 49.23 49.23
## woman 132 50.77 100.00 50.77 100.00
## <NA> 0 0.00 100.00
## Total 260 100.00 100.00 100.00 100.00
freq(norsjo$sex,
report.nas = FALSE, # remove NA (missing values) information
totals = FALSE, # remove totals
cumul = FALSE, # remove cumulative
headings = FALSE) # remove heading
##
## Freq %
## ----------- ------ -------
## man 128 49.23
## woman 132 50.77
norsjo$bmi_groups<-cut(norsjo$bmi,breaks=c(0,25,30,Inf), labels=c("normal","overweight","obese"))
norsjo$hypertonia <- 0
norsjo$hypertonia<- as.factor(ifelse(norsjo$sbp>140 | norsjo$dbp>90, "yes", "no"))
?ctable
## starting httpd help server ... done
ctable(x = norsjo$bmi_groups, y = norsjo$hypertonia, prop = 'r')
## Cross-Tabulation, Row Proportions
## bmi_groups * hypertonia
## Data Frame: norsjo
##
## ------------ ------------ ------------- ------------ ----------- --------------
## hypertonia no yes <NA> Total
## bmi_groups
## normal 124 (86.1%) 19 (13.2%) 1 ( 0.7%) 144 (100.0%)
## overweight 60 (70.6%) 25 (29.4%) 0 ( 0.0%) 85 (100.0%)
## obese 12 (44.4%) 15 (55.6%) 0 ( 0.0%) 27 (100.0%)
## <NA> 1 (25.0%) 1 (25.0%) 2 (50.0%) 4 (100.0%)
## Total 197 (75.8%) 60 (23.1%) 3 ( 1.2%) 260 (100.0%)
## ------------ ------------ ------------- ------------ ----------- --------------
ctable(x = norsjo$bmi_groups, y = norsjo$hypertonia, prop = 'r', useNA = 'no')
## Cross-Tabulation, Row Proportions
## bmi_groups * hypertonia
## Data Frame: norsjo
##
## ------------ ------------ ------------- ------------ --------------
## hypertonia no yes Total
## bmi_groups
## normal 124 (86.7%) 19 (13.3%) 143 (100.0%)
## overweight 60 (70.6%) 25 (29.4%) 85 (100.0%)
## obese 12 (44.4%) 15 (55.6%) 27 (100.0%)
## Total 196 (76.9%) 59 (23.1%) 255 (100.0%)
## ------------ ------------ ------------- ------------ --------------
ctable(x = norsjo$bmi_groups, y = norsjo$hypertonia, prop = 'c')
## Cross-Tabulation, Column Proportions
## bmi_groups * hypertonia
## Data Frame: norsjo
##
## ------------ ------------ -------------- ------------- ------------ --------------
## hypertonia no yes <NA> Total
## bmi_groups
## normal 124 ( 62.9%) 19 ( 31.7%) 1 ( 33.3%) 144 ( 55.4%)
## overweight 60 ( 30.5%) 25 ( 41.7%) 0 ( 0.0%) 85 ( 32.7%)
## obese 12 ( 6.1%) 15 ( 25.0%) 0 ( 0.0%) 27 ( 10.4%)
## <NA> 1 ( 0.5%) 1 ( 1.7%) 2 ( 66.7%) 4 ( 1.5%)
## Total 197 (100.0%) 60 (100.0%) 3 (100.0%) 260 (100.0%)
## ------------ ------------ -------------- ------------- ------------ --------------
descr(norsjo, stats = c("mean", "sd"), transpose = TRUE, headings = FALSE)
## Non-numerical variable(s) ignored: agegrp, sex, smoker, bmi_groups, hypertonia
##
## Mean Std.Dev
## ----------------- -------- ---------
## ...1 130.50 75.20
## bmi 25.27 3.98
## cholesterol 6.54 1.48
## dbp 79.17 11.67
## health 0.40 0.49
## height 170.91 8.71
## sbp 124.48 19.00
## weight 73.89 12.88
mean(norsjo$bmi) # what happens here?
## [1] NA
summary(norsjo$bmi)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 16.71 22.58 24.52 25.27 27.28 45.03 4
mean(norsjo$bmi,na.rm=T) # na.rm=T helps
## [1] 25.26896
round(mean(norsjo$bmi,na.rm=T),2)
## [1] 25.27
stby(data = norsjo$bmi, INDICES = norsjo$sex,
FUN = descr, stats = "common", transpose = TRUE)
## Descriptive Statistics
## bmi by sex
## Data Frame: norsjo
## N: 128
##
## Mean Std.Dev Min Median Max N.Valid Pct.Valid
## ----------- ------- --------- ------- -------- ------- --------- -----------
## man 25.32 2.94 19.44 24.96 35.15 125.00 97.66
## woman 25.22 4.78 16.71 23.95 45.03 131.00 99.24
stby(data = norsjo$bmi, INDICES = norsjo$sex,
FUN = descr, stats = c("mean", "sd"), transpose = TRUE)
## Descriptive Statistics
## bmi by sex
## Data Frame: norsjo
## N: 128
##
## Mean Std.Dev
## ----------- ------- ---------
## man 25.32 2.94
## woman 25.22 4.78
mean(norsjo$bmi,)
## [1] NA
?mean
# If you don't have the package installed, uncomment the following line:
# install.packages("dplyr")
library(dplyr)
# Mean of 'value' by 'group'
norsjo %>% group_by(sex) %>%
summarise(mean_value = mean(bmi,na.rm=T))
## # A tibble: 2 × 2
## sex mean_value
## <chr> <dbl>
## 1 man 25.3
## 2 woman 25.2
ggplot(norsjo, aes(x= bmi)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
ggplot(norsjo, aes(x= bmi)) + geom_histogram(bins=10) # bins controls the number of intervals
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
ggplot(norsjo, aes(x = bmi)) + geom_histogram(binwidth = 1, colour = "blue", fill = "aquamarine") # Changing the binwidth and colours
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
ggplot(norsjo, aes(x = bmi)) + geom_histogram(aesbinwidth = 1, colour = "blue", fill = "aquamarine")
## Warning in geom_histogram(aesbinwidth = 1, colour = "blue", fill =
## "aquamarine"): Ignoring unknown parameters: `aesbinwidth`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
ggplot(norsjo, aes(bmi)) + geom_histogram(aes(y = ..density..), bins=100,
color="black", fill="dodgerblue") +
labs(title="Simple Histogram with density line",
x= "BMI")+
theme(
plot.title = element_text(color = "red", size = 20,
face = "bold"), # Plot title
axis.title = element_text(size = 18), # Axis titles
axis.text= element_text(size = 14)) # Axis text size
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## Removed 4 rows containing non-finite outside the scale range (`stat_bin()`).
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(norsjo, aes(bmi)) + geom_histogram(aes(y = ..density..), bins=100,
color="black", fill="dodgerblue") +
labs(title="Simple Histogram with density line",
x= "BMI")+
geom_density(col=3)
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_density()`).
ggplot(norsjo, aes(x=cholesterol, y=bmi)) + geom_point()
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Install car package if not installed
#install.packages("car")
# Load the car package
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
# Simulate BMI data for women and men
set.seed(123) # For reproducibility
women_bmi <- rnorm(364, mean = 23.0, sd = 4.17)
men_bmi <- rnorm(265, mean = 23.4, sd = 4.12)
# Combine the BMI data into a single vector
bmi <- c(women_bmi, men_bmi)
# Create a group factor indicating 'Women' and 'Men'
group <- factor(c(rep("Women", 364), rep("Men", 265)))
# Perform Levene's test for equality of variances
leveneTest(bmi ~ group)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0 0.997
## 627
# Install the pwr package if not already installed
#install.packages("pwr")
# Load the pwr package
library(pwr)
# Set the parameters
effect_size <- 1 # Effect size (mean difference divided by pooled SD)
alpha <- 0.05 # Significance level
power <- 0.7 # Desired power
# Conduct the power analysis for a two-sample t-test
power_analysis <- pwr.t.test(d = effect_size, sig.level = alpha, power = power, type = "two.sample")
# View the result
power_analysis
##
## Two-sample t test power calculation
##
## n = 13.37179
## d = 1
## sig.level = 0.05
## power = 0.7
## alternative = two.sided
##
## NOTE: n is number in *each* group
# Computer Session 2
library(Publish)
## Loading required package: prodlim
ci.mean(norsjo$sbp)# Confidence interval for systolic blood pressure
## mean CI-95%
## 124.48 [122.14;126.81]
table(norsjo$smoker)
##
## non-smoker smoker
## 206 54
?ci.mean
ci.mean(norsjo$sbp~norsjo$smoker, data = norsjo)
## norsjo$smoker mean CI-95%
## non-smoker 125.61 [123.01;128.21]
## smoker 120.24 [114.95;125.54]
ci.mean(norsjo$sbp~norsjo$sex, data = norsjo)
## norsjo$sex mean CI-95%
## man 125.33 [122.21;128.45]
## woman 123.65 [120.14;127.15]
#install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:purrr':
##
## compact
norsjo$agegrp.cat <- as.factor(norsjo$agegrp)
norsjo$agegrp2 <- revalue(norsjo$agegrp.cat, c('30 years old'="1", '40 years old'="1", '50 years old'="2", '60 years old'="2"))
table(norsjo$agegrp2)
##
## 1 2
## 131 129
ci.mean(norsjo$sbp~norsjo$agegrp2, data = norsjo)
## norsjo$agegrp2 mean CI-95%
## 1 117.12 [114.52;119.71]
## 2 132.02 [128.54;135.49]
?recode()
library(dplyr)
norsjo$age_cat <- recode_factor(
norsjo$agegrp2,
`1` = "Below 49 Years",
`2` = "50 and Above 50 years",
.default = "D"
)
ci.mean(norsjo$sbp~norsjo$age_cat, data = norsjo)
## norsjo$age_cat mean CI-95%
## Below 49 Years 117.12 [114.52;119.71]
## 50 and Above 50 years 132.02 [128.54;135.49]
library(tidyverse)
ggplot(norsjo, aes(sample=sbp))+stat_qq() + stat_qq_line() +
labs(title="Q-Q plot", y = "SBP")+ theme_classic()
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_qq()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_qq_line()`).
ggplot(norsjo, aes(sample=dbp))+stat_qq() + stat_qq_line()
## Warning: Removed 3 rows containing non-finite outside the scale range (`stat_qq()`).
## Removed 3 rows containing non-finite outside the scale range
## (`stat_qq_line()`).
ggplot(norsjo, aes(sample=dbp))+stat_qq() + stat_qq_line() +
labs(title="Q-Q plot", y = "DBP")
## Warning: Removed 3 rows containing non-finite outside the scale range (`stat_qq()`).
## Removed 3 rows containing non-finite outside the scale range
## (`stat_qq_line()`).
library(haven)
paired<-read_dta("paired_data.dta")
t.test(paired$Clock_8, paired$Clock_20, paired=TRUE)
##
## Paired t-test
##
## data: paired$Clock_8 and paired$Clock_20
## t = 2.0847, df = 19, p-value = 0.05083
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.004397094 2.204397094
## sample estimates:
## mean difference
## 1.1
paired$diff<-paired$Clock_20-paired$Clock_8 # Calculating a variable with the difference between afternoon and morning
sd.diff=sd(paired$diff)/sqrt(nrow(paired)) # Calculating standard error of the mean
testvalue=mean(paired$diff)/sd.diff # Compare test-value with critical value
wilcox.test(paired$Clock_8, paired$Clock_20, paired = TRUE, alternative = "two.sided",exact=FALSE)
##
## Wilcoxon signed rank test with continuity correction
##
## data: paired$Clock_8 and paired$Clock_20
## V = 129.5, p-value = 0.056
## alternative hypothesis: true location shift is not equal to 0
t.test(sbp~sex,data=norsjo,var.equal=T)# groups are separated by sex
##
## Two Sample t-test
##
## data: sbp by sex
## t = 0.70989, df = 255, p-value = 0.4784
## alternative hypothesis: true difference in means between group man and group woman is not equal to 0
## 95 percent confidence interval:
## -2.988604 6.357713
## sample estimates:
## mean in group man mean in group woman
## 125.3307 123.6462
t_bmi_sex<-t.test(bmi~sex,data=norsjo,var.equal=T) # Stores the output of the function in a list
t_bmi_sex[] # Writing only t will give you all information in the list
## $statistic
## t
## 0.2024534
##
## $parameter
## df
## 254
##
## $p.value
## [1] 0.8397243
##
## $conf.int
## [1] -0.8811714 1.0831034
## attr(,"conf.level")
## [1] 0.95
##
## $estimate
## mean in group man mean in group woman
## 25.32063 25.21966
##
## $null.value
## difference in means between group man and group woman
## 0
##
## $stderr
## [1] 0.4987121
##
## $alternative
## [1] "two.sided"
##
## $method
## [1] " Two Sample t-test"
##
## $data.name
## [1] "bmi by sex"
t_dbp_sex<-t.test(dbp~sex,data=norsjo,var.equal=T) # Stores the output of the function in a list
t_dbp_sex[] # Writing only t will give you all information in the list
## $statistic
## t
## 0.7340719
##
## $parameter
## df
## 255
##
## $p.value
## [1] 0.4635793
##
## $conf.int
## [1] -1.800848 3.941248
## attr(,"conf.level")
## [1] 0.95
##
## $estimate
## mean in group man mean in group woman
## 79.70866 78.63846
##
## $null.value
## difference in means between group man and group woman
## 0
##
## $stderr
## [1] 1.457895
##
## $alternative
## [1] "two.sided"
##
## $method
## [1] " Two Sample t-test"
##
## $data.name
## [1] "dbp by sex"
t_Chol_sex<-t.test(norsjo$cholesterol~sex,data=norsjo,var.equal=T) # Stores the output of the function in a list
t_Chol_sex[] # Writing only t will give you all information in the list
## $statistic
## t
## 0.4769153
##
## $parameter
## df
## 255
##
## $p.value
## [1] 0.6338309
##
## $conf.int
## [1] -0.2757759 0.4520318
## attr(,"conf.level")
## [1] 0.95
##
## $estimate
## mean in group man mean in group woman
## 6.580952 6.492824
##
## $null.value
## difference in means between group man and group woman
## 0
##
## $stderr
## [1] 0.1847874
##
## $alternative
## [1] "two.sided"
##
## $method
## [1] " Two Sample t-test"
##
## $data.name
## [1] "norsjo$cholesterol by sex"
publish(t.test(sbp~agegrp2,data=norsjo,var.equal=T))
##
## The Two Sample t-test to compare the difference in means between group 1 and group 2 for sbp by agegrp2 yields a mean difference of 14.90 (CI-95% = [-19.20;-10.60]; p-value = <0.001).
library(car)
leveneTest(sbp ~ agegrp2, data = norsjo)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 5.2111 0.02327 *
## 255
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(sbp ~ sex, data = norsjo)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 2.4154 0.1214
## 255
t.test(sbp ~ agegrp2, data = filter(norsjo, sex=="man"),var.equal=T)
##
## Two Sample t-test
##
## data: sbp by agegrp2
## t = -3.642, df = 125, p-value = 0.0003948
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -16.927289 -5.007537
## sample estimates:
## mean in group 1 mean in group 2
## 120.1493 131.1167
t.test(sbp ~ agegrp2, data = filter(norsjo, sex=="woman"),var.equal=T)
##
## Two Sample t-test
##
## data: sbp by agegrp2
## t = -6.0335, df = 128, p-value = 1.611e-08
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -25.14071 -12.72331
## sample estimates:
## mean in group 1 mean in group 2
## 113.8889 132.8209
table(norsjo$sex)
##
## man woman
## 128 132
wilcox.test(bmi~sex,data=norsjo)
##
## Wilcoxon rank sum test with continuity correction
##
## data: bmi by sex
## W = 9196.5, p-value = 0.08857
## alternative hypothesis: true location shift is not equal to 0
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.