Homework 1: Anxiety Statement
Load data file
## ─ Attaching packages ────────────────────────── tidyverse 1.3.0 ─
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ─ Conflicts ─────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
# Read the wide-format anxiety scores; the first line of the file supplies
# the column names (f1-f5, m1-m5).
# NOTE(review): this absolute path only resolves on the author's machine.
dta <- read.table("/Users/haolunfu/Documents/資料管理/week7/stateAnxiety.txt", header = TRUE)

# Show the first six rows.
head(dta)
## f1 f2 f3 f4 f5 m1 m2 m3 m4 m5
## 1 13 17 18 20 24 6 14 22 20 24
## 2 26 31 33 38 42 4 11 14 12 23
## 3 13 17 24 29 32 17 25 26 29 38
## 4 22 24 26 27 29 19 22 26 30 34
## 5 18 19 19 22 30 12 21 21 23 24
## 6 32 31 30 31 32 11 16 20 19 22
Translate the data format from wide to long
# Reshape the wide table into one row per score.
dtaL <- dta %>%
  # Keep the original row name so every score can be traced back to its row.
  mutate(ID = row.names(.)) %>%
  # Stack the first ten columns (f1-f5, m1-m5) into Times/Score pairs.
  # The step must be chained with %>%; %<>% is an assignment pipe and
  # breaks the chain here.
  gather(key = Times, value = Score, 1:10) %>%
  # Split names such as "f1" after the first character: gender, then week.
  separate(Times, c("Gender", "Week"), sep = 1) %>%
  # Prefix the row name with the gender letter and treat Week as a factor.
  mutate(
    ID = paste0(Gender, ID),
    Week = as.factor(Week)
  )

# Show the first six rows of the long table.
head(dtaL)
## ID Gender Week Score
## 1 f1 f 1 13
## 2 f2 f 1 26
## 3 f3 f 1 13
## 4 f4 f 1 22
## 5 f5 f 1 18
## 6 f6 f 1 32
Plot the boxplot between Week and Anxiety Score by Gender
# Boxplots of anxiety score at each week, coloured by gender.
# Written with ggplot() because qplot() is deprecated in current ggplot2.
ggplot(dtaL, aes(x = Week, y = Score, colour = Gender)) +
  geom_boxplot() +
  labs(x = "Time (No. of Week)", y = "Anxiety score")
Plot the line plot (mean +- SE) between Week and Anxiety Score by Gender
# Mean anxiety score (+/- one standard error) per week, one line per gender.
dtaL %>%
  group_by(Week, Gender) %>%
  summarise(
    m_score = mean(Score),
    se_score = sd(Score) / sqrt(n())
  ) %>%
  ggplot(aes(x = Week, y = m_score, colour = Gender, group = Gender)) +
  # Every layer is dodged by the same amount so the genders sit side by side.
  geom_errorbar(
    aes(ymin = m_score - se_score, ymax = m_score + se_score),
    width = 0.2,
    size = 0.3,
    position = position_dodge(0.3)
  ) +
  geom_line(position = position_dodge(0.3)) +
  geom_point(position = position_dodge(0.3), size = rel(3)) +
  scale_shape(guide = guide_legend(title = NULL)) +
  labs(x = "Time (No. of Week)", y = "Mean Score") +
  theme_minimal() +
  # Legend is positioned at relative coordinates (0.1, 0.8).
  theme(legend.position = c(0.1, 0.8))
## The figure revealed that the anxiety and week may be related to gender.
Plot the line plot between Week and Anxiety Score for each individual
# One line per subject (grouped by ID), one panel per gender, with the mean
# score at each week overlaid as a line with points.
ggplot(dtaL, aes(Week, Score, group = ID)) +
  geom_line() +
  # group = 1 overrides the per-ID grouping so the mean pools all subjects.
  stat_summary(aes(group = 1), fun.y = mean, geom = "line") +
  stat_summary(aes(group = 1), fun.y = mean, geom = "point") +
  facet_grid(cols = vars(Gender)) +
  xlab("Time (No. of Week)") +
  ylab("Anxiety Score") +
  theme_minimal()
The figure revealed that the anxiety and week may be caused by individual differences.
Homework 2:
Weissgerber, T.L., Milic, N.M., Winham, S.J., Garovic, V.D. (2015). Beyond Bar and Line Graphs: Time for a New Data Presentation Paradigm. PLOS Biology, 13(4), e1002128.
see the following link: https://rpubs.com/haolunfu/605478
Homework 3: Emotions and Coping Strategies
Load data file
## annoy sad afraid angry approach avoid support agressive situation sbj
## 1 4 2 2 2 1.00 2.00 1.00 2.50 Fail S2
## 2 4 4 4 2 4.00 3.00 1.25 1.50 NoPart S2
## 3 2 2 2 2 2.67 3.00 1.00 2.33 TeacNo S2
## 4 4 3 4 4 4.00 1.50 3.25 1.00 Bully S2
## 5 4 2 1 1 1.00 2.75 1.25 1.50 Work S2
## 6 4 3 1 4 2.33 2.50 1.00 3.67 MomNo S2
Show the summary table
## annoy sad afraid angry approach
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.670
## Median :3.000 Median :2.00 Median :1.000 Median :2.000 Median :2.000
## Mean :2.762 Mean :1.81 Mean :1.405 Mean :2.131 Mean :2.236
## 3rd Qu.:4.000 3rd Qu.:2.00 3rd Qu.:2.000 3rd Qu.:2.250 3rd Qu.:3.000
## Max. :4.000 Max. :4.00 Max. :4.000 Max. :9.000 Max. :4.000
##
## avoid support agressive situation sbj
## Min. :1.000 Min. :0.000 Min. :1.000 Bully :14 S135 : 6
## 1st Qu.:1.750 1st Qu.:1.330 1st Qu.:1.000 Fail :14 S137 : 6
## Median :2.500 Median :2.000 Median :1.500 MomNo :14 S139 : 6
## Mean :2.398 Mean :1.959 Mean :1.542 NoPart:14 S17 : 6
## 3rd Qu.:3.000 3rd Qu.:2.373 3rd Qu.:1.670 TeacNo:14 S185 : 6
## Max. :4.000 Max. :4.000 Max. :4.000 Work :14 S2 : 6
## (Other):48
Translate the data format from wide to long
library(dplyr)
library(tidyverse)
# Stack the first eight columns of dta into Emotions/Values pairs, then give
# the two identifier columns capitalised names.
dta_L <- dplyr::rename(
  gather(dta, key = "Emotions", value = "Values", 1:8),
  Subject = sbj,
  Situation = situation
)

# Show the first six rows of the long table.
head(dta_L)
## Situation Subject Emotions Values
## 1 Fail S2 annoy 4
## 2 NoPart S2 annoy 4
## 3 TeacNo S2 annoy 2
## 4 Bully S2 annoy 4
## 5 Work S2 annoy 4
## 6 MomNo S2 annoy 4
Quickly plot the density across Situation and Emotions to look at the pattern
Plot the correlation matrix
Plot the scatter plot of response across Situation and Emotion.
# Mean response (+/- one standard error) for every Emotions x Situation cell,
# with one dotted line per situation.
dta_L %>%
  group_by(Emotions, Situation) %>%
  summarise(
    m = mean(Values),
    se = sd(Values) / sqrt(n())
  ) %>%
  ggplot(aes(x = Emotions, y = m, group = Situation, colour = Situation)) +
  # Every layer is dodged by the same amount so the situations do not overlap.
  geom_errorbar(
    aes(ymin = m - se, ymax = m + se),
    width = 0.2,
    size = 0.3,
    position = position_dodge(0.3)
  ) +
  geom_line(position = position_dodge(0.3), linetype = "dotted") +
  geom_point(position = position_dodge(0.3), size = rel(3)) +
  scale_shape(guide = guide_legend(title = NULL)) +
  xlab("Emotions") +
  ylab("Response Score") +
  theme_minimal() +
  theme(legend.position = "top")
Homework 4:
assume that the subject data files are downloaded into the data folder in the current working directory
# Read every CSV file in the Q4 folder and stack them into one table.
# NOTE(review): this absolute path only resolves on the author's machine.
data_dir <- "/Users/haolunfu/Documents/資料管理/week7/Q4/"
# `pattern` is a regular expression: "\\.csv$" matches only names ending in a
# literal ".csv", whereas a bare ".csv" lets "." match any character.
fls <- list.files(path = data_dir, pattern = "\\.csv$")
fL <- paste0(data_dir, fls)
dta <- lapply(fL, read_csv) %>% bind_rows()
## Parsed with column specification:
## cols(
## subject = col_character(),
## contrast = col_double(),
## sf = col_double(),
## target_side = col_character(),
## response = col_character(),
## unique_id = col_character()
## )
## Parsed with column specification:
## cols(
## subject = col_character(),
## contrast = col_double(),
## sf = col_double(),
## target_side = col_character(),
## response = col_character(),
## unique_id = col_character()
## )
## Parsed with column specification:
## cols(
## subject = col_character(),
## contrast = col_double(),
## sf = col_double(),
## target_side = col_character(),
## response = col_character(),
## unique_id = col_character()
## )
## Parsed with column specification:
## cols(
## subject = col_character(),
## contrast = col_double(),
## sf = col_double(),
## target_side = col_character(),
## response = col_character(),
## unique_id = col_character()
## )
## Parsed with column specification:
## cols(
## subject = col_character(),
## contrast = col_double(),
## sf = col_double(),
## target_side = col_character(),
## response = col_character(),
## unique_id = col_character()
## )
have a look
## subject contrast sf target_side response
## 1 S1 0.069483451 0.500000 right right
## 2 S1 0.013123729 40.000000 right left
## 3 S1 0.069483451 4.472136 left left
## 4 S1 0.069483451 40.000000 left right
## 5 S1 0.367879441 13.374806 left left
## 6 S1 0.002478752 0.500000 left right
## unique_id
## 1 544ee9ff-2569-4f38-b04e-7e4d0a0be4d2
## 2 b27fe910-e3ba-48fb-b168-5afb1f115d8f
## 3 72c9d6ce-0a90-4d4b-a199-03435c15291b
## 4 48b5bbb2-e6ee-4848-b77e-839ed5320c01
## 5 32a5cce4-3f8a-4e63-80c1-3fee3230d1bd
## 6 47ebce53-9d5a-48de-936b-25d5105a0784
score the correct responses
## subject contrast sf target_side response
## 1 S1 0.069483451 0.500000 right right
## 2 S1 0.013123729 40.000000 right left
## 3 S1 0.069483451 4.472136 left left
## 4 S1 0.069483451 40.000000 left right
## 5 S1 0.367879441 13.374806 left left
## 6 S1 0.002478752 0.500000 left right
## unique_id correct
## 1 544ee9ff-2569-4f38-b04e-7e4d0a0be4d2 1
## 2 b27fe910-e3ba-48fb-b168-5afb1f115d8f 0
## 3 72c9d6ce-0a90-4d4b-a199-03435c15291b 1
## 4 48b5bbb2-e6ee-4848-b77e-839ed5320c01 0
## 5 32a5cce4-3f8a-4e63-80c1-3fee3230d1bd 1
## 6 47ebce53-9d5a-48de-936b-25d5105a0784 0
bootstrapped CIs for proportion of correct responses
# Mean of `correct` at each contrast value with bootstrapped confidence
# limits, drawn on a log10 contrast axis and a 0-1 vertical axis.
p <- ggplot(dta, aes(x = contrast, y = correct)) +
  stat_summary(fun.data = "mean_cl_boot", colour = "dodgerblue") +
  scale_x_log10() +
  scale_y_continuous(limits = c(0, 1)) +
  xlab("Contrast in log unit") +
  ylab("Proportion of correct responses")
p
fit a detection model in which the chance of correct response is .5
generate predicted responses
Modified version
# Treat subject and sf as categorical variables.
dta$subject <- factor(dta$subject)
dta$sf <- factor(dta$sf)

# generate predicted responses
# Contrast grid: 1000 evenly spaced values over the observed range, repeated
# five times (5000 values in total).
x1 <- rep(with(dta, seq(min(contrast), max(contrast), length.out = 1000)), 5)
# NOTE(review): x2 and x3 hold 2500 draws each, so data.frame() recycles them
# twice to match the 5000 contrast values -- confirm this is intended.
x2 <- with(dta, sample(levels(subject), 2500, replace = TRUE))
x3 <- with(dta, sample(levels(sf), 2500, replace = TRUE))
xval <- data.frame(contrast = x1, subject = x2, sf = x3)
# NOTE(review): m0 is not defined in this section; it must already exist.
yval <- predict(m0, xval, type = "response")
m0_pred <- data.frame(xval, yval)
augument to the observed plot
Homework 5:
Load data file
## Tetrahydrocortisone Pregnanetriol Type
## a1 3.1 11.70 a
## a2 3.0 1.30 a
## a3 1.9 0.10 a
## a4 3.8 0.04 a
## a5 4.1 1.10 a
## a6 1.9 0.40 a
Show the data structure
## 'data.frame': 27 obs. of 3 variables:
## $ Tetrahydrocortisone: num 3.1 3 1.9 3.8 4.1 1.9 8.3 3.8 3.9 7.8 ...
## $ Pregnanetriol : num 11.7 1.3 0.1 0.04 1.1 0.4 1 0.2 0.6 1.2 ...
## $ Type : Factor w/ 4 levels "a","b","c","u": 1 1 1 1 1 1 2 2 2 2 ...
Relabel the type
Plot (the code for the figure’s theme was adapted from H.-J. Wu)
library(ggrepel)
library(ggplot2)
library(ggthemes)
# Scatter plot of Pregnanetriol against Tetrahydrocortisone, one filled point
# per row; the rows named a1, b1, c1 and u1 are labelled with their Type.
p <- ggplot(dta, aes(x = Tetrahydrocortisone, y = Pregnanetriol, color = Type)) +
  geom_point(aes(fill = factor(Type)), colour = "black", pch = 21, size = 3) +
  geom_text_repel(
    data = subset(dta, rownames(dta) %in% c("a1", "b1", "c1", "u1")) %>%
      group_by(Type),
    aes(label = Type)
  ) +
  labs(
    x = "Tetrahydrocortisone (mg/24 hours)",
    y = "Pregnanetriol (mg/24 hours)",
    title = "Cushing's syndrome"
  ) +
  # using the similar theme, change background to white
  theme_economist_white(gray_bg = FALSE) +
  # adjust title to aligned to the right, bold and size
  theme(plot.title = element_text(hjust = 1, face = "bold", size = 12)) +
  # hide the legend; "none" is the documented value, " " is not a valid position
  theme(legend.position = "none") +
  # remove the x-axis bottom line and ticks
  theme(
    axis.line.x.bottom = element_line(colour = "white"),
    axis.ticks.x = element_line(colour = "white")
  )
p