Homework 1: Anxiety Statement

Load data file

## Homework 1
library(tidyverse)

## ─ Attaching packages ────────────────────────── tidyverse 1.3.0 ─

## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0

## ─ Conflicts ─────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(dplyr)
library(ggplot2)
# Read the wide-format state-anxiety table and preview the first rows.
# The file has a header row; its columns are named f1-f5 and m1-m5.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
dta <- read.table("/Users/haolunfu/Documents/資料管理/week7/stateAnxiety.txt", header = TRUE)
head(dta)

##   f1 f2 f3 f4 f5 m1 m2 m3 m4 m5
## 1 13 17 18 20 24  6 14 22 20 24
## 2 26 31 33 38 42  4 11 14 12 23
## 3 13 17 24 29 32 17 25 26 29 38
## 4 22 24 26 27 29 19 22 26 30 34
## 5 18 19 19 22 30 12 21 21 23 24
## 6 32 31 30 31 32 11 16 20 19 22

Reshape the data from wide to long format

# Reshape the anxiety scores from wide to long format.
#   1. keep the row name as a provisional ID,
#   2. stack the ten score columns into Times (column name) / Score (value),
#   3. split Times after its first character into Gender and Week,
#   4. prefix the ID with the gender letter and store Week as a factor.
# Every step is chained with the ordinary forward pipe `%>%`; the
# compound-assignment pipe `%<>%` is not attached by tidyverse and cannot
# assign back into an intermediate pipeline result.
dtaL <- dta %>%
  mutate(ID = row.names(.)) %>%
  gather(key = Times, value = Score, 1:10) %>%
  separate(Times, c("Gender", "Week"), sep = 1) %>%
  mutate(ID = paste0(Gender, ID),
         Week = as.factor(Week))

head(dtaL)

##   ID Gender Week Score
## 1 f1      f    1    13
## 2 f2      f    1    26
## 3 f3      f    1    13
## 4 f4      f    1    22
## 5 f5      f    1    18
## 6 f6      f    1    32

Box plots of anxiety score by week, split by gender

# Box plots of the anxiety score at each week, coloured by gender.
# Written with ggplot() instead of the deprecated qplot() shortcut, which
# also matches the other figures in this file; the resulting plot is the same.
ggplot(dtaL, aes(x = Week, y = Score, colour = Gender)) +
  geom_boxplot() +
  xlab("Time (No. of Week)") +
  ylab("Anxiety score")

Line plot (mean ± SE) of anxiety score by week, split by gender

# Mean anxiety score with +/- 1 standard-error bars for every
# Week x Gender cell, drawn as one connected line per gender.
# All three layers use the same horizontal dodge (0.3) so that bars, lines
# and points of the two genders stay aligned with each other.
dtaL %>%
  group_by(Week, Gender) %>%
  summarize(m_score = mean(Score),
            se_score = sd(Score) / sqrt(n())) %>%
  ggplot(aes(x = Week, y = m_score, color = Gender, group = Gender)) +
  geom_errorbar(aes(ymin = m_score - se_score, ymax = m_score + se_score),
                position = position_dodge(.3), width = .2, size = .3) +
  geom_line(position = position_dodge(.3)) +
  geom_point(position = position_dodge(.3), size = rel(3)) +
  scale_shape(guide = guide_legend(title = NULL)) +
  xlab("Time (No. of Week)") +
  ylab("Mean Score") +
  theme_minimal() +
  # legend placed inside the panel, near the upper-left corner
  theme(legend.position = c(.1, .8))

## The figure suggests that the change in anxiety over the weeks may differ by gender.

Line plot of anxiety score by week for each individual

# One trajectory per participant (group = ID), with the per-week mean
# overlaid as a line and as points (group = 1 pools all participants of a
# panel); one panel per gender, laid out side by side.
ggplot(dtaL, aes(x = Week, y = Score, group = ID)) +
  geom_line() +
  stat_summary(aes(group = 1), geom = "line", fun.y = mean) +
  stat_summary(aes(group = 1), geom = "point", fun.y = mean) +
  facet_grid(cols = vars(Gender)) +
  xlab("Time (No. of Week)") +
  ylab("Anxiety Score") +
  theme_minimal()

The figure suggests that the change in anxiety over the weeks varies considerably across individuals.

Homework 2:

Weissgerber, T.L., Milic, N.M., Winham, S.J., Garovic, V.D. (2015). Beyond Bar and Line Graphs: Time for a New Data Presentation Paradigm. PLOS Biology, 13(4), e1002128.

see the following link: https://rpubs.com/haolunfu/605478

Homework 3: Emotions and Coping Strategies

Load data file

# Read the coping data set (the file has a header row) and preview the
# first rows. `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
dta <- read.table("/Users/haolunfu/Documents/資料管理/week7/coping.txt", header = TRUE)
head(dta)

##   annoy sad afraid angry approach avoid support agressive situation sbj
## 1     4   2      2     2     1.00  2.00    1.00      2.50      Fail  S2
## 2     4   4      4     2     4.00  3.00    1.25      1.50    NoPart  S2
## 3     2   2      2     2     2.67  3.00    1.00      2.33    TeacNo  S2
## 4     4   3      4     4     4.00  1.50    3.25      1.00     Bully  S2
## 5     4   2      1     1     1.00  2.75    1.25      1.50      Work  S2
## 6     4   3      1     4     2.33  2.50    1.00      3.67     MomNo  S2

Show the summary table

# Per-column descriptive summary of the coping data.
# NOTE(review): in the recorded output `angry` has a maximum of 9 while the
# other three emotion ratings top out at 4 -- possibly a data-entry error;
# confirm against the raw file.
summary(dta)

##      annoy            sad           afraid          angry          approach    
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.00   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.670  
##  Median :3.000   Median :2.00   Median :1.000   Median :2.000   Median :2.000  
##  Mean   :2.762   Mean   :1.81   Mean   :1.405   Mean   :2.131   Mean   :2.236  
##  3rd Qu.:4.000   3rd Qu.:2.00   3rd Qu.:2.000   3rd Qu.:2.250   3rd Qu.:3.000  
##  Max.   :4.000   Max.   :4.00   Max.   :4.000   Max.   :9.000   Max.   :4.000  
##                                                                                
##      avoid          support        agressive      situation       sbj    
##  Min.   :1.000   Min.   :0.000   Min.   :1.000   Bully :14   S135   : 6  
##  1st Qu.:1.750   1st Qu.:1.330   1st Qu.:1.000   Fail  :14   S137   : 6  
##  Median :2.500   Median :2.000   Median :1.500   MomNo :14   S139   : 6  
##  Mean   :2.398   Mean   :1.959   Mean   :1.542   NoPart:14   S17    : 6  
##  3rd Qu.:3.000   3rd Qu.:2.373   3rd Qu.:1.670   TeacNo:14   S185   : 6  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Work  :14   S2     : 6  
##                                                              (Other):48

Translate the data format from wide to long

library(dplyr)
library(tidyverse)
# Stack the first eight rating columns into long format: the former column
# name goes to `Emotions`, the rating to `Values`. The two identifier
# columns are then given capitalised names (sbj -> Subject,
# situation -> Situation); `Emotions` and `Values` already carry their
# final names, so they need no renaming.
dta_L <- dplyr::rename(
  gather(dta, key = "Emotions", value = "Values", 1:8),
  Situation = situation,
  Subject = sbj
)
head(dta_L)

##   Situation Subject Emotions Values
## 1      Fail      S2    annoy      4
## 2    NoPart      S2    annoy      4
## 3    TeacNo      S2    annoy      2
## 4     Bully      S2    annoy      4
## 5      Work      S2    annoy      4
## 6     MomNo      S2    annoy      4

Quick density plots across Situation and Emotions to look at the pattern

# Density of the ratings in every Situation (rows) x Emotions (columns)
# panel. Written with ggplot() instead of the deprecated qplot() shortcut,
# which also matches the other figures in this file; the plot is the same.
ggplot(dta_L, aes(x = Values)) +
  geom_density() +
  facet_grid(Situation ~ Emotions)

Plot the correlation matrix

library(ggcorrplot)
# Correlation matrix of the first eight (rating) columns, shown as the
# lower triangle with hierarchical-clustering order and the coefficients
# printed in the cells. `TRUE` is spelled out because `T` can be reassigned.
ggcorrplot(cor(dta[1:8]),
           hc.order = TRUE,
           type = "lower",
           lab = TRUE)

Plot the mean response (± SE) for each Emotion by Situation.

# Mean rating with +/- 1 standard-error bars for every
# Emotions x Situation cell. All layers share the same horizontal dodge
# (0.3); the dotted lines link the points that belong to one situation.
dta_L %>%
  group_by(Emotions, Situation) %>%
  summarise(m = mean(Values),
            se = sd(Values) / sqrt(n())) %>%
  ggplot(aes(x = Emotions, y = m, group = Situation, color = Situation)) +
  geom_errorbar(aes(ymin = m - se, ymax = m + se),
                position = position_dodge(0.3), width = .2, size = .3) +
  geom_line(position = position_dodge(0.3), linetype = "dotted") +
  geom_point(position = position_dodge(0.3), size = rel(3)) +
  scale_shape(guide = guide_legend(title = NULL)) +
  xlab("Emotions") +
  ylab("Response Score") +
  theme_minimal() +
  theme(legend.position = "top")

Homework 4:

assume that the subject data files are downloaded into the data folder in the current working directory

library(pacman)

# tidyr stuff and psyphy package for model fitting
p_load(tidyverse, psyphy)

# Read every subject's CSV file from the data directory and stack them
# into a single table.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the name: only files that really end in ".csv" are
# picked up (the bare ".csv" would also match e.g. "notes.csv.bak").
data_dir <- "/Users/haolunfu/Documents/資料管理/week7/Q4/"
fls <- list.files(path = data_dir, pattern = "\\.csv$")
fL <- paste0(data_dir, fls)
dta <- lapply(fL, read_csv) %>% bind_rows()

## Parsed with column specification:
## cols(
##   subject = col_character(),
##   contrast = col_double(),
##   sf = col_double(),
##   target_side = col_character(),
##   response = col_character(),
##   unique_id = col_character()
## )
## Parsed with column specification:
## cols(
##   subject = col_character(),
##   contrast = col_double(),
##   sf = col_double(),
##   target_side = col_character(),
##   response = col_character(),
##   unique_id = col_character()
## )
## Parsed with column specification:
## cols(
##   subject = col_character(),
##   contrast = col_double(),
##   sf = col_double(),
##   target_side = col_character(),
##   response = col_character(),
##   unique_id = col_character()
## )
## Parsed with column specification:
## cols(
##   subject = col_character(),
##   contrast = col_double(),
##   sf = col_double(),
##   target_side = col_character(),
##   response = col_character(),
##   unique_id = col_character()
## )
## Parsed with column specification:
## cols(
##   subject = col_character(),
##   contrast = col_double(),
##   sf = col_double(),
##   target_side = col_character(),
##   response = col_character(),
##   unique_id = col_character()
## )

have a look

# Convert the combined table to a plain data.frame, then preview it.
dta <- as.data.frame(dta)
head(dta)

##   subject    contrast        sf target_side response
## 1      S1 0.069483451  0.500000       right    right
## 2      S1 0.013123729 40.000000       right     left
## 3      S1 0.069483451  4.472136        left     left
## 4      S1 0.069483451 40.000000        left    right
## 5      S1 0.367879441 13.374806        left     left
## 6      S1 0.002478752  0.500000        left    right
##                              unique_id
## 1 544ee9ff-2569-4f38-b04e-7e4d0a0be4d2
## 2 b27fe910-e3ba-48fb-b168-5afb1f115d8f
## 3 72c9d6ce-0a90-4d4b-a199-03435c15291b
## 4 48b5bbb2-e6ee-4848-b77e-839ed5320c01
## 5 32a5cce4-3f8a-4e63-80c1-3fee3230d1bd
## 6 47ebce53-9d5a-48de-936b-25d5105a0784

score the correct responses

# A trial is correct (1) when the response matches the side on which the
# target was shown, otherwise incorrect (0).
dta$correct <- as.numeric(dta$target_side == dta$response)
head(dta)

##   subject    contrast        sf target_side response
## 1      S1 0.069483451  0.500000       right    right
## 2      S1 0.013123729 40.000000       right     left
## 3      S1 0.069483451  4.472136        left     left
## 4      S1 0.069483451 40.000000        left    right
## 5      S1 0.367879441 13.374806        left     left
## 6      S1 0.002478752  0.500000        left    right
##                              unique_id correct
## 1 544ee9ff-2569-4f38-b04e-7e4d0a0be4d2       1
## 2 b27fe910-e3ba-48fb-b168-5afb1f115d8f       0
## 3 72c9d6ce-0a90-4d4b-a199-03435c15291b       1
## 4 48b5bbb2-e6ee-4848-b77e-839ed5320c01       0
## 5 32a5cce4-3f8a-4e63-80c1-3fee3230d1bd       1
## 6 47ebce53-9d5a-48de-936b-25d5105a0784       0

bootstrapped CIs for proportion of correct responses

# Proportion of correct responses at each contrast value, summarised with
# "mean_cl_boot" (mean plus bootstrapped confidence limits). Contrast is
# shown on a log10 axis and the y axis is fixed to the 0-1 range.
p <- ggplot(dta, aes(x = contrast, y = correct)) +
  stat_summary(fun.data = "mean_cl_boot", colour = "dodgerblue") +
  scale_x_log10() +
  scale_y_continuous(limits = c(0, 1)) +
  xlab("Contrast in log unit") +
  ylab("Proportion of correct responses")
p

fit a detection model in which the chance of correct response is .5

# Detection model: binomial GLM of correctness on log10(contrast), using
# psyphy's mafc.probit(2) link (two alternatives, i.e. a .5 chance level).
m0 <- glm(
  formula = correct ~ log10(contrast),
  family = binomial(link = mafc.probit(2)),
  data = dta
)

generate predicted responses

# Predicted probability of a correct response on a grid of 1000 contrast
# values spanning the observed range. `length.out` is written in full
# instead of relying on partial matching of `len`.
xval <- seq(min(dta$contrast), max(dta$contrast), length.out = 1000)
yval <- predict(m0, newdata = data.frame(contrast = xval), type = "response")
m0_pred <- data.frame(xval, yval)

Add the fitted curve to the observed plot

# Overlay the model's predicted curve on the observed proportions, then
# draw the combined figure.
p <- p + geom_line(mapping = aes(x = xval, y = yval), data = m0_pred)

print(p)

Modified version

# Treat subject and spatial frequency as categorical so they can be used
# as facetting variables.
dta$subject <- factor(dta$subject)
dta$sf <- factor(dta$sf)

# Generate predicted responses for every subject x sf panel.
# expand.grid() builds the full, deterministic grid (1000 contrast values
# for each subject/sf combination) instead of randomly sampling subject and
# sf labels, which left each panel with an arbitrary, non-reproducible
# subset of the curve and relied on data.frame() silently recycling the
# shorter vectors.
# m0 uses contrast as its only predictor, so the predicted curve is
# identical in every panel.
xval <- expand.grid(
  contrast = seq(min(dta$contrast), max(dta$contrast), length.out = 1000),
  subject = levels(dta$subject),
  sf = levels(dta$sf)
)
yval <- predict(m0, xval, type = "response")
m0_pred <- data.frame(xval, yval)

Add the fitted curve to the observed plot

# Add the predicted curve and split the figure into a subject (rows) by
# sf (columns) grid of panels, then draw it.
p <- p +
  geom_line(mapping = aes(x = contrast, y = yval), data = m0_pred) +
  facet_grid(rows = vars(subject), cols = vars(sf))

print(p)

Homework 5:

Load data file

# Take the Cushings data set from the MASS package (accessed with `::`, so
# MASS itself is not attached) and preview the first rows.
dta <- MASS::Cushings
head(dta)

##    Tetrahydrocortisone Pregnanetriol Type
## a1                 3.1         11.70    a
## a2                 3.0          1.30    a
## a3                 1.9          0.10    a
## a4                 3.8          0.04    a
## a5                 4.1          1.10    a
## a6                 1.9          0.40    a

Show the data structure

# Compact overview of the column types and first values.
str(dta)

## 'data.frame':    27 obs. of  3 variables:
##  $ Tetrahydrocortisone: num  3.1 3 1.9 3.8 4.1 1.9 8.3 3.8 3.9 7.8 ...
##  $ Pregnanetriol      : num  11.7 1.3 0.1 0.04 1.1 0.4 1 0.2 0.6 1.2 ...
##  $ Type               : Factor w/ 4 levels "a","b","c","u": 1 1 1 1 1 1 2 2 2 2 ...

Relabel the type

# Replace the one-letter type codes with descriptive labels; the names of
# the lookup vector are the original codes, its values the new labels.
type_labels <- c(a = "Adenoma",
                 b = "Bilateral Hyperplasia",
                 c = "Carcinoma",
                 u = "Unknown")
dta$Type <- factor(dta$Type,
                   levels = names(type_labels),
                   labels = unname(type_labels))

Plot (the code for the figure’s theme was adapted from H.-J. Wu)

library(ggrepel)
library(ggplot2)
library(ggthemes)

# Scatter plot of the two measurements in the Cushings data, one filled
# point per patient, with the type name written once per type.
p <- ggplot(dta, aes(x = Tetrahydrocortisone, y = Pregnanetriol, color = Type)) +
  # filled circles (pch 21) with a black outline, fill colour by type
  geom_point(aes(fill = factor(Type)), colour = "black", pch = 21, size = 3) +
  # label only the first row of each type (a1, b1, c1, u1); the labels
  # need no grouping, so the subset is passed on as it is
  geom_text_repel(data = subset(dta, rownames(dta) %in% c("a1", "b1", "c1", "u1")),
                  aes(label = Type)) +
  labs(x = "Tetrahydrocortisone (mg/24 hours)",
       y = "Pregnanetriol (mg/24 hours)",
       title = "Cushing's syndrome") +
  # Economist-like theme with a white instead of a grey background
  theme_economist_white(gray_bg = FALSE) +
  theme(
    # title right-aligned, bold, 12 pt
    plot.title = element_text(hjust = 1, face = "bold", size = 12),
    # hide the legend; "none" is the documented value for this (the former
    # " " is not a recognised position)
    legend.position = "none",
    # blend the bottom x-axis line and the x ticks into the white background
    axis.line.x.bottom = element_line(colour = "white"),
    axis.ticks.x = element_line(colour = "white")
  )

p

Week 7 Homework (Grammar)

Hao-Lun Fu

2020-05-09

Homework 1: Anxiety Statement

Load data file

Reshape the data from wide to long format

Plot the boxplot between Week and Anxiety Score by Gender

Line plot (mean ± SE) of anxiety score by week, split by gender

Line plot of anxiety score by week for each individual

The figure suggests that the change in anxiety over the weeks varies considerably across individuals.

Homework 2:

Weissgerber, T.L., Milic, N.M., Winham, S.J., Garovic, V.D. (2015). Beyond Bar and Line Graphs: Time for a New Data Presentation Paradigm. PLOS Biology, 13(4), e1002128.

see the following link: https://rpubs.com/haolunfu/605478

Homework 3: Emotions and Coping Strategies

Load data file

Show the summary table

Translate the data format from wide to long

Quick density plots across Situation and Emotions to look at the pattern

Plot the correlation matrix

Plot the mean response (± SE) for each Emotion by Situation.

Homework 4:

assume that the subject data files are downloaded into the data folder in the current working directory

have a look

score the correct responses

bootstrapped CIs for proportion of correct responses

fit a detection model in which the chance of correct response is .5

generate predicted responses

Add the fitted curve to the observed plot

Modified version

Add the fitted curve to the observed plot

Homework 5:

Load data file

Show the data structure

Relabel the type

Plot (the code for the figure’s theme was adapted from H.-J. Wu)