Q1

Fifty male and fifty female students fill out the same questionnaire in weekly intervals starting five weeks before an important examination to measure state anxiety.

The research interests are:

1.whether there are gender difference in state anxiety

2.individual differences in state anxiety

Explore the answers to both questions with plots involving confidence intervals or error bars for the means.

Source: Von Eye, A., & Schuster C. (1998). Regression Analysis for Social Sciences. San Diego: Academic Press.

Column 1: Anxiety score 5 weeks before exam for female Column 2: Anxiety score 4 weeks before exam for female Column 3: Anxiety score 3 weeks before exam for female Column 4: Anxiety score 2 weeks before exam for female Column 5: Anxiety score 1 weeks before exam for female Column 6: Anxiety score 5 weeks before exam for male Column 7: Anxiety score 4 weeks before exam for male Column 8: Anxiety score 3 weeks before exam for male Column 9: Anxiety score 2 weeks before exam for male Column 10: Anxiety score 1 weeks before exam for male

# input data
dta1 <-read.csv("C:/Users/Ching-Fang Wu/Documents/dataM/stateAnxiety.txt",header=T,sep='') #sep=''以空白鍵分隔

dta1 |> head() |> knitr::kable()
f1 f2 f3 f4 f5 m1 m2 m3 m4 m5
13 17 18 20 24 6 14 22 20 24
26 31 33 38 42 4 11 14 12 23
13 17 24 29 32 17 25 26 29 38
22 24 26 27 29 19 22 26 30 34
18 19 19 22 30 12 21 21 23 24
32 31 30 31 32 11 16 20 19 22
str(dta1)
## 'data.frame':    50 obs. of  10 variables:
##  $ f1: int  13 26 13 22 18 32 16 18 14 20 ...
##  $ f2: int  17 31 17 24 19 31 16 22 17 19 ...
##  $ f3: int  18 33 24 26 19 30 21 25 23 23 ...
##  $ f4: int  20 38 29 27 22 31 27 29 21 25 ...
##  $ f5: int  24 42 32 29 30 32 30 35 25 28 ...
##  $ m1: int  6 4 17 19 12 11 14 9 12 11 ...
##  $ m2: int  14 11 25 22 21 16 23 18 16 13 ...
##  $ m3: int  22 14 26 26 21 20 26 20 23 17 ...
##  $ m4: int  20 12 29 30 23 19 29 20 26 14 ...
##  $ m5: int  24 23 38 34 24 22 33 24 32 20 ...
typeof(dta1)
## [1] "list"

Gender difference

# 女性考前五周焦慮分數變化(做法參考講義tidy_02 p.4)
matplot(x=c(1, 2, 3, 4, 5), 
        y=t(dta1[, c(1:5)]), 
        type='b', 
        pch=1, 
        cex=.5,
        col='#4428bc',
        bty='n',
        xlab="Times (Weeks)",
        ylab="Anxiety score",
        main="Female State Anxiety before Important Examination")

# 男性考前五周焦慮分數變化(做法參考講義tidy_02 p.4)
matplot(x=c(1, 2, 3, 4, 5), 
        y=t(dta1[, c(6:10)]), 
        type='b', 
        pch=1, 
        cex=.5,
        col='#4428bc',
        bty='n',
        xlab="Times (Weeks)",
        ylab="Anxiety score",
        main="Male State Anxiety before Important Examination")

不分性別,越接近考試,焦慮分數越高。

# Pivot data from wide to long (tidy_02 p.7)
pacman::p_load(readr, tidyr)

dta1_long <- dta1 %>% 
 pivot_longer(cols=starts_with(c("f","m")), 
              names_to="Gender.Week", 
              values_to="Score")
#%>%
 #arrange(SID)
dta1_long |> as.data.frame() |> head(11)
##    Gender.Week Score
## 1           f1    13
## 2           f2    17
## 3           f3    18
## 4           f4    20
## 5           f5    24
## 6           m1     6
## 7           m2    14
## 8           m3    22
## 9           m4    20
## 10          m5    24
## 11          f1    26
library(ggplot2)
#boxplot(dta1)
#
ggplot(dta1_long, aes(x = Gender.Week, y = Score)) +
  geom_boxplot()

考前五周,每一周女性焦慮分數都高於男性,且第一周男女焦慮分數差異最大。

#Pull apart a variable - separate
#new_dta1 %<>% 
 #separate(Gender.Week, c("gender", "week")) %>%
 #mutate(ID=paste0("s", 101:200))
# mutate(IS=parse_number(id)) %>%
# dplyr::select(-c(prefix, week))
# female
#dta1_f<-dta1[,1:5]

# add female ID
#pacman::p_load(magrittr,tidyverse,dplyr)

#dta1_f <- as_tibble(dta1_f) %>% 
  #mutate(SID=paste0("S", 101:150)) 
  # mutate(SID=paste0("f", 101:150)) %>%
  # rename(W1=f1,W2=f2,W3=f3,W4=f4,W5=f5)

# look first 6 rowa
#head(dta1_f)

# male 
#dta1_m<-dta1[,6:10]

# add male ID
#dta1_m <- as_tibble(dta1_m) %>% 
  #mutate(SID=paste0("S", 151:200))
#  mutate(SID=paste0("m", 101:150)) %>%
#  rename(W1=m1,W2=m2,W3=m3,W4=m4,W5=m5)
    
# look first 6 rowa
#head(dta1_m)
# 合併男女資料
#rbind_dta1<-rbind(dta1_m, dta1_f, by = "SID")
#tail(rbind_dta1) #不懂為何會多出一列

#new_dta1<-rbind_dta1[-101,] #手動刪除
#head(new_dta1)
#tail(new_dta1)

Q3

The dataset consists of a sample of 14 primary school children between 8 and 12 years old. The children were asked to respond on 8 emotions and coping strategies scales for each of 6 situations: fail to fulfill assingments in class, not allowed to play with other children, forbidden to do something by the teacher, victim of bullying, too much school work, forbidden to do something by the mother.

Plot the data in some meaningful ways. You may have to manipulate data into a different format first.

Column 1: Unpleasant (Annoy) Column 2: Sad Column 3: Afraid Column 4: Angry Column 5: Approach coping Column 6: Avoidant coping Column 7: Social support seeking Column 8: Emotional reaction, especially agression Column 9: Situation ID Column 10: Children ID

Source: Roeder, I., Boekaerts, M., & Kroonenberg, P. M. (2002). The stress and coping questionnaire for children (School version and Asthma version): Construction, factor structure, and psychometric properties. Psychological Reports, 91, 29-36.

# input data
dta3 <-read.csv("C:/Users/Ching-Fang Wu/Documents/dataM/coping.txt",header=T,sep='') 
dta3 |> head() |> knitr::kable()
annoy sad afraid angry approach avoid support agressive situation sbj
4 2 2 2 1.00 2.00 1.00 2.50 Fail S2
4 4 4 2 4.00 3.00 1.25 1.50 NoPart S2
2 2 2 2 2.67 3.00 1.00 2.33 TeacNo S2
4 3 4 4 4.00 1.50 3.25 1.00 Bully S2
4 2 1 1 1.00 2.75 1.25 1.50 Work S2
4 3 1 4 2.33 2.50 1.00 3.67 MomNo S2
str(dta3)
## 'data.frame':    84 obs. of  10 variables:
##  $ annoy    : int  4 4 2 4 4 4 3 3 3 4 ...
##  $ sad      : int  2 4 2 3 2 3 2 1 1 4 ...
##  $ afraid   : int  2 4 2 4 1 1 2 1 1 2 ...
##  $ angry    : int  2 2 2 4 1 4 2 2 2 1 ...
##  $ approach : num  1 4 2.67 4 1 2.33 2 1.33 1 1.67 ...
##  $ avoid    : num  2 3 3 1.5 2.75 2.5 1 4 1 4 ...
##  $ support  : num  1 1.25 1 3.25 1.25 1 1.5 2.75 1.33 3.5 ...
##  $ agressive: num  2.5 1.5 2.33 1 1.5 3.67 1 2 1.67 2.5 ...
##  $ situation: chr  "Fail" "NoPart" "TeacNo" "Bully" ...
##  $ sbj      : chr  "S2" "S2" "S2" "S2" ...
typeof(dta3)
## [1] "list"
# install tools
pacman::p_load(dplyr, magrittr)

# rename
dta3 %<>%
 dplyr::rename(Annoy=annoy, Sad=sad, Afraid=afraid, Angry=angry, Approach=approach,
               Avoid=avoid, Support=support,Agressive=agressive,
               Situation=situation,Sbj=sbj)

Q4

Use the USPersonalExpenditure{datasets} for this problem.

This data set consists of United States personal expenditures (in billions of dollars) in the categories; food and tobacco, household operation, medical and health, personal care, and private education for the years 1940, 1945, 1950, 1955 and 1960.

Plot the US personal expenditure data in the style of the third plot on the “Time Use” case study in the course web page.

You might want to transform the dollar amounts to log base 10 unit first.

# input data
data(USPersonalExpenditure,package = "datasets")
dta4<-USPersonalExpenditure
# look first 6 row
dta4|> head() |> knitr::kable()
1940 1945 1950 1955 1960
Food and Tobacco 22.200 44.500 59.60 73.2 86.80
Household Operation 10.500 15.500 29.00 36.5 46.20
Medical and Health 3.530 5.760 9.71 14.0 21.10
Personal Care 1.040 1.980 2.45 3.4 5.40
Private Education 0.341 0.974 1.80 2.6 3.64
str(dta4)
##  num [1:5, 1:5] 22.2 10.5 3.53 1.04 0.341 44.5 15.5 5.76 1.98 0.974 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:5] "Food and Tobacco" "Household Operation" "Medical and Health" "Personal Care" ...
##   ..$ : chr [1:5] "1940" "1945" "1950" "1955" ...
typeof(dta4)
## [1] "double"
# transform the dollar amounts to log base 10 unit
log_dta4<-log10(dta4)
log_dta4
##                            1940        1945      1950      1955      1960
## Food and Tobacco     1.34635297  1.64836001 1.7752463 1.8645111 1.9385197
## Household Operation  1.02118930  1.19033170 1.4623980 1.5622929 1.6646420
## Medical and Health   0.54777471  0.76042248 0.9872192 1.1461280 1.3242825
## Personal Care        0.01703334  0.29666519 0.3891661 0.5314789 0.7323938
## Private Education   -0.46724562 -0.01144104 0.2552725 0.4149733 0.5611014

取log10後會壓縮變數的尺度,但變數之間的相關係數不會改變。

library(reshape2)
## Warning: 套件 'reshape2' 是用 R 版本 4.1.2 來建造的
## 
## 載入套件:'reshape2'
## 下列物件被遮斷自 'package:tidyr':
## 
##     smiths
# wide to long
time_dta4 <- melt(log_dta4)
names(time_dta4) <- c("Item", "Year", "Dollar")

library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## 載入套件:'plyr'
## 下列物件被遮斷自 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
# Compute excess money
time_dta4 <- ddply(time_dta4, "Item", transform, excess = Dollar - mean(Dollar))
# show it
time_dta4
##                   Item Year      Dollar       excess
## 1     Food and Tobacco 1940  1.34635297 -0.368245036
## 2     Food and Tobacco 1945  1.64836001 -0.066237999
## 3     Food and Tobacco 1950  1.77524626  0.060648249
## 4     Food and Tobacco 1955  1.86451108  0.149913071
## 5     Food and Tobacco 1960  1.93851973  0.223921715
## 6  Household Operation 1940  1.02118930 -0.358981468
## 7  Household Operation 1945  1.19033170 -0.189839069
## 8  Household Operation 1950  1.46239800  0.082227231
## 9  Household Operation 1955  1.56229286  0.182122097
## 10 Household Operation 1960  1.66464198  0.284471209
## 11  Medical and Health 1940  0.54777471 -0.405390677
## 12  Medical and Health 1945  0.76042248 -0.192742899
## 13  Medical and Health 1950  0.98721923  0.034053848
## 14  Medical and Health 1955  1.14612804  0.192962654
## 15  Medical and Health 1960  1.32428246  0.371117073
## 16       Personal Care 1940  0.01703334 -0.376314119
## 17       Personal Care 1945  0.29666519 -0.096682268
## 18       Personal Care 1950  0.38916608 -0.004181374
## 19       Personal Care 1955  0.53147892  0.138131459
## 20       Personal Care 1960  0.73239376  0.339046302
## 21   Private Education 1940 -0.46724562 -0.617777736
## 22   Private Education 1945 -0.01144104 -0.161973158
## 23   Private Education 1950  0.25527251  0.104740391
## 24   Private Education 1955  0.41497335  0.264441233
## 25   Private Education 1960  0.56110138  0.410569269
#
library(ggplot2)
#
library(plyr)
# 
qplot(Item, excess, data = time_dta4) +
  geom_hline(yintercept = 0, colour = "grey50") +
  geom_line(aes(group = 1)) +
  facet_wrap(~ Year)

#
qplot(excess, Item, data = time_dta4) +
  geom_segment(aes(xend = 0, yend = Item)) +
  geom_vline(xintercept = 0, colour = "grey50") +
  facet_wrap(~ Year, nrow = 1)

Q5

Use the Cushings{MASS} data set to generate a plot similar to the following one:

# input data
data(Cushings,package = "MASS")
dta5<-Cushings

# look first 6 row
dta5|> head() |> knitr::kable()
Tetrahydrocortisone Pregnanetriol Type
a1 3.1 11.70 a
a2 3.0 1.30 a
a3 1.9 0.10 a
a4 3.8 0.04 a
a5 4.1 1.10 a
a6 1.9 0.40 a
str(dta5)
## 'data.frame':    27 obs. of  3 variables:
##  $ Tetrahydrocortisone: num  3.1 3 1.9 3.8 4.1 1.9 8.3 3.8 3.9 7.8 ...
##  $ Pregnanetriol      : num  11.7 1.3 0.1 0.04 1.1 0.4 1 0.2 0.6 1.2 ...
##  $ Type               : Factor w/ 4 levels "a","b","c","u": 1 1 1 1 1 1 2 2 2 2 ...
library(ggplot2)
library(ggrepel)

pacman::p_load(hrbrthemes)
#Extensions - ggrepel(講義p.41)
set.seed(20200410)

ggplot(dta5, aes(Pregnanetriol, 
                Tetrahydrocortisone, 
                color=Type)) +
  geom_point(pch=16, 
             color="peru")+
  geom_label_repel(aes(label=Type)) +
  labs(x="Tetrahydrocortisone (mg/24hours)",
       y="Pregnanetriol (mg/24hours)") +
  scale_color_manual(values=c('dodgerblue',
                              'indianred',
                              'forestgreen',
                              'goldenrod'))+
  theme_ipsum()+
  theme(legend.position='top')
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列

還沒找到怎麼換主題 theme_economist()