Fifty male and fifty female students fill out the same questionnaire in weekly intervals starting five weeks before an important examination to measure state anxiety.
The research interests are:
1.whether there are gender difference in state anxiety
2.individual differences in state anxiety
Explore the answers to both questions with plots involving confidence intervals or error bars for the means.
Source: Von Eye, A., & Schuster C. (1998). Regression Analysis for Social Sciences. San Diego: Academic Press.
Column 1: Anxiety score 5 weeks before exam for female Column 2: Anxiety score 4 weeks before exam for female Column 3: Anxiety score 3 weeks before exam for female Column 4: Anxiety score 2 weeks before exam for female Column 5: Anxiety score 1 weeks before exam for female Column 6: Anxiety score 5 weeks before exam for male Column 7: Anxiety score 4 weeks before exam for male Column 8: Anxiety score 3 weeks before exam for male Column 9: Anxiety score 2 weeks before exam for male Column 10: Anxiety score 1 weeks before exam for male
# input data
<-read.csv("C:/Users/Ching-Fang Wu/Documents/dataM/stateAnxiety.txt",header=T,sep='') #sep=''以空白鍵分隔
dta1
|> head() |> knitr::kable() dta1
f1 | f2 | f3 | f4 | f5 | m1 | m2 | m3 | m4 | m5 |
---|---|---|---|---|---|---|---|---|---|
13 | 17 | 18 | 20 | 24 | 6 | 14 | 22 | 20 | 24 |
26 | 31 | 33 | 38 | 42 | 4 | 11 | 14 | 12 | 23 |
13 | 17 | 24 | 29 | 32 | 17 | 25 | 26 | 29 | 38 |
22 | 24 | 26 | 27 | 29 | 19 | 22 | 26 | 30 | 34 |
18 | 19 | 19 | 22 | 30 | 12 | 21 | 21 | 23 | 24 |
32 | 31 | 30 | 31 | 32 | 11 | 16 | 20 | 19 | 22 |
str(dta1)
## 'data.frame': 50 obs. of 10 variables:
## $ f1: int 13 26 13 22 18 32 16 18 14 20 ...
## $ f2: int 17 31 17 24 19 31 16 22 17 19 ...
## $ f3: int 18 33 24 26 19 30 21 25 23 23 ...
## $ f4: int 20 38 29 27 22 31 27 29 21 25 ...
## $ f5: int 24 42 32 29 30 32 30 35 25 28 ...
## $ m1: int 6 4 17 19 12 11 14 9 12 11 ...
## $ m2: int 14 11 25 22 21 16 23 18 16 13 ...
## $ m3: int 22 14 26 26 21 20 26 20 23 17 ...
## $ m4: int 20 12 29 30 23 19 29 20 26 14 ...
## $ m5: int 24 23 38 34 24 22 33 24 32 20 ...
typeof(dta1)
## [1] "list"
# 女性考前五周焦慮分數變化(做法參考講義tidy_02 p.4)
matplot(x=c(1, 2, 3, 4, 5),
y=t(dta1[, c(1:5)]),
type='b',
pch=1,
cex=.5,
col='#4428bc',
bty='n',
xlab="Times (Weeks)",
ylab="Anxiety score",
main="Female State Anxiety before Important Examination")
# 男性考前五周焦慮分數變化(做法參考講義tidy_02 p.4)
matplot(x=c(1, 2, 3, 4, 5),
y=t(dta1[, c(6:10)]),
type='b',
pch=1,
cex=.5,
col='#4428bc',
bty='n',
xlab="Times (Weeks)",
ylab="Anxiety score",
main="Male State Anxiety before Important Examination")
不分性別,越接近考試,焦慮分數越高。
# Pivot data from wide to long (tidy_02 p.7)
::p_load(readr, tidyr)
pacman
<- dta1 %>%
dta1_long pivot_longer(cols=starts_with(c("f","m")),
names_to="Gender.Week",
values_to="Score")
#%>%
#arrange(SID)
|> as.data.frame() |> head(11) dta1_long
## Gender.Week Score
## 1 f1 13
## 2 f2 17
## 3 f3 18
## 4 f4 20
## 5 f5 24
## 6 m1 6
## 7 m2 14
## 8 m3 22
## 9 m4 20
## 10 m5 24
## 11 f1 26
library(ggplot2)
#boxplot(dta1)
#
ggplot(dta1_long, aes(x = Gender.Week, y = Score)) +
geom_boxplot()
考前五周,每一周女性焦慮分數都高於男性,且第一周男女焦慮分數差異最大。
#Pull apart a variable - separate
#new_dta1 %<>%
#separate(Gender.Week, c("gender", "week")) %>%
#mutate(ID=paste0("s", 101:200))
# mutate(IS=parse_number(id)) %>%
# dplyr::select(-c(prefix, week))
# female
#dta1_f<-dta1[,1:5]
# add female ID
#pacman::p_load(magrittr,tidyverse,dplyr)
#dta1_f <- as_tibble(dta1_f) %>%
#mutate(SID=paste0("S", 101:150))
# mutate(SID=paste0("f", 101:150)) %>%
# rename(W1=f1,W2=f2,W3=f3,W4=f4,W5=f5)
# look first 6 rowa
#head(dta1_f)
# male
#dta1_m<-dta1[,6:10]
# add male ID
#dta1_m <- as_tibble(dta1_m) %>%
#mutate(SID=paste0("S", 151:200))
# mutate(SID=paste0("m", 101:150)) %>%
# rename(W1=m1,W2=m2,W3=m3,W4=m4,W5=m5)
# look first 6 rowa
#head(dta1_m)
# 合併男女資料
#rbind_dta1<-rbind(dta1_m, dta1_f, by = "SID")
#tail(rbind_dta1) #不懂為何會多出一列
#new_dta1<-rbind_dta1[-101,] #手動刪除
#head(new_dta1)
#tail(new_dta1)
The dataset consists of a sample of 14 primary school children between 8 and 12 years old. The children were asked to respond on 8 emotions and coping strategies scales for each of 6 situations: fail to fulfill assingments in class, not allowed to play with other children, forbidden to do something by the teacher, victim of bullying, too much school work, forbidden to do something by the mother.
Plot the data in some meaningful ways. You may have to manipulate data into a different format first.
Column 1: Unpleasant (Annoy) Column 2: Sad Column 3: Afraid Column 4: Angry Column 5: Approach coping Column 6: Avoidant coping Column 7: Social support seeking Column 8: Emotional reaction, especially agression Column 9: Situation ID Column 10: Children ID
Source: Roeder, I., Boekaerts, M., & Kroonenberg, P. M. (2002). The stress and coping questionnaire for children (School version and Asthma version): Construction, factor structure, and psychometric properties. Psychological Reports, 91, 29-36.
# input data
<-read.csv("C:/Users/Ching-Fang Wu/Documents/dataM/coping.txt",header=T,sep='')
dta3 |> head() |> knitr::kable() dta3
annoy | sad | afraid | angry | approach | avoid | support | agressive | situation | sbj |
---|---|---|---|---|---|---|---|---|---|
4 | 2 | 2 | 2 | 1.00 | 2.00 | 1.00 | 2.50 | Fail | S2 |
4 | 4 | 4 | 2 | 4.00 | 3.00 | 1.25 | 1.50 | NoPart | S2 |
2 | 2 | 2 | 2 | 2.67 | 3.00 | 1.00 | 2.33 | TeacNo | S2 |
4 | 3 | 4 | 4 | 4.00 | 1.50 | 3.25 | 1.00 | Bully | S2 |
4 | 2 | 1 | 1 | 1.00 | 2.75 | 1.25 | 1.50 | Work | S2 |
4 | 3 | 1 | 4 | 2.33 | 2.50 | 1.00 | 3.67 | MomNo | S2 |
str(dta3)
## 'data.frame': 84 obs. of 10 variables:
## $ annoy : int 4 4 2 4 4 4 3 3 3 4 ...
## $ sad : int 2 4 2 3 2 3 2 1 1 4 ...
## $ afraid : int 2 4 2 4 1 1 2 1 1 2 ...
## $ angry : int 2 2 2 4 1 4 2 2 2 1 ...
## $ approach : num 1 4 2.67 4 1 2.33 2 1.33 1 1.67 ...
## $ avoid : num 2 3 3 1.5 2.75 2.5 1 4 1 4 ...
## $ support : num 1 1.25 1 3.25 1.25 1 1.5 2.75 1.33 3.5 ...
## $ agressive: num 2.5 1.5 2.33 1 1.5 3.67 1 2 1.67 2.5 ...
## $ situation: chr "Fail" "NoPart" "TeacNo" "Bully" ...
## $ sbj : chr "S2" "S2" "S2" "S2" ...
typeof(dta3)
## [1] "list"
# install tools
::p_load(dplyr, magrittr)
pacman
# rename
%<>%
dta3 ::rename(Annoy=annoy, Sad=sad, Afraid=afraid, Angry=angry, Approach=approach,
dplyrAvoid=avoid, Support=support,Agressive=agressive,
Situation=situation,Sbj=sbj)
Use the USPersonalExpenditure{datasets} for this problem.
This data set consists of United States personal expenditures (in billions of dollars) in the categories; food and tobacco, household operation, medical and health, personal care, and private education for the years 1940, 1945, 1950, 1955 and 1960.
Plot the US personal expenditure data in the style of the third plot on the “Time Use” case study in the course web page.
You might want to transform the dollar amounts to log base 10 unit first.
# input data
data(USPersonalExpenditure,package = "datasets")
<-USPersonalExpenditure
dta4# look first 6 row
|> head() |> knitr::kable() dta4
1940 | 1945 | 1950 | 1955 | 1960 | |
---|---|---|---|---|---|
Food and Tobacco | 22.200 | 44.500 | 59.60 | 73.2 | 86.80 |
Household Operation | 10.500 | 15.500 | 29.00 | 36.5 | 46.20 |
Medical and Health | 3.530 | 5.760 | 9.71 | 14.0 | 21.10 |
Personal Care | 1.040 | 1.980 | 2.45 | 3.4 | 5.40 |
Private Education | 0.341 | 0.974 | 1.80 | 2.6 | 3.64 |
str(dta4)
## num [1:5, 1:5] 22.2 10.5 3.53 1.04 0.341 44.5 15.5 5.76 1.98 0.974 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:5] "Food and Tobacco" "Household Operation" "Medical and Health" "Personal Care" ...
## ..$ : chr [1:5] "1940" "1945" "1950" "1955" ...
typeof(dta4)
## [1] "double"
# transform the dollar amounts to log base 10 unit
<-log10(dta4)
log_dta4 log_dta4
## 1940 1945 1950 1955 1960
## Food and Tobacco 1.34635297 1.64836001 1.7752463 1.8645111 1.9385197
## Household Operation 1.02118930 1.19033170 1.4623980 1.5622929 1.6646420
## Medical and Health 0.54777471 0.76042248 0.9872192 1.1461280 1.3242825
## Personal Care 0.01703334 0.29666519 0.3891661 0.5314789 0.7323938
## Private Education -0.46724562 -0.01144104 0.2552725 0.4149733 0.5611014
取log10後會壓縮變數的尺度,但變數之間的相關係數不會改變。
library(reshape2)
## Warning: 套件 'reshape2' 是用 R 版本 4.1.2 來建造的
##
## 載入套件:'reshape2'
## 下列物件被遮斷自 'package:tidyr':
##
## smiths
# wide to long
<- melt(log_dta4)
time_dta4 names(time_dta4) <- c("Item", "Year", "Dollar")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## 載入套件:'plyr'
## 下列物件被遮斷自 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Compute excess money
<- ddply(time_dta4, "Item", transform, excess = Dollar - mean(Dollar))
time_dta4 # show it
time_dta4
## Item Year Dollar excess
## 1 Food and Tobacco 1940 1.34635297 -0.368245036
## 2 Food and Tobacco 1945 1.64836001 -0.066237999
## 3 Food and Tobacco 1950 1.77524626 0.060648249
## 4 Food and Tobacco 1955 1.86451108 0.149913071
## 5 Food and Tobacco 1960 1.93851973 0.223921715
## 6 Household Operation 1940 1.02118930 -0.358981468
## 7 Household Operation 1945 1.19033170 -0.189839069
## 8 Household Operation 1950 1.46239800 0.082227231
## 9 Household Operation 1955 1.56229286 0.182122097
## 10 Household Operation 1960 1.66464198 0.284471209
## 11 Medical and Health 1940 0.54777471 -0.405390677
## 12 Medical and Health 1945 0.76042248 -0.192742899
## 13 Medical and Health 1950 0.98721923 0.034053848
## 14 Medical and Health 1955 1.14612804 0.192962654
## 15 Medical and Health 1960 1.32428246 0.371117073
## 16 Personal Care 1940 0.01703334 -0.376314119
## 17 Personal Care 1945 0.29666519 -0.096682268
## 18 Personal Care 1950 0.38916608 -0.004181374
## 19 Personal Care 1955 0.53147892 0.138131459
## 20 Personal Care 1960 0.73239376 0.339046302
## 21 Private Education 1940 -0.46724562 -0.617777736
## 22 Private Education 1945 -0.01144104 -0.161973158
## 23 Private Education 1950 0.25527251 0.104740391
## 24 Private Education 1955 0.41497335 0.264441233
## 25 Private Education 1960 0.56110138 0.410569269
#
library(ggplot2)
#
library(plyr)
#
qplot(Item, excess, data = time_dta4) +
geom_hline(yintercept = 0, colour = "grey50") +
geom_line(aes(group = 1)) +
facet_wrap(~ Year)
#
qplot(excess, Item, data = time_dta4) +
geom_segment(aes(xend = 0, yend = Item)) +
geom_vline(xintercept = 0, colour = "grey50") +
facet_wrap(~ Year, nrow = 1)
Use the Cushings{MASS} data set to generate a plot similar to the following one:
# input data
data(Cushings,package = "MASS")
<-Cushings
dta5
# look first 6 row
|> head() |> knitr::kable() dta5
Tetrahydrocortisone | Pregnanetriol | Type | |
---|---|---|---|
a1 | 3.1 | 11.70 | a |
a2 | 3.0 | 1.30 | a |
a3 | 1.9 | 0.10 | a |
a4 | 3.8 | 0.04 | a |
a5 | 4.1 | 1.10 | a |
a6 | 1.9 | 0.40 | a |
str(dta5)
## 'data.frame': 27 obs. of 3 variables:
## $ Tetrahydrocortisone: num 3.1 3 1.9 3.8 4.1 1.9 8.3 3.8 3.9 7.8 ...
## $ Pregnanetriol : num 11.7 1.3 0.1 0.04 1.1 0.4 1 0.2 0.6 1.2 ...
## $ Type : Factor w/ 4 levels "a","b","c","u": 1 1 1 1 1 1 2 2 2 2 ...
library(ggplot2)
library(ggrepel)
::p_load(hrbrthemes)
pacman#Extensions - ggrepel(講義p.41)
set.seed(20200410)
ggplot(dta5, aes(Pregnanetriol,
Tetrahydrocortisone, color=Type)) +
geom_point(pch=16,
color="peru")+
geom_label_repel(aes(label=Type)) +
labs(x="Tetrahydrocortisone (mg/24hours)",
y="Pregnanetriol (mg/24hours)") +
scale_color_manual(values=c('dodgerblue',
'indianred',
'forestgreen',
'goldenrod'))+
theme_ipsum()+
theme(legend.position='top')
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): Windows 字型資
## 料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## Windows 字型資料庫裡不明的字型系列
還沒找到怎麼換主題 theme_economist()