Fifty male and fifty female students fill out the same questionnaire in weekly intervals starting five weeks before an important examination to measure state anxiety. The research interests are: 1. whether there are gender difference in state anxiety 2. individual differences in state anxiety Explore the answers to both questions with plots involving confidence intervals or error bars for the means.
Source: Von Eye, A., & Schuster C. (1998). Regression Analysis for Social Sciences. San Diego: Academic Press.
Column 1: Anxiety score 5 weeks before exam for female Column 2: Anxiety score 4 weeks before exam for female Column 3: Anxiety score 3 weeks before exam for female Column 4: Anxiety score 2 weeks before exam for female Column 5: Anxiety score 1 weeks before exam for female Column 6: Anxiety score 5 weeks before exam for male Column 7: Anxiety score 4 weeks before exam for male Column 8: Anxiety score 3 weeks before exam for male Column 9: Anxiety score 2 weeks before exam for male Column 10: Anxiety score 1 weeks before exam for male
::p_load(dplyr, tidyr, readr, ggplot2)
pacman<- read.table("stateAnxiety.txt", header = T)
dta1 head(dta1)
f1 f2 f3 f4 f5 m1 m2 m3 m4 m5
1 13 17 18 20 24 6 14 22 20 24
2 26 31 33 38 42 4 11 14 12 23
3 13 17 24 29 32 17 25 26 29 38
4 22 24 26 27 29 19 22 26 30 34
5 18 19 19 22 30 12 21 21 23 24
6 32 31 30 31 32 11 16 20 19 22
str(dta1)
'data.frame': 50 obs. of 10 variables:
$ f1: int 13 26 13 22 18 32 16 18 14 20 ...
$ f2: int 17 31 17 24 19 31 16 22 17 19 ...
$ f3: int 18 33 24 26 19 30 21 25 23 23 ...
$ f4: int 20 38 29 27 22 31 27 29 21 25 ...
$ f5: int 24 42 32 29 30 32 30 35 25 28 ...
$ m1: int 6 4 17 19 12 11 14 9 12 11 ...
$ m2: int 14 11 25 22 21 16 23 18 16 13 ...
$ m3: int 22 14 26 26 21 20 26 20 23 17 ...
$ m4: int 20 12 29 30 23 19 29 20 26 14 ...
$ m5: int 24 23 38 34 24 22 33 24 32 20 ...
<- dta1 |>
dtaL mutate(ID = as.factor(paste0("S", 1:50))) |>
pivot_longer(cols = -ID, names_to = "week", values_to = "Score") |>
mutate(
Gender = as.factor(substr(week,1,1)),
Week=parse_number(week))|>
::select(-c(week)) |>
dplyr::select("ID", "Gender", "Week", "Score")
dplyrhead(dtaL)
# A tibble: 6 x 4
ID Gender Week Score
<fct> <fct> <dbl> <int>
1 S1 f 1 13
2 S1 f 2 17
3 S1 f 3 18
4 S1 f 4 20
5 S1 f 5 24
6 S1 m 1 6
str(dtaL)
tibble [500 x 4] (S3: tbl_df/tbl/data.frame)
$ ID : Factor w/ 50 levels "S1","S10","S11",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Gender: Factor w/ 2 levels "f","m": 1 1 1 1 1 2 2 2 2 2 ...
$ Week : num [1:500] 1 2 3 4 5 1 2 3 4 5 ...
$ Score : int [1:500] 13 17 18 20 24 6 14 22 20 24 ...
<-position_dodge(.2)
pd ggplot(dtaL, aes(x=Week, y=Score, group= Gender, color= Gender))+
geom_point(position=pd)+
stat_smooth(aes(color=Gender), method="lm", se=T)+
labs(x="Weeks bafore an important examination", y="Anxiety score")+
theme_bw()
女性相對男性在重要考試前有較高的焦慮分數
ggplot(dtaL, aes(x=Week, y=Score))+
geom_point()+
stat_smooth(aes(group=ID, color=ID), method = "lm", se=F)+
facet_grid(.~Gender)+
labs(x="Weeks bafore an important examination", y="Anxiety score")+
theme_bw()+
theme(legend.position="")
男性和女性在焦慮分數上初始值(截距)不同,其中女性每個人的斜率變化有較明顯差異,男性的斜率倒是比較一致。
Use the markdown file to replicate the contents of Weissgerber, T.L., Milic, N.M., Winham, S.J., Garovic, V.D. (2015). Beyond Bar and Line Graphs: Time for a New Data Presentation Paradigm. PLOS Biology , 13. The two data sets are here provided: journal.pbio.1002128.s002.XLS and journal.pbio.1002128.s003.XLS . You can also download everything in a zip file from this location.
library(readxl)
# read in data from PLOS Biology article supplementary materials
<- read_excel("journal.pbio.1002128.s002.XLSX", sheet = 1)
independent_data # subset just groups 1-5 from the 'No overlapping points' sheet
<- independent_data[15:30,2:6]
independent_data # assign column names
names(independent_data) <- independent_data[1, ]
# remove first row with column names
<- independent_data[-1, ]
independent_data ::kable(independent_data) knitr
Group 1 | Group 2 | Group 3 | Group 4 | Group 5 |
---|---|---|---|---|
5 | 7 | 9 | 42 | 2 |
3 | 3 | 7 | 2 | 0 |
6 | 9 | 10 | 5 | 3 |
8 | 10 | 12 | 55 | 5 |
10 | 33 | 14 | 9 | 7 |
13 | 15 | 17 | 12 | 10 |
1 | 18 | 20 | 15 | 13 |
4 | 6 | 40 | 3 | 1 |
18 | 20 | 22 | NA | 15 |
4 | 30 | 35 | NA | 1 |
7 | NA | 42 | NA | 4 |
9 | NA | 13 | NA | 6 |
14 | NA | NA | NA | 11 |
15 | NA | NA | NA | 12 |
17 | NA | NA | NA | 14 |
# reshape for plotting
library(tidyr)
<- gather(independent_data, group, value, `Group 1`:`Group 5`, convert = TRUE)
independent_data_long ::kable(independent_data_long) knitr
group | value |
---|---|
Group 1 | 5 |
Group 1 | 3 |
Group 1 | 6 |
Group 1 | 8 |
Group 1 | 10 |
Group 1 | 13 |
Group 1 | 1 |
Group 1 | 4 |
Group 1 | 18 |
Group 1 | 4 |
Group 1 | 7 |
Group 1 | 9 |
Group 1 | 14 |
Group 1 | 15 |
Group 1 | 17 |
Group 2 | 7 |
Group 2 | 3 |
Group 2 | 9 |
Group 2 | 10 |
Group 2 | 33 |
Group 2 | 15 |
Group 2 | 18 |
Group 2 | 6 |
Group 2 | 20 |
Group 2 | 30 |
Group 2 | NA |
Group 2 | NA |
Group 2 | NA |
Group 2 | NA |
Group 2 | NA |
Group 3 | 9 |
Group 3 | 7 |
Group 3 | 10 |
Group 3 | 12 |
Group 3 | 14 |
Group 3 | 17 |
Group 3 | 20 |
Group 3 | 40 |
Group 3 | 22 |
Group 3 | 35 |
Group 3 | 42 |
Group 3 | 13 |
Group 3 | NA |
Group 3 | NA |
Group 3 | NA |
Group 4 | 42 |
Group 4 | 2 |
Group 4 | 5 |
Group 4 | 55 |
Group 4 | 9 |
Group 4 | 12 |
Group 4 | 15 |
Group 4 | 3 |
Group 4 | NA |
Group 4 | NA |
Group 4 | NA |
Group 4 | NA |
Group 4 | NA |
Group 4 | NA |
Group 4 | NA |
Group 5 | 2 |
Group 5 | 0 |
Group 5 | 3 |
Group 5 | 5 |
Group 5 | 7 |
Group 5 | 10 |
Group 5 | 13 |
Group 5 | 1 |
Group 5 | 15 |
Group 5 | 1 |
Group 5 | 4 |
Group 5 | 6 |
Group 5 | 11 |
Group 5 | 12 |
Group 5 | 14 |
# Plotting groups 1, 2
library(ggplot2)
library(dplyr)
# subset groups 1 & 2
<- independent_data_long %>%
independent_data_long_groups_1_and_2 filter(group %in% c("Group 1", "Group 2"))
# plot
ggplot(independent_data_long_groups_1_and_2, aes(group, as.numeric(value))) +
geom_point(shape = 1, size = 4) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.2, size = 1) +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16)
# Plotting groups 1, 2 & 3
<- independent_data_long %>%
independent_data_long_groups_1_2_3 filter(group %in% c("Group 1", "Group 2", "Group 3"))
# plot
ggplot(independent_data_long_groups_1_2_3, aes(group, as.numeric(value))) +
geom_point(shape = 1, size = 4) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.2, size = 1) +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16)
# groups 1, 2, 3, & 4
<- independent_data_long %>%
independent_data_long_groups_1_2_3_4 filter(group %in% c("Group 1", "Group 2", "Group 3", "Group 4"))
# plot
ggplot(independent_data_long_groups_1_2_3_4, aes(group, as.numeric(value))) +
geom_point(shape = 1, size = 4) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.2, size = 1) +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16)
# all five groups -no subsetting required
ggplot(independent_data_long, aes(group, as.numeric(value))) +
geom_point(shape = 1, size = 4) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.2, size = 1) +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16)
library(readxl)
# read in data from PLOS Biology article supplementary materials
<- read_excel("journal.pbio.1002128.s002.XLSX", sheet = 2)
independent_data_j # subset data from the 'points jittered' sheet
<- independent_data_j[16:115,2:3]
independent_data_j # group numbers are not given in the spreadsheet, so we'll add them
$Groups <- c(rep(1, 20), rep(2, 20), rep(3, 20), rep(4, 20), rep(5, 20))
independent_data_j# assign column names
names(independent_data_j) <- c("Subject ID", "Measurement", "Group")
# plot
library(ggplot2)
library(dplyr)
# groups 1 & 2
<- independent_data_j %>%
independent_data_j_groups_1_and_2 filter(Group %in% 1:2)
# plot
ggplot(independent_data_j_groups_1_and_2, aes(as.factor(Group), as.numeric(Measurement))) +
geom_jitter(shape = 1, size = 4, position=position_jitter(width = 0.2, height = 0.2)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.2, size = 1) +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16)
# groups 1, 2 & 3
<- independent_data_j %>%
independent_data_j_groups_1_2_3 filter(Group %in% 1:3)
# plot
ggplot(independent_data_j_groups_1_2_3, aes(as.factor(Group), as.numeric(Measurement))) +
geom_jitter(shape = 1, size = 4, position=position_jitter(width = 0.2, height = 0.2)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.2, size = 1) +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16)
# groups 1, 2, 3, & 4
<- independent_data_j %>%
independent_data_j_groups_1_2_3_4 filter(Group %in% 1:4)
# plot
ggplot(independent_data_j_groups_1_2_3_4, aes(as.factor(Group), as.numeric(Measurement))) +
geom_jitter(shape = 1, size = 4, position=position_jitter(width = 0.2, height = 0.2)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.2, size = 1) +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16)
# all five groups
ggplot(independent_data_j, aes(as.factor(Group), as.numeric(Measurement))) +
geom_jitter(shape = 1, size = 4, position=position_jitter(width = 0.2, height = 0.2)) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.2, size = 1) +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16)
library(readxl)
# read in data from PLOS Biology article supplementary materials
<- read_excel("journal.pbio.1002128.s003.XLS", sheet = 1)
One_group_two_conditions # subset data from the 'points jittered' sheet
<- One_group_two_conditions[12:23,1:3]
One_group_two_conditions # assign column names
names(One_group_two_conditions) <- c("Subject ID", "Condition 1 Name", "Condition 2 Name")
$difference <- as.numeric(One_group_two_conditions$`Condition 2 Name`) - as.numeric(One_group_two_conditions$`Condition 1 Name`) One_group_two_conditions
# reshape for plotting
library(tidyr)
<- gather(One_group_two_conditions, group, value, `Condition 1 Name`:`Condition 2 Name`, -`Subject ID`, -difference, convert = TRUE) One_group_two_conditions_long
# plot
library(ggplot2)
library(gridExtra)
<- ggplot(One_group_two_conditions_long, aes(group, as.numeric(value), group = `Subject ID`)) +
g1 geom_point(shape = 1, size = 4) +
geom_line() +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# differences
<- ggplot(One_group_two_conditions_long, aes(x = 1, y = difference)) +
g2 geom_point(shape = 1, size = 4) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.001, size = 1) +
xlab("") +
ylab("Difference in Measurement (units)") +
theme_minimal(base_size = 16) +
scale_x_continuous(breaks = NULL) +
coord_fixed(ratio = 0.0005)
# combine the two plots
grid.arrange(g1, g2, ncol = 2)
library(readxl)
# read in data from PLOS Biology article supplementary materials
<- read_excel("journal.pbio.1002128.s003.XLS", sheet = 2)
Two_groups_two_conditions # subset data from the 'points jittered' sheet
<- Two_groups_two_conditions[12:41,2:5]
Two_groups_two_conditions # assign group names
$group <- c(rep("Group 1 Name", 15), rep("Group 2 Name", 15))
Two_groups_two_conditionsnames(Two_groups_two_conditions) <- c("Condition 1 Name A", "Condition 2 Name A", "Condition 1 Name B", "Condition 2 Name B")
# convert to simple long form
1] <- unlist(c(Two_groups_two_conditions[1:15,1], Two_groups_two_conditions[16:30,3]))
Two_groups_two_conditions[,2] <- unlist(c(Two_groups_two_conditions[1:15,2], Two_groups_two_conditions[16:30,4]))
Two_groups_two_conditions[,# drop unneeded columns
<- Two_groups_two_conditions[,c(1:2, 5)]
Two_groups_two_conditions # assign column names
names(Two_groups_two_conditions) <- c("Condition 1", "Condition 2", "Group")
$`Subject ID` <- 1:30
Two_groups_two_conditions# compute differences
$difference <- as.numeric(Two_groups_two_conditions$`Condition 2`) - as.numeric(Two_groups_two_conditions$`Condition 1`)
Two_groups_two_conditions
# convert to long again
library(tidyr)
<- gather(Two_groups_two_conditions, condition, value, c(`Condition 1`, `Condition 2`), convert = TRUE)
Two_groups_two_conditions_long ::kable(Two_groups_two_conditions_long) knitr
Group | Subject ID | difference | condition | value |
---|---|---|---|---|
Group 1 Name | 1 | 8 | Condition 1 | 5 |
Group 1 Name | 2 | 4 | Condition 1 | 1 |
Group 1 Name | 3 | 5 | Condition 1 | 7 |
Group 1 Name | 4 | 2 | Condition 1 | 9 |
Group 1 Name | 5 | 7 | Condition 1 | 2 |
Group 1 Name | 6 | -1 | Condition 1 | 6 |
Group 1 Name | 7 | 1 | Condition 1 | 4 |
Group 1 Name | 8 | 3 | Condition 1 | 11 |
Group 1 Name | 9 | -2 | Condition 1 | 14 |
Group 1 Name | 10 | 6 | Condition 1 | 13 |
Group 1 Name | 11 | NA | Condition 1 | NA |
Group 1 Name | 12 | NA | Condition 1 | NA |
Group 1 Name | 13 | NA | Condition 1 | NA |
Group 1 Name | 14 | NA | Condition 1 | NA |
Group 1 Name | 15 | NA | Condition 1 | NA |
Group 2 Name | 16 | -2 | Condition 1 | 20 |
Group 2 Name | 17 | -4 | Condition 1 | 13 |
Group 2 Name | 18 | 1 | Condition 1 | 15 |
Group 2 Name | 19 | 5 | Condition 1 | 8 |
Group 2 Name | 20 | 2 | Condition 1 | 3 |
Group 2 Name | 21 | 1 | Condition 1 | 7 |
Group 2 Name | 22 | -7 | Condition 1 | 14 |
Group 2 Name | 23 | 0 | Condition 1 | 12 |
Group 2 Name | 24 | 3 | Condition 1 | 11 |
Group 2 Name | 25 | 1 | Condition 1 | 9 |
Group 2 Name | 26 | NA | Condition 1 | NA |
Group 2 Name | 27 | NA | Condition 1 | NA |
Group 2 Name | 28 | NA | Condition 1 | NA |
Group 2 Name | 29 | NA | Condition 1 | NA |
Group 2 Name | 30 | NA | Condition 1 | NA |
Group 1 Name | 1 | 8 | Condition 2 | 13 |
Group 1 Name | 2 | 4 | Condition 2 | 5 |
Group 1 Name | 3 | 5 | Condition 2 | 12 |
Group 1 Name | 4 | 2 | Condition 2 | 11 |
Group 1 Name | 5 | 7 | Condition 2 | 9 |
Group 1 Name | 6 | -1 | Condition 2 | 5 |
Group 1 Name | 7 | 1 | Condition 2 | 5 |
Group 1 Name | 8 | 3 | Condition 2 | 14 |
Group 1 Name | 9 | -2 | Condition 2 | 12 |
Group 1 Name | 10 | 6 | Condition 2 | 19 |
Group 1 Name | 11 | NA | Condition 2 | NA |
Group 1 Name | 12 | NA | Condition 2 | NA |
Group 1 Name | 13 | NA | Condition 2 | NA |
Group 1 Name | 14 | NA | Condition 2 | NA |
Group 1 Name | 15 | NA | Condition 2 | NA |
Group 2 Name | 16 | -2 | Condition 2 | 18 |
Group 2 Name | 17 | -4 | Condition 2 | 9 |
Group 2 Name | 18 | 1 | Condition 2 | 16 |
Group 2 Name | 19 | 5 | Condition 2 | 13 |
Group 2 Name | 20 | 2 | Condition 2 | 5 |
Group 2 Name | 21 | 1 | Condition 2 | 8 |
Group 2 Name | 22 | -7 | Condition 2 | 7 |
Group 2 Name | 23 | 0 | Condition 2 | 12 |
Group 2 Name | 24 | 3 | Condition 2 | 14 |
Group 2 Name | 25 | 1 | Condition 2 | 10 |
Group 2 Name | 26 | NA | Condition 2 | NA |
Group 2 Name | 27 | NA | Condition 2 | NA |
Group 2 Name | 28 | NA | Condition 2 | NA |
Group 2 Name | 29 | NA | Condition 2 | NA |
Group 2 Name | 30 | NA | Condition 2 | NA |
# plot
library(ggplot2)
<- ggplot(Two_groups_two_conditions_long, aes(condition, as.numeric(value), group = `Subject ID`)) +
g1 geom_point(size = 4, shape = 1) +
geom_line() +
xlab("") +
ylab("Measurement (units)") +
theme_minimal(base_size = 16) +
facet_grid(~Group) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# difference
<- ggplot(Two_groups_two_conditions_long, aes(Group, difference)) +
g2 geom_point(size = 4, shape = 1) +
stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median, geom = "crossbar", width = 0.3) +
xlab("") +
ylab("Difference in Measurement (units)") +
theme_minimal(base_size = 16) +
coord_fixed(ratio = 0.15)
# combine the two plots
grid.arrange(g1, g2, ncol = 2)
The dataset consists of a sample of 14 primary school children between 8 and 12 years old. The children were asked to respond on 8 emotions and coping strategies scales for each of 6 situations: fail to fulfill assingments in class, not allowed to play with other children, forbidden to do something by the teacher, victim of bullying, too much school work, forbidden to do something by the mother. Plot the data in some meaningful ways. You may have to manipulate data into a different format first.
Column 1: Unpleasant (Annoy) Column 2: Sad Column 3: Afraid Column 4: Angry Column 5: Approach coping Column 6: Avoidant coping Column 7: Social support seeking Column 8: Emotional reaction, especially agression Column 9: Situation ID Column 10: Children ID Source: Roeder, I., Boekaerts, M., & Kroonenberg, P. M. (2002). The stress and coping questionnaire for children (School version and Asthma version): Construction, factor structure, and psychometric properties. Psychological Reports, 91, 29-36.
::p_load(reshape)
pacman<- read.table("coping.txt", header = T)
dta3 head(dta3)
annoy sad afraid angry approach avoid support agressive situation sbj
1 4 2 2 2 1.00 2.00 1.00 2.50 Fail S2
2 4 4 4 2 4.00 3.00 1.25 1.50 NoPart S2
3 2 2 2 2 2.67 3.00 1.00 2.33 TeacNo S2
4 4 3 4 4 4.00 1.50 3.25 1.00 Bully S2
5 4 2 1 1 1.00 2.75 1.25 1.50 Work S2
6 4 3 1 4 2.33 2.50 1.00 3.67 MomNo S2
str(dta3)
'data.frame': 84 obs. of 10 variables:
$ annoy : int 4 4 2 4 4 4 3 3 3 4 ...
$ sad : int 2 4 2 3 2 3 2 1 1 4 ...
$ afraid : int 2 4 2 4 1 1 2 1 1 2 ...
$ angry : int 2 2 2 4 1 4 2 2 2 1 ...
$ approach : num 1 4 2.67 4 1 2.33 2 1.33 1 1.67 ...
$ avoid : num 2 3 3 1.5 2.75 2.5 1 4 1 4 ...
$ support : num 1 1.25 1 3.25 1.25 1 1.5 2.75 1.33 3.5 ...
$ agressive: num 2.5 1.5 2.33 1 1.5 3.67 1 2 1.67 2.5 ...
$ situation: chr "Fail" "NoPart" "TeacNo" "Bully" ...
$ sbj : chr "S2" "S2" "S2" "S2" ...
names(dta3)<-c("Annoy", "Sad", "Afraid", "Angry",
"Approach", "Avoidance", "Socialsupport", "Aggression",
"Situation", "ChildrenID")
# wide to long
<- dta3|>
dta3L ::melt(id = c("ChildrenID", "Situation"),
reshapevariable_name = "Emotion")
::p_load(hrbrthemes,ggridges)
pacmanggplot(dta3L, aes(value,
color=Emotion,
fill=Emotion)) +
geom_density(alpha=.1,
bw="SJ-ste") +
theme_ipsum() +
theme(legend.position=c(.7,.6))
<- unlist(lapply(split(dta3L, dta3L$Emotion),
bw function(x) bw.SJ(x$value)))
ggplot(dta3L, aes(x=value,
y=Emotion,
color=Emotion,
fill=Emotion)) +
geom_density_ridges(bandwidth=bw, alpha=.5) +
theme_ridges()
bw.SJ implements the methods of Sheather & Jones (1991) to select the bandwidth using pilot estimation of derivatives.
<-position_dodge(.1)#避免重疊
pd <- dta3L|>
p ::group_by(Situation,Emotion) |> summarise(value=mean(value, na.rm=T)) |>as.data.frame() |>
dplyr::rename(m_value = value) |>
dplyr::unite(catg, Situation, Emotion) |>
tidyrggplot( ) +
aes(x=m_value,
y=reorder(catg, m_value)) +
geom_point(size=rel(1)) +
geom_segment(aes(xend=mean(dta3L$value),
yend=reorder(catg, m_value)),
position = pd)+
labs(x="score",
y="Situation, Emotion") +
theme_minimal()
+guides(y = guide_axis(n.dodge = 1))+theme(axis.text=element_text(size=5)) p
Use the USPersonalExpenditure{datasets} for this problem. This data set consists of United States personal expenditures (in billions of dollars) in the categories; food and tobacco, household operation, medical and health, personal care, and private education for the years 1940, 1945, 1950, 1955 and 1960. Plot the US personal expenditure data in the style of the third plot on the “Time Use” case study in the course web page. You might want to transform the dollar amounts to log base 10 unit first.
::p_load(tidyr)
pacman<- datasets::USPersonalExpenditure
dta4 class(dta4)
[1] "matrix" "array"
<- melt(dta4)
dta4L names(dta4L) <- c("Category", "Year", "Expenditure")
<- dta4L |>
dta4L mutate(logExp = log10(Expenditure))
ggplot(dta4L, aes(x=Year, y=logExp)) +
geom_point()+
geom_line()+
geom_hline(yintercept = 0, colour = "grey25",linetype="dashed") +
facet_wrap(.~Category, ncol=5)+
labs(x="Year",
y="log Expenditure (original in billion)")+
theme(axis.text.x=element_text(angle=45, hjust=1))
ggplot(dta4L, aes(Category, logExp)) +
# geom_point() +
geom_boxplot(col='gray') +
geom_point() +
facet_wrap(. ~ Year, ncol=5) +
theme(axis.text.x=element_text(angle=45, hjust=1))
Use the Cushings{MASS} data set to generate a plot similar to the following one:
::p_load(ggrepel, ggthemes)
pacman
<- MASS::Cushings
dta5 str(dta5)
'data.frame': 27 obs. of 3 variables:
$ Tetrahydrocortisone: num 3.1 3 1.9 3.8 4.1 1.9 8.3 3.8 3.9 7.8 ...
$ Pregnanetriol : num 11.7 1.3 0.1 0.04 1.1 0.4 1 0.2 0.6 1.2 ...
$ Type : Factor w/ 4 levels "a","b","c","u": 1 1 1 1 1 1 2 2 2 2 ...
$Type <- factor(dta5$Type,
dta5levels = c("u", "b", "c", "a"),
labels = c("Unknown",
"Bilateral Hyperplasia",
"Carcinoma",
"Adenoma"))
# for geom_text_repel() lable
<- subset(dta5, row.names(dta5) %in% c("a1","b1","c1","u1")) dta5v
ggplot(dta5, aes(Tetrahydrocortisone, Pregnanetriol, fill = Type, color = Type))+
geom_point(pch = 21, size = rel(2))+
# add point label (annotation is an alternative)
geom_text_repel(dta5v, mapping=aes(label = Type))+
# setting the breaks in x and y axis
scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10, 12))+
scale_x_continuous(limits = c(0, 60), breaks = c(0, 10, 20, 30, 40, 50, 60))+
# theme
theme_hc()+
# plot and axis title
labs(x = "Tetrahydrocortisone (mg/24 hours)",
y = "Pregnanetriol (mg/24 hours)",
title = "Cushings's syndrome")+
# title 靠右邊
theme(plot.title = element_text(hjust = 1),
legend.position="") # 不顯示legend