# 导入数据
# 如果联网,从Gitee上导入数据,或者下载后导入
library(readr)
ldt_data <- read_csv("https://gitee.com/vv_victorwei/statistics_for_undergraduate/raw/master/dataset/ldt_data.csv")
## Rows: 100 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): id
## dbl (6): age, language, rt_word, rt_nonword, acc_word, acc_nonword
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 导入ggplot2
library(ggplot2)
# 如果想要在统计图中使用中文,可以导入显示汉字的库
library(showtext)
## Warning: 程辑包'showtext'是用R版本4.3.1 来建造的
## 载入需要的程辑包:sysfonts
## Warning: 程辑包'sysfonts'是用R版本4.3.1 来建造的
## 载入需要的程辑包:showtextdb
## Warning: 程辑包'showtextdb'是用R版本4.3.1 来建造的
showtext_auto()
#此处自动使用字体,如果想要修改字体,可以参考:https://rpubs.com/maomaoworm/679092
# 数据变量描述
# •Participant information:
# – id: participant ID
# – age: age
# • One between-subjects independent variable (IV):
# – language: language group (1 = monolingual,
# 2 = bilingual)
# • Four columns for the two dependent variables
# (DVs) of reaction time (RT) and accuracy, crossed
# by the within-subjects IV of condition:
# – rt_word: reaction time (milliseconds) for
# word trials
# – rt_nonword: reaction time (milliseconds) for
# nonword trials
# – acc_word: accuracy for word trials
# – acc_nonword: accuracy for nonword trials.
# 修改language的变量类型为factor
ldt_data$language<-factor(ldt_data$language)
# 最基本的散点图,x和y如果按照顺序,可以省略x=和y=
ggplot(ldt_data, aes(x = age, y = rt_word)) +
geom_point()
ggplot(ldt_data, aes(x = age, y = rt_word)) +
geom_point(aes(colour='red'))
ggplot(ldt_data, aes(x = age, y = rt_word)) +
geom_point(shape = 3)
ggplot(ldt_data, aes(x = age, y = rt_word)) +
geom_point(shape = '点')
# 绘制年龄和阅读词语时间的散点图
ggplot(ldt_data, aes(x = age, y = rt_word)) +
geom_point() +
geom_smooth(method = "lm",se=FALSE,colour='red')
## `geom_smooth()` using formula = 'y ~ x'
#如果没有指定直线,会拟合曲线
ggplot(ldt_data, aes(x = age, y = rt_word)) +
geom_point() +
geom_smooth(se=FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# 分组(单语,双语),绘制年龄和阅读词语时间的散点图
ggplot(ldt_data, aes(x = age, y = rt_word, color=language)) +
geom_point() +
geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
#修改坐标系的变量名以及分组变量的名称及颜色,使用scale_colour_manual
ggplot(ldt_data, aes(x = age, y = rt_word, color=language)) +
geom_point() +
geom_smooth(method = "lm")+
xlab("年龄")+
ylab("反应时")+
scale_colour_manual(name = "语言类型",labels = c("单语", "双语"),values=c('red','blue'))
## `geom_smooth()` using formula = 'y ~ x'
#使用预先设定的颜色方案,使用scale_color_brewer
ggplot(ldt_data, aes(x = age, y = rt_word, color=language)) +
geom_point() +
geom_smooth(method = "lm")+
xlab("年龄(岁)")+
ylab("反应时(毫秒)")+
scale_color_brewer(palette = "Dark2",name = "语言类型",labels = c("单语", "双语"))
## `geom_smooth()` using formula = 'y ~ x'
#采用多个面板呈现多个相似图
# ~language表明分组/板变量,labeller修改分组变量水平(为了避免这种修改,可以将变量值直接设定为单语和双语)
ggplot(ldt_data, aes(x = age, y = rt_word)) +
geom_point() +
geom_smooth(method = "lm")+
xlab("年龄(岁)")+
ylab("反应时(毫秒)")+
facet_wrap(~language,
labeller = labeller(language = c('1' = "单语",'2' = "双语")))
## `geom_smooth()` using formula = 'y ~ x'
#采用patchwork直接用+进行展示
library(patchwork)
## Warning: 程辑包'patchwork'是用R版本4.3.2 来建造的
p1<-ggplot(ldt_data, aes(x = age, y = rt_word, color=language)) +
geom_point() +
geom_smooth(method = "lm")
p2<-ggplot(ldt_data, aes(x = age, y = rt_word)) +
geom_point(shape = '点')
p1+p2+p1+p2
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# 读取在线数据,或下载后导入
library(readxl)
url <- "https://gitee.com/vv_victorwei/statistics_for_undergraduate/raw/master/dataset/shmetro.xlsx"
destfile <- "shmetro.xlsx"
curl::curl_download(url, destfile)
shmetro <- read_excel(destfile, col_names = FALSE)
## New names:
## • `` -> `...1`
## • `` -> `...2`
# 修改变量名
names(shmetro)<-c('date','count')
# 绘制线图,在指定时间,上海地铁客运量变化
ggplot(shmetro,aes(date,count))+
geom_line(color='red')
#直方图展示单个变量的分布情况
ggplot(ldt_data, aes(x = rt_word)) +
geom_histogram(binwidth = 10, fill = "white", colour = "black") +
scale_x_continuous(name = "反应时(毫秒)")
ggplot(ldt_data, aes(x = rt_word)) +
scale_x_continuous(name = "反应时(毫秒)")+
geom_freqpoly(binwidth = 10)
#分组呈现直方图
ggplot(ldt_data, aes(x = rt_word,fill = language, color=language)) +
geom_histogram(binwidth = 10,alpha=0.5) +
scale_x_continuous(name = "反应时(毫秒)")
# 使用两个面板
ggplot(ldt_data, aes(x = rt_word)) +
geom_histogram(binwidth = 10, fill = "white", colour = "black") +
facet_wrap(~language)+
scale_x_continuous(name = "反应时(毫秒)")
# x和y采用不同类型变量,一个是discrete离散,一个是连续的。离散可以指定x轴名称,连续可以指定间隔
# 直接一个变量的话,默认统计频次
ggplot(ldt_data,aes(x = language)) +
geom_bar()+
scale_x_discrete(name = "语言类型",
labels = c("单语", "双语"))+
scale_y_continuous(name='数量',
breaks = c(0,10,20,30,40,50))
#可以指定显示language各组的均值
ggplot(ldt_data,aes(x = language,y=rt_word)) +
geom_bar(stat = 'summary_bin',fun=mean)+
ggplot(ldt_data,aes(x = language,y=rt_word)) +
stat_summary(fun = "mean", geom = "bar", size = 3)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
##箱线图或小提琴图
#单个变量的箱线图
ggplot(ldt_data, aes(rt_word)) + geom_boxplot()
# 分组的箱线图
ggplot(ldt_data, aes(language, rt_word)) + geom_boxplot()
ggplot(ldt_data, aes(language, rt_word)) + geom_violin()
# ggplot一般只对数据直接进行绘图,如提供均值可以绘制图形。但如果需要对原始数据进行计算后的统计量制图,需要进行统计操作
df<-data.frame(age=c(4,5,6,4,5,6),
condition=rep(c('congruent','incongruent'),each=3),
mean=c(1055,704,730,1238,769,803),
sd=c(15,13,15,19,14,13)
)
ggplot(df, aes(x=factor(age), y=mean, color=condition)) +
geom_line(aes(group = condition,colour=condition)) +
geom_point(size=3) +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd),width=0.2)+
labs(x="年龄",y="反应时间(ms)")+
scale_y_continuous(limits = c(500,1300))+
scale_x_discrete(labels=c('4岁','5岁','6岁'))+
scale_color_brewer(palette = "Dark2",name = "朝向类型",labels = c("一致", "不一致"))
ggplot(ldt_data, aes(x = language, y = rt_word)) +
stat_summary(fun = "mean", geom = "point", size = 3) +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2) +
scale_x_discrete(name = "语言类型",
labels = c("单语", "双语"))+
scale_y_continuous(name="反应时",
limits = c(330,360))
## Warning: Removed 73 rows containing non-finite values (`stat_summary()`).
## Removed 73 rows containing non-finite values (`stat_summary()`).
# 包含被试间变量和被试内变量的均值和标准误的差异比较图
# 首先需要整理数据,将短宽型转化为长窄型,即被试内变量作为个体的不同水平,以下演示不用tidyverse的做法
# s1 rt_word rt_nonword 改为
# s1 condition1 rt1
# s1 condition2 rt2
# 将被试间变量重复一遍,增加condition变量,分别为word和nonword,然后将两种条件下的rt和acc合并即可
long_data<-data.frame(id=rep(ldt_data$id,2),
age=rep(ldt_data$age,2),
language=rep(ldt_data$language,2),
condition=rep(c('word','nonword'),each=dim(ldt_data)[1]),
rt=c(ldt_data$rt_word,ldt_data$rt_nonword),
acc=c(ldt_data$acc_word,ldt_data$acc_nonword)
)
ggplot(long_data, aes(x = condition, y = rt,
shape = language,
group = language,
color = language)) +
stat_summary(fun = "mean", geom = "point", size = 3) +
stat_summary(fun = "mean", geom = "line") +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2) +
scale_color_manual(values = c("blue", "darkorange")) +
scale_x_discrete(name = "语言类型",
labels = c("单语", "双语"))+
theme_classic()
# 对于被试内变量,往往采用折线反映不同水平上的变化,尤其是对于重复测量数据中,使用更频繁
ggplot(long_data, aes(x = condition, y = rt, group = language, shape = language)) +
geom_point(aes(colour = language),alpha = .2) +
geom_line(aes(group = id, colour = language), alpha = .2) +
stat_summary(fun = "mean", geom = "point", size = 2, colour = "black") +
stat_summary(fun = "mean", geom = "line", colour = "black") +
stat_summary(fun.data = "mean_se", geom = "errorbar", width = .2, colour = "black") +
theme_minimal()