# 加载必要包
library(scales)
library(tidyverse)
library(DT)
library(lubridate)
library(ggthemes)
library(gridExtra)
# 从NASA官网下载温度数据(直接读取在线CSV)
temp_data <- read_csv("https://data.giss.nasa.gov/gistemp/tabledata_v4/GLB.Ts+dSST.csv",
skip = 1, # 跳过标题行
na = "***") %>% # 将***识别为NA
select(Year = Year, Jan:Dec) %>% # 选择年份和各月数据
pivot_longer(cols = Jan:Dec,
names_to = "Month",
values_to = "Temp_Anomaly") %>% # 转换为长格式
mutate(
Date = ymd(paste(Year, Month, "15")), # 创建日期列(每月取15日)
Temp_Anomaly = as.numeric(Temp_Anomaly), # 确保温度是数值型
Month = factor(Month, levels = month.abb), # 将月份转为有序因子
Season = case_when(
Month %in% c("Dec", "Jan", "Feb") ~ "Winter",
Month %in% c("Mar", "Apr", "May") ~ "Spring",
Month %in% c("Jun", "Jul", "Aug") ~ "Summer",
Month %in% c("Sep", "Oct", "Nov") ~ "Fall"
)
) %>%
filter(!is.na(Temp_Anomaly), Year >= 1950) 数据可视化期末报告
1 报告要求
期末实验报告由5章节5个图形组成,每个章节需要作一个图形。
每个章节选择作什么图自主选择,作图前补充完整图形标题名称,例如:图形1——多变量条形图。
案例数据自主收集,不同章节可以公用一个数据集。但同学间不允许使用相同数据集。
每个章节的数据集合需要通过
datatable函数展示,并简要解释数据来源和变量意义。每个输出图形后需要对图形作简要解读,最少需针对图形提出一个观点。
渲染html文件保留代码展示,6月22日前将发布网址提交至共享文档
“8、期末报告”列中。评分标准:
每章节图形各20分
能有效输出图形和合理解释75%
数据独特性强10%
图形个性化强15%
2 类别数据可视化
2.1 案例数据解释与展示
数据是全球温度异常数据,来源于NASA表面温度分析,数据时间从1950到2023年
library(ggridges) library(viridis) # 计算各季节温度异常的密度分布 season_density <- temp_data %>% filter(Year >= 2000) %>% # 使用2000年后数据更显著 mutate( Season = factor(Season, levels = c("Winter", "Spring", "Summer", "Fall"), labels = c("冬季", "春季", "夏季", "秋季")) ) DT::datatable(head(temp_data, 10), caption = "NASA全球温度异常数据样本(1950-2023)")2.1.1 变量说明
变量名 类型 单位 含义 示例值 Year数值型 年 记录年份 2023 Month因子型 - 月份缩写(Jan-Dec) “Aug” Temp_Anomaly数值型 °C 温度异常值(相对于1951-1980年均值的偏差) +1.25 Date日期型 - 每月15日的标准化日期 2023-08-15 Season字符型 - 北半球季节(Winter/Spring/Summer/Fall) “Summer”
2.2 图形1——脊形图
ggplot(season_density,
aes(x = Temp_Anomaly, y = Season, fill = ..x..)) +
geom_density_ridges_gradient(
scale = 3, # 调整层叠高度
rel_min_height = 0.01, # 最小密度显示阈值
gradient_lwd = 0.5 # 渐变色边界线宽
) +
scale_fill_viridis(
name = "温度异常(°C)",
option = "C", # 使用viridis的magma配色
direction = -1 # 反转颜色方向
) +
labs(
title = "2000-2023年各季节温度异常分布",
subtitle = "脊形图展示不同季节的温度偏离模式",
x = "温度异常值(相对于1951-1980基准)",
y = NULL,
caption = "数据来源:NASA GISTEMP"
) +
theme_ridges() + # 专用主题
theme(
legend.position = "right",
plot.title = element_text(size = 16, face = "bold"),
axis.title.x = element_text(hjust = 0.5)
) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") # 基准线- 图形解读:峰的位置代表温度异常最有可能出现的值,
- 红色越深代表温度异常越高,
- 峰的宽度越宽代表温度变异越稳定
3 数据分布可视化
3.1 案例数据解释与展示
- NASA全球温度异常数据,将该数据划分为两个时期进行对比,1950-1979为基准,2000-2023为近期,从而对比近期的温度变异指数的变化
# 划分两个对比时期
temp_compare <- temp_data %>%
mutate(
Period = if_else(Year < 1980, "1950-1979 (基准期)", "2000-2023 (近期)"),
Period = factor(Period, levels = c("2000-2023 (近期)", "1950-1979 (基准期)"))
) %>%
filter(!is.na(Temp_Anomaly))
DT::datatable(head(temp_compare, 10),
caption = "NASA全球温度异常数据样本(1950-2023)")3.2 图形2——镜像直方图
library(tidyverse)
# 使用NASA数据创建对比时期
df <- temp_data %>%
mutate(
Period = ifelse(Year < 1980, "1950-1979 (基准期)", "2000-2023 (近期)"),
Period = factor(Period, levels = c("2000-2023 (近期)", "1950-1979 (基准期)"))
) %>%
filter(!is.na(Temp_Anomaly))
# 镜像直方图绘制
p <- ggplot(df) +
# 上部直方图(近期)
geom_histogram(
aes(x = Temp_Anomaly, y = ..density..),
data = ~ filter(.x, Period == "2000-2023 (近期)"),
fill = "#D53E4F",
alpha = 0.3,
color = "grey50",
bins = 30
) +
# 上部标签
geom_label(
aes(x = 1.5, y = 0.5),
label = "2000-2023 (近期)",
color = "#D53E4F",
size = 5
) +
# 下部直方图(基准期)
geom_histogram(
aes(x = Temp_Anomaly, y = -..density..),
data = ~ filter(.x, Period == "1950-1979 (基准期)"),
fill = "#3288BD",
alpha = 0.3,
color = "grey50",
bins = 30
) +
# 下部标签
geom_label(
aes(x = -0.5, y = -0.5),
label = "1950-1979 (基准期)",
color = "#3288BD",
size = 5
) +
# 基准线
geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
# 坐标轴调整
scale_y_continuous(
labels = abs,
breaks = seq(-1, 1, 0.2),
limits = c(-1, 1)
) +
labs(
x = "温度异常值(°C,相对于1951-1980基准)",
y = "密度",
title = "全球温度异常分布对比(镜像直方图)"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
panel.grid.major.y = element_blank()
)
print(p)- 图形解读:近期的温度异常整体偏右移,已经偏离基准期
- 近期分布右侧延申,呈独立峰形
- 这表明,近期极端天气温度上升且频率增多,气候变率增大
4 变量关系可视化
4.1 案例数据解释与展示
- 数据计算了季节指标,标准化气泡大小,然后根据不同变暖强度进行分级
# 计算各季节平均异常和极端事件频率
bubble <- temp_data %>%
filter(Year >= 1990) %>%
group_by(Year, Season) %>%
summarise(
Avg_Anomaly = mean(Temp_Anomaly, na.rm = TRUE),
Extreme_Events = sum(Temp_Anomaly > 1.5, na.rm = TRUE), # 统计极端高温事件次数
.groups = "drop"
) %>%
mutate(
Season = factor(Season,
levels = c("Winter", "Spring", "Summer", "Fall"),
labels = c("冬季", "春季", "夏季", "秋季"))
)
DT::datatable(head(bubble, 10),
caption = "NASA全球温度异常数据样本(1950-2023)")4.2 图形3——气泡图
ggplot(bubble,
aes(x = Year, y = Avg_Anomaly, color = Season)) +
# 气泡层(大小表示极端事件频率)
geom_point(aes(size = Extreme_Events), alpha = 0.7) +
# 趋势线
geom_smooth(method = "loess", se = FALSE, linewidth = 0.5) +
# 标注重大气候事件
annotate("text", x = 2016, y = 1.3,
label = "2016: 强厄尔尼诺", color = "#D62728", size = 3.5) +
annotate("text", x = 2020, y = 1.1,
label = "2020: 澳洲山火", color = "#FF7F0E", size = 3.5) +
# 坐标轴与图例设置
scale_size_continuous(
range = c(2, 8),
name = "极端高温事件次数",
breaks = c(1, 3, 5)
) +
scale_color_manual(
values = c("#1F77B4", "#2CA02C", "#D62728", "#FF7F0E"),
name = "季节"
) +
scale_y_continuous(
limits = c(-0.5, 1.5),
breaks = seq(-0.5, 1.5, 0.5)
) +
labs(
title = "全球季节温度异常与极端事件(1990-2023)",
subtitle = "气泡大小表示每季节温度异常>1.5°C的天数",
x = "年份",
y = "平均温度异常(°C)",
caption = "数据来源:NASA GISTEMP"
) +
theme_economist() +
theme(
legend.position = "right",
panel.grid.minor.y = element_blank(),
plot.title = element_text(face = "bold")
)- 图形解读:气泡代表不同季节发生的极端天气,据气泡密度可知,夏季极端事件频率多,冬天的极端天气温度变化大
5 样本相似性可视化
5.1 案例数据解释与展示
- 数据先按照季节顺序排序,再按照季节进行了标准化处理,转化为了宽格式,后面将数据转换为长格式
df4<- data.frame(
Decade = c("1980s", "1990s", "2000s", "2010s"),
Winter = c(0.2, 0.3, 0.5, 0.7),
Spring = c(0.3, 0.4, 0.6, 0.8),
Summer = c(0.5, 0.6, 0.8, 1.0),
Fall = c(0.4, 0.5, 0.7, 0.9)
)
DT::datatable(head(df4, 10),
caption = "NASA全球温度异常数据样本(1950-2023)")5.2 图形4——平面坐标图
df41<- df4 %>%
pivot_longer(
cols = c(Winter, Spring, Summer, Fall),
names_to = "Season",
values_to = "Temp_Anomaly"
) %>%
mutate(
Season = factor(Season,
levels = c("Winter", "Spring", "Summer", "Fall"),
labels = c("冬季", "春季", "夏季", "秋季")),
Decade = factor(Decade)
)
ggplot(df41,
aes(x = Season, y = Temp_Anomaly,
group = Decade, color = Decade)) +
# 1. 绘制折线
geom_line(linewidth = 0.5, alpha = 0.8) +
# 2. 绘制空心点(带灰边)
geom_point(
shape = 21, # 空心圆
size = 3, # 比原代码稍大
fill = "white", # 填充白色
stroke = 0.8, # 边框粗细
color = "grey40" # 边框颜色
) +
# 3. 图例与主题设置
theme_minimal() +
theme(
legend.position = c(0.75, 0.66), # 图例位置(内嵌)
legend.text = element_text(size = 9, color = "blue4"),
legend.direction = "horizontal", # 水平排列
legend.background = element_rect(fill = "grey90", color = "grey"),
panel.grid.minor.y = element_blank(), # 隐藏次要网格线
axis.text.x = element_text(angle = 45, hjust = 1) # X轴标签倾斜
) +
# 4. 图例调整
guides(color = guide_legend(
nrow = 2, # 改为2行
title = NULL, # 移除图例标题
keywidth = unit(1, "cm") # 图例键宽度
)) +
# 5. 坐标轴与标签
labs(
x = NULL,
y = "温度异常值(°C)",
title = "全球温度异常季节模式(1980s-2010s)"
) +
# 6. 颜色与标签格式
scale_color_brewer(palette = "Set1") + # 使用ColorBrewer配色
scale_y_continuous(
limits = c(0, 1.2),
breaks = seq(0, 1.2, 0.2),
labels = label_number(suffix = "°C") # Y轴添加单位
) +
# 7. X轴标签换行处理
scale_x_discrete(
labels = function(x) str_wrap(x, width = 2) # 每2字符换行
) +
# 8. 添加基准线
geom_hline(
yintercept = 0.5,
linetype = "dashed",
color = "red",
linewidth = 0.3
) +
annotate(
"text", x = 3.5, y = 0.55,
label = "IPCC警戒线",
color = "red",
size = 3
)- 图形解读:图形中,2010年代气候全季节温度升高,全球变暖加重;在1990s之后,全球气温都超过0.5°的ipcc警戒线,1990s-2010s冬季温度增幅最大
6 时间序列可视化
6.1 案例数据解释与展示
- 计算出数据的当年总异常的贡献百分比,并且按照季节进行排序
df5 <- temp_data %>%
filter(Year >= 2000) %>%
group_by(Year, Season) %>%
summarise(
Anomaly_Contribution = mean(Temp_Anomaly, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
Season = factor(Season,
levels = c("Winter", "Spring", "Summer", "Fall"),
labels = c("冬季", "春季", "夏季", "秋季"))
)
DT::datatable(head(df5, 10),
caption = "NASA全球温度异常数据样本(1950-2023)")6.2 图形5——堆叠面积图
ggplot(df5, aes(x = Year, y = Anomaly_Contribution, fill = Season)) +
geom_area(alpha = 0.8) +
geom_line(position = "stack", color = "grey30", linewidth = 0.3) +
scale_fill_brewer(
palette = "Blues",
name = "季节贡献",
guide = guide_legend(reverse = TRUE) # 图例顺序与堆叠顺序一致
) +
scale_x_continuous(breaks = seq(2000, 2023, 5)) +
labs(
title = "季节对全球温度异常的贡献(2000-2023)",
subtitle = "堆叠高度表示年度总异常,各颜色区块代表季节贡献",
x = "年份",
y = "温度异常值(°C)",
caption = "数据来源:NASA GISTEMP"
) +
theme_bw() +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold")
)- 图形解读:堆叠高度代表全年温度异常强度,由图可知,2020年后的贡献超过50%年度异常,冬季的面积逐年扩大,2015年后春季贡献明显增厚,春季提前变暖