R可视化各国收入和寿命的关系

基于R的各国收入和寿命关系可视化

## 加载库
library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(RColorBrewer)
library(gganimate)
library(readxl)
library(rworldmap)

## Loading required package: sp

## ### Welcome to rworldmap ###

## For a short introduction type :   vignette('rworldmap')

library(zoo)

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(RColorBrewer)

读取数据

## Load the three Gapminder indicator workbooks; each table is read from the
## sheet named "Data" (GDP per capita, population, life expectancy at birth).
gdpdata <- read_excel(
  path = "indicator gapminder gdp_per_capita_ppp.xlsx",
  sheet = "Data"
)
popudata <- read_excel(
  path = "indicator gapminder population.xlsx",
  sheet = "Data"
)
lifedata <- read_excel(
  path = "indicator life_expectancy_at_birth.xlsx",
  sheet = "Data"
)

数据预处理

## Collect the distinct values of the "GDP per capita" column of the GDP
## table; the script treats these as the country names.
Countrydata <- data.frame(
  Country = unique(gdpdata[["GDP per capita"]]),
  stringsAsFactors = FALSE
)
## Match the country names against the rworldmap country map (joined on the
## map's NAME field) so that each country's region can be looked up.
malMap <- joinCountryData2Map(
  dF = Countrydata,
  joinCode = "NAME",
  nameJoinColumn = "Country"
)

## 220 codes from your data successfully matched countries in the map
## 41 codes from your data failed to match with a country code in the map
## 23 codes from the map weren't represented in your data

## Country -> region lookup table taken from the joined map object
CRdata <- data.frame(
  Country = as.character(malMap$NAME),
  Region = as.character(malMap$REGION),
  stringsAsFactors = FALSE
)

## Attach the region to every country, then keep only the countries for
## which a region was found.
CounRegion <- Countrydata %>%
  left_join(CRdata, by = "Country")
CounRegion <- CounRegion[!is.na(CounRegion[["Region"]]), ]
nrow(CounRegion)

## [1] 188

## Reshape each wide table (first column = country, remaining columns = one
## per year) into long format with columns Country / Year / <value>.
Gdpdata <- gdpdata %>%
  tidyr::gather(key = "Year", value = "income", 2:217) %>%
  setNames(c("Country", "Year", "income")) %>%
  mutate(Year = as.integer(Year))

population <- popudata %>%
  tidyr::gather(key = "Year", value = "Number", 2:82) %>%
  setNames(c("Country", "Year", "Number")) %>%
  mutate(Year = as.integer(Year))

## Only the first 217 columns of the life-expectancy table are used
Lifedata <- lifedata[, 1:217] %>%
  tidyr::gather(key = "Year", value = "life", 2:217) %>%
  setNames(c("Country", "Year", "life")) %>%
  mutate(Year = as.integer(Year))

## Merge income, population and life expectancy by country and year, then
## attach each country's region.
GPdata <- left_join(Gdpdata, population, by = c("Country", "Year"))
GPLdata <- left_join(GPdata, Lifedata, by = c("Country", "Year"))
## Rows whose Region is missing are dropped
GPLRdata <- GPLdata %>%
  left_join(CounRegion, by = "Country") %>%
  filter(!is.na(Region))

colnames(GPLRdata)

## [1] "Country" "Year"    "income"  "Number"  "life"    "Region"

## Fill missing population values with the most recent earlier observation of
## the SAME country.
## - Rows are ordered by Country, then Year, and the fill runs inside each
##   country group, so a value is never carried over from the end of one
##   country into the start of the next.
## - na.rm = FALSE keeps leading NAs (years before a country's first
##   observation). zoo::na.locf drops them by default, which would make the
##   result shorter than the column and break mutate().
## The result is converted back to a plain, ungrouped data.frame.
GPLRdata <- GPLRdata %>%
  arrange(Country, Year) %>%
  group_by(Country) %>%
  mutate(Number2 = na.locf(Number, na.rm = FALSE)) %>%
  ungroup() %>%
  as.data.frame()


head(GPLRdata)

可视化散点图

## Snapshot of a single year (2001)
plotdata <- GPLRdata[GPLRdata[["Year"]] == 2001, ]

## Income vs. life expectancy; point colour = region, point size = filled
## population in units of 1e8.
ggplot(data = plotdata, mapping = aes(x = income, y = life, colour = Region)) +
  geom_point(mapping = aes(size = Number2 / 1e8), alpha = 0.8) +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom")

## Warning: Removed 21 rows containing missing values (geom_point).

将每个洲的数据分开

## Same scatter plot as before, drawn as one panel per region
ggplot(data = plotdata, mapping = aes(x = income, y = life, colour = Region)) +
  geom_point(mapping = aes(size = Number2 / 1e8), alpha = 0.8) +
  facet_wrap(~ Region) +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom")

## Warning: Removed 21 rows containing missing values (geom_point).

动态散点图1

## Animated scatter plot over all years: the animation advances along Year
## and the title shows the time of the current frame.
ggplot(
  data = GPLRdata,
  mapping = aes(x = income, y = life, colour = Region, size = Number2 / 1e8)
) +
  geom_point(na.rm = TRUE) +
  scale_color_brewer(palette = "Set1") +
  labs(title = "Year: {frame_time}") +
  theme_bw() +
  theme(legend.position = "bottom") +
  transition_time(Year) +
  ease_aes("linear")

动态散点图2

## Animated scatter plot with one panel per region (three rows of panels);
## the point legend is hidden.
ggplot(
  data = GPLRdata,
  mapping = aes(x = income, y = life, colour = Region, size = Number2 / 1e8)
) +
  geom_point(na.rm = TRUE, show.legend = FALSE) +
  facet_wrap(~ Region, nrow = 3) +
  scale_color_brewer(palette = "Set1") +
  labs(title = "Year: {frame_time}") +
  theme_bw() +
  transition_time(Year) +
  ease_aes("linear")