比较相对于固定参考的少量项目(或类别)之间的值变化。
分散条形图是一种可以同时处理负值和正值的条形图。这可以通过对geom_bar()进行巧妙的调整来实现。
默认情况下,geom_bar()将stat设置为count。这意味着,当您仅提供连续的X变量(而不提供Y变量)时,它将尝试从数据中生成直方图。
为了让geom_bar()绘制条形而不是直方图,您需要做两件事:一是设置stat='identity';二是在aes()中同时提供x和y,其中x为字符型或因子型变量,y为数值型变量。
为了确保得到的是分散条形图而不只是普通条形图,请确保分类变量具有2个类别,并且这两个类别在连续变量的某个阈值处发生切换。在下面的示例中,通过计算z得分对mtcars数据集中的mpg进行归一化。z得分不低于零的车辆标记为绿色,低于零的车辆标记为红色。
# Data Prep
library(tidyverse)
## -- Attaching packages ------------------------ tidyverse 1.3.0 --
## √ ggplot2 3.2.1 √ purrr 0.3.3
## √ tibble 2.1.3 √ dplyr 0.8.3
## √ tidyr 1.0.0 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.4.0
## -- Conflicts --------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the built-in mtcars dataset.
data("mtcars")

# Keep the car models (stored as row names) as an ordinary column.
mtcars[["car name"]] <- rownames(mtcars)

# Standardise mpg to z-scores, rounded to two decimals.
mtcars[["mpg_z"]] <- round(
  (mtcars[["mpg"]] - mean(mtcars[["mpg"]])) / sd(mtcars[["mpg"]]),
  2
)

# Flag each car as below (negative z-score) or above the average mileage.
mtcars[["mpg_type"]] <- ifelse(mtcars[["mpg_z"]] < 0, "below", "above")

# Sort the rows by ascending z-score.
mtcars <- mtcars[order(mtcars[["mpg_z"]]), ]

# Freeze the sorted order as factor levels so plots keep it on the axis.
mtcars[["car name"]] <- factor(
  mtcars[["car name"]],
  levels = mtcars[["car name"]]
)
# Preview the first rows of the prepared data.
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
## Maserati Bora 15.0 8 301 335 3.54 3.570 14.60 0 1 5 8
## car name mpg_z mpg_type
## Cadillac Fleetwood Cadillac Fleetwood -1.61 below
## Lincoln Continental Lincoln Continental -1.61 below
## Camaro Z28 Camaro Z28 -1.13 below
## Duster 360 Duster 360 -0.96 below
## Chrysler Imperial Chrysler Imperial -0.89 below
## Maserati Bora Maserati Bora -0.84 below
# Show the column types of the prepared data.
str(mtcars)
## 'data.frame': 32 obs. of 14 variables:
## $ mpg : num 10.4 10.4 13.3 14.3 14.7 15 15.2 15.2 15.5 15.8 ...
## $ cyl : num 8 8 8 8 8 8 8 8 8 8 ...
## $ disp : num 472 460 350 360 440 ...
## $ hp : num 205 215 245 245 230 335 180 150 150 264 ...
## $ drat : num 2.93 3 3.73 3.21 3.23 3.54 3.07 3.15 2.76 4.22 ...
## $ wt : num 5.25 5.42 3.84 3.57 5.34 ...
## $ qsec : num 18 17.8 15.4 15.8 17.4 ...
## $ vs : num 0 0 0 0 0 0 0 0 0 0 ...
## $ am : num 0 0 0 0 0 1 0 0 0 1 ...
## $ gear : num 3 3 3 3 3 5 3 3 3 5 ...
## $ carb : num 4 4 4 4 4 8 3 2 2 4 ...
## $ car name: Factor w/ 32 levels "Cadillac Fleetwood",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ mpg_z : num -1.61 -1.61 -1.13 -0.96 -0.89 -0.84 -0.81 -0.81 -0.76 -0.71 ...
## $ mpg_type: chr "below" "below" "below" "below" ...
# Diverging bar chart: one horizontal bar per car, with its length given by the
# normalised mpg and its fill colour by the above/below-average flag.
ggplot(mtcars, aes(x = `car name`, y = mpg_z, label = mpg_z)) +
  # geom_col() uses the y values directly as bar heights.
  geom_col(aes(fill = mpg_type), width = 0.5) +
  scale_fill_manual(
    name = "Mileage",
    labels = c("Above Average", "Below Average"),
    values = c("above" = "#00ba38", "below" = "#f8766d")
  ) +
  # Flip the axes so the car names run down the vertical axis.
  coord_flip() +
  labs(
    title = "Diverging Bars",
    subtitle = "Normalised mileage from 'mtcars'"
  ) +
  theme(
    plot.title = element_text(size = 20, hjust = 0.5),
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 10)
  )
棒棒糖图传达的信息与条形图和分散条形图相同,只是看起来更现代。这里不使用geom_bar,而是用geom_point和geom_segment来绘制棒棒糖。让我们使用在上一个分散条形图示例中准备的相同数据绘制一个棒棒糖图。
# Diverging lollipop chart: a dot at each car's z-score, a segment joining it
# to zero, and the z-score printed in white on top.
ggplot(mtcars, aes(x = `car name`, y = mpg_z, label = mpg_z)) +
  geom_point(stat = "identity", fill = "black", size = 6) +
  # Stem of the lollipop: from the zero baseline to the z-score.
  geom_segment(
    aes(x = `car name`, xend = `car name`, y = 0, yend = mpg_z),
    color = "black"
  ) +
  geom_text(color = "white", size = 2) +
  # Fixed symmetric range for the z-score axis.
  ylim(-2.5, 2.5) +
  coord_flip() +
  labs(
    title = "Diverging Lollipop Chart",
    subtitle = "Normalized mileage from 'mtcars': Lollipop"
  ) +
  theme(
    plot.title = element_text(size = 20, hjust = 0.5),
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 10)
  )
点图传达了类似的信息。除了仅使用点之外,其原理与分散条形图相同。下面的示例使用在分散条形图示例中准备的相同数据。
# Print only the car name and z-score columns.
select(mtcars, `car name`, mpg_z)
## car name mpg_z
## Cadillac Fleetwood Cadillac Fleetwood -1.61
## Lincoln Continental Lincoln Continental -1.61
## Camaro Z28 Camaro Z28 -1.13
## Duster 360 Duster 360 -0.96
## Chrysler Imperial Chrysler Imperial -0.89
## Maserati Bora Maserati Bora -0.84
## Merc 450SLC Merc 450SLC -0.81
## AMC Javelin AMC Javelin -0.81
## Dodge Challenger Dodge Challenger -0.76
## Ford Pantera L Ford Pantera L -0.71
## Merc 450SE Merc 450SE -0.61
## Merc 450SL Merc 450SL -0.46
## Merc 280C Merc 280C -0.38
## Valiant Valiant -0.33
## Hornet Sportabout Hornet Sportabout -0.23
## Merc 280 Merc 280 -0.15
## Pontiac Firebird Pontiac Firebird -0.15
## Ferrari Dino Ferrari Dino -0.06
## Mazda RX4 Mazda RX4 0.15
## Mazda RX4 Wag Mazda RX4 Wag 0.15
## Hornet 4 Drive Hornet 4 Drive 0.22
## Volvo 142E Volvo 142E 0.22
## Toyota Corona Toyota Corona 0.23
## Datsun 710 Datsun 710 0.45
## Merc 230 Merc 230 0.45
## Merc 240D Merc 240D 0.72
## Porsche 914-2 Porsche 914-2 0.98
## Fiat X1-9 Fiat X1-9 1.20
## Honda Civic Honda Civic 1.71
## Lotus Europa Lotus Europa 1.71
## Fiat 128 Fiat 128 2.04
## Toyota Corolla Toyota Corolla 2.29
# Diverging dot plot: one dot per car at its z-score, coloured by the
# above/below-average flag, with the z-score printed in white on top.
ggplot(mtcars, aes(x = `car name`, y = mpg_z, label = mpg_z)) +
  geom_point(stat = "identity", aes(colour = mpg_type), size = 6) +
  geom_text(color = "white", size = 2) +
  scale_colour_manual(
    name = "Mileage",
    labels = c("Above Average", "Below Average"),
    values = c("above" = "#00ba38", "below" = "#f8766d")
  ) +
  # Fixed symmetric range for the z-score axis.
  ylim(-2.5, 2.5) +
  coord_flip() +
  labs(
    title = "Diverging Dot Plot",
    subtitle = "Normalized mileage from 'mtcars': Dotplot"
  ) +
  theme(
    plot.title = element_text(size = 20, hjust = 0.5),
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 10)
  )
# Load the economics dataset from the ggplot2 package and preview its first rows.
data("economics", package = "ggplot2")
head(economics)
## # A tibble: 6 x 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1967-07-01 507. 198712 12.6 4.5 2944
## 2 1967-08-01 510. 198911 12.6 4.7 2945
## 3 1967-09-01 516. 199113 11.9 4.6 2958
## 4 1967-10-01 512. 199311 12.9 4.9 3143
## 5 1967-11-01 517. 199498 12.8 4.7 3066
## 6 1967-12-01 525. 199657 11.8 4.8 3018
# Relative change of the personal savings rate from one row to the next. The
# first row has no predecessor, so its value is set to 0.
# NOTE(review): the values are fractions (0.05 is a 5% change); they are not
# multiplied by 100 even though the plots label them as "%".
economics$returns_perc <- with(
  economics,
  c(0, diff(psavert) / head(psavert, -1))
)

# Axis ticks: every 12th date, labelled with its year.
brks <- economics$date[seq(1, nrow(economics), by = 12)]
lbls <- lubridate::year(brks)
brks
## [1] "1967-07-01" "1968-07-01" "1969-07-01" "1970-07-01" "1971-07-01"
## [6] "1972-07-01" "1973-07-01" "1974-07-01" "1975-07-01" "1976-07-01"
## [11] "1977-07-01" "1978-07-01" "1979-07-01" "1980-07-01" "1981-07-01"
## [16] "1982-07-01" "1983-07-01" "1984-07-01" "1985-07-01" "1986-07-01"
## [21] "1987-07-01" "1988-07-01" "1989-07-01" "1990-07-01" "1991-07-01"
## [26] "1992-07-01" "1993-07-01" "1994-07-01" "1995-07-01" "1996-07-01"
## [31] "1997-07-01" "1998-07-01" "1999-07-01" "2000-07-01" "2001-07-01"
## [36] "2002-07-01" "2003-07-01" "2004-07-01" "2005-07-01" "2006-07-01"
## [41] "2007-07-01" "2008-07-01" "2009-07-01" "2010-07-01" "2011-07-01"
## [46] "2012-07-01" "2013-07-01" "2014-07-01"
# Show the year labels that accompany the break dates.
print(lbls)
## [1] 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981
## [16] 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996
## [31] 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
## [46] 2012 2013 2014
# Area chart of the savings-rate returns over the whole series, with one
# vertical year label per break date on the x axis.
ggplot(economics, aes(x = date, y = returns_perc)) +
  geom_area() +
  scale_x_date(breaks = brks, labels = lbls) +
  labs(
    title = "Area Chart",
    subtitle = "Perc Returns for Personal Savings",
    y = "% Returns for Personal savings",
    caption = "Source: economics"
  ) +
  theme(
    axis.text.x = element_text(angle = 90),  # rotate the year labels
    plot.title = element_text(size = 20, hjust = 0.5),
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 10)
  )
# Area chart of the first 100 rows of the series, combined with the raw
# points, a connecting line and a linear trend line.
# Layers are drawn in the order listed, so the filled area is painted last,
# on top of the points, trend line and connecting line.
ggplot(economics[1:100, ], aes(x = date, y = returns_perc)) +
  geom_point(col = "red", size = 2.1) +
  # Straight-line fit without a confidence band. `FALSE` is spelled out
  # because `F` is an ordinary variable that can be reassigned, and the
  # formula is explicit so ggplot2 does not have to guess (and report) it.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, size = 1.2) +
  geom_line(size = 2) +
  geom_area(fill = "#f8766d") +
  scale_x_date(breaks = brks, labels = lbls) +
  labs(
    title = "Area Chart",
    subtitle = "Perc Returns for Personal Savings",
    y = "% Returns for Personal savings",
    caption = "Source: economics"
  ) +
  theme(
    axis.text.x = element_text(angle = 90),  # rotate the year labels
    plot.title = element_text(size = 20, hjust = 0.5),
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 10)
  )