library(ggplot2)
library(datasets)
Перший датасет
# Preview the first six rows of the built-in mtcars data set.
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# List the column (variable) names of mtcars.
colnames(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
Найпростіші залежності:
# Quick base-graphics scatter plots of pairs of mtcars columns:
# wt vs. mpg, disp vs. hp, and qsec vs. carb.
plot(x = mtcars$wt, y = mtcars$mpg)
plot(x = mtcars$disp, y = mtcars$hp)
plot(x = mtcars$qsec, y = mtcars$carb)
Давайте для звичності переведемо у наші одиниці виміру
# Build a metric copy of mtcars; the original data set is left untouched.
my_cars <- mtcars
# Miles per US gallon -> litres per 100 km
# (3.78541178 l per US gallon, 1.609344 km per mile).
my_cars[["mpg"]] <- 100 * 3.78541178 / (1.609344 * my_cars[["mpg"]])
# Cubic inches -> litres (0.016387064 l per cubic inch).
my_cars[["disp"]] <- my_cars[["disp"]] * 0.016387064
# 0.45359237 is kg per lb; mtcars documents wt in units of 1000 lb, so the
# result is presumably in tonnes -- confirm the intended unit.
my_cars[["wt"]] <- my_cars[["wt"]] * 0.45359237
# 4.02336 is a quarter mile (402.336 m) expressed in hundreds of metres, so
# this presumably turns the quarter-mile time into seconds per 100 m.
my_cars[["qsec"]] <- my_cars[["qsec"]] / 4.02336
Для використання бібліотеки ggplot2 потрібно визначити (як мінімум) три елементи: дані, естетичні відображення (aes) та геометричний об'єкт (geom).
library(ggplot2)
# Three basic ggplot2 charts built from the converted data:
# a scatter plot of wt against mpg, ...
ggplot(data = my_cars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
# ... a bar chart counting cars per number of cylinders, ...
ggplot(data = my_cars, mapping = aes(x = factor(cyl))) +
  geom_bar()
# ... and a histogram of mpg with bins three units wide.
ggplot(data = my_cars, mapping = aes(x = mpg)) +
  geom_histogram(binwidth = 3)
library(ggplot2)
# Box plots of a numeric column split by a categorical one:
# wt per number of cylinders, then qsec per value of am.
ggplot(data = my_cars, mapping = aes(x = factor(cyl), y = wt)) +
  geom_boxplot()
ggplot(data = my_cars, mapping = aes(x = factor(am), y = qsec)) +
  geom_boxplot()
# Violin plots of fuel consumption by value of am.
# my_cars$mpg has been converted to litres per 100 km, so the y axis is
# labelled with that unit rather than "MPG".
#
# First variant: fill is mapped to the numeric am column, which ggplot2
# treats as a continuous variable (gradient legend).
ggplot(my_cars, aes(x = factor(am), y = mpg, fill = am)) +
  geom_violin() +
  xlab("transmission") +
  ylab("літрів на 100 км")
# Second variant: fill is mapped to factor(am), giving one discrete colour
# per group.
ggplot(my_cars, aes(x = factor(am), y = mpg, fill = factor(am))) +
  geom_violin() +
  xlab("transmission") +
  ylab("літрів на 100 км")
# Scatter plot of wt against mpg with a LOESS smoother on top.
ggplot(my_cars, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess")
# Scatter plot of hp against mpg with a straight-line (lm) fit and no
# confidence band; axis labels and title are set in a single labs() call.
ggplot(my_cars, aes(x = hp, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Вплив кількості кінських сил на витрати пального",
    x = "кінських сил",
    y = "літрів на 100 км"
  )
# Scatter plot of wt against mpg coloured by number of cylinders.
# Note: this one reads the original mtcars, not the converted my_cars copy.
ggplot(mtcars, aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_point()
# Scatter plot of mpg against wt from my_cars: point colour follows am,
# point size follows wt, and a LOESS curve is drawn without its
# confidence band.
# FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
ggplot(my_cars, aes(x = mpg, y = wt)) +
  geom_point(aes(colour = factor(am), size = wt)) +
  geom_smooth(method = "loess", se = FALSE)
Набір даних зі схожими вимірами.
# Preview the first six rows of the mpg data set.
head(mpg, n = 6L)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p
## # ... with 1 more variables: class <chr>
Тепер ще декілька графіків
# City vs. highway mileage with a linear fit and no confidence band.
# FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Same city vs. highway relationship, drawn with small jittered points
# (horizontal jitter of 0.5).
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_jitter(width = 0.5, size = 1) +
  labs(
    title = "Jittered Points",
    subtitle = "mpg: city vs highway mileage",
    x = "cty",
    y = "hwy"
  )
# Count plot: point size reflects how many observations share each
# (cty, hwy) pair; the legend is hidden.
# FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_count(colour = "tomato", show.legend = FALSE)
Складний аналіз. Усереднення за виробниками.
# Average city mileage (cty) per manufacturer.
cty_mpg <- aggregate(
  x = mpg$cty,
  by = list(mpg$manufacturer),
  FUN = mean
)
# Give the grouping column and the averaged column descriptive names.
names(cty_mpg) <- c("make", "mileage")
# Show the first four manufacturers.
head(cty_mpg, n = 4L)
## make mileage
## 1 audi 17.61111
## 2 chevrolet 15.00000
## 3 dodge 13.13514
## 4 ford 14.00000
# Add three helper columns to mtcars:
# the car names, taken from the row names, ...
mtcars[["car name"]] <- rownames(mtcars)
# ... the z-score of mpg (centred on the mean, scaled by the standard
# deviation), rounded to two decimals, ...
mtcars[["mpg_z"]] <- round(
  (mtcars[["mpg"]] - mean(mtcars[["mpg"]])) / sd(mtcars[["mpg"]]),
  2
)
# ... and a flag telling whether that z-score is below zero or not.
mtcars[["mpg_type"]] <- ifelse(mtcars[["mpg_z"]] < 0, "below", "above")
# Diverging bar chart: one horizontal bar per car showing its normalised
# mpg (mpg_z), filled according to the mpg_type flag.
ggplot(mtcars, aes(x = `car name`, y = mpg_z, label = mpg_z)) +
  geom_bar(aes(fill = mpg_type), stat = "identity", width = 0.5) +
  scale_fill_manual(
    name = "Mileage",
    values = c("above" = "#00ba38", "below" = "#f8766d"),
    labels = c("Above Average", "Below Average")
  ) +
  labs(
    title = "Diverging Bars",
    subtitle = "Normalised mileage from 'mtcars'"
  ) +
  # Flip the axes so that the car names are listed along the vertical axis.
  coord_flip()
# Завдання. Повторити цей графік для попереднього датасету (показник — витрата пального)
# Ordered bar chart of the average mileage per make.
# reorder() sorts the makes by their mileage so that the bars really are
# drawn in ascending order, as the title promises; without it the bars
# appear in alphabetical order of make.
ggplot(cty_mpg, aes(x = reorder(make, mileage), y = mileage)) +
  geom_bar(stat = "identity", width = 0.5, fill = "tomato3") +
  labs(
    title = "Ordered Bar Chart",
    subtitle = "Make Vs Avg. Mileage",
    caption = "source: mpg",
    # Keep the plain axis title instead of the reorder(...) expression.
    x = "make"
  ) +
  # Tilt the make names so that they do not overlap.
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))
# Stacked bar chart: number of models per manufacturer, with each bar
# split by vehicle class.
g <- ggplot(data = mpg, mapping = aes(x = manufacturer))
g +
  geom_bar(mapping = aes(fill = class), width = 0.5) +
  labs(
    title = "Histogram on Categorical Variable",
    subtitle = "Manufacturer across Vehicle Classes"
  ) +
  # Tilt the manufacturer names so that they do not overlap.
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))
# Semi-transparent density curves of city mileage, one per number of
# cylinders.
g <- ggplot(data = mpg, mapping = aes(x = cty))
g +
  geom_density(mapping = aes(fill = factor(cyl)), alpha = 0.8) +
  labs(
    title = "Density plot",
    subtitle = "City Mileage Grouped by Number of cylinders",
    caption = "Source: mpg",
    x = "City Mileage",
    fill = "# Cylinders"
  )
# Box plot of city mileage per vehicle class.
g <- ggplot(mpg, aes(x = class, y = cty))
# varwidth = TRUE requests variable-width boxes. TRUE is spelled out
# because the shorthand T is an ordinary variable that can be reassigned.
g +
  geom_boxplot(varwidth = TRUE, fill = "plum") +
  labs(
    title = "Box plot",
    subtitle = "City Mileage grouped by Class of vehicle",
    caption = "Source: mpg",
    x = "Class of Vehicle",
    y = "City Mileage"
  )
# Count the rows per manufacturer and turn the table into a data frame
# with the columns Var1 (manufacturer) and Freq (count).
freqtable <- table(mpg$manufacturer)
df <- as.data.frame(freqtable)
# Bar chart of those pre-computed counts.
g <- ggplot(data = df, mapping = aes(x = Var1, y = Freq))
g +
  geom_bar(stat = "identity", width = 0.5, fill = "tomato2") +
  labs(
    title = "Bar Chart",
    subtitle = "Manufacturer of vehicles",
    caption = "Source: Frequency of Manufacturers from 'mpg' dataset"
  ) +
  # Tilt the manufacturer names so that they do not overlap.
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))
library(maps)
library(ggplot2)
# Draw every polygon of the "world2" map database with blue outlines.
# No fill aesthetic is mapped here, so no fill scale is added (it would
# have nothing to act on), and the polygon layer is added exactly once:
# a geom_polygon() call on a line of its own is not part of the plot and
# would only print the layer object to the console.
world_map <- map_data("world2")
ggplot(world_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "blue")
# Extract the outlines of four East Asian countries from the "world" map
# database and draw them, one fill colour per region, with black borders.
east_asia <- map_data(
  map = "world",
  region = c("Japan", "China", "North Korea", "South Korea")
)
ggplot(data = east_asia,
       mapping = aes(x = long, y = lat, group = group, fill = region)) +
  geom_polygon(colour = "black") +
  scale_fill_brewer(palette = "Set2")
# Four Eastern European countries from the "world" map database, filled
# per region with the default palette and outlined in black.
east_europe <- map_data(
  map = "world",
  region = c("Ukraine", "Poland", "Moldova", "Romania")
)
ggplot(data = east_europe,
       mapping = aes(x = long, y = lat, group = group, fill = region)) +
  geom_polygon(colour = "black")
# Ukraine on its own, this time taken from the "world2" map database.
some_countries <- map_data(map = "world2", region = c("Ukraine"))
ggplot(data = some_countries,
       mapping = aes(x = long, y = lat, group = group, fill = region)) +
  geom_polygon(colour = "black")