Tom tat cac buoc chinh trong ggplot2

Bieu do histogram theo luong va gioi tinh

Nhap lieu

# Load the salary data set into `datt`.
# NOTE: the path is machine-specific; point it at the local Salaries.csv.
# na.strings must be a character vector of missing-value markers; the
# original passed the logical `T`, which replaced the default "NA" marker.
datt <- read.csv(
  "E:/CONG VIEC/Ky nang ngoai/Xu ly so lieu Bang R/Van Lang R and Machine Learning 2023/Thuc hanh ngay 1/Salaries.csv",
  header = TRUE,
  na.strings = "NA"
)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.3.1
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
# Show the column names of the loaded data frame.
colnames(datt)
## [1] "ID"            "Rank"          "Discipline"    "Yrs.since.phd"
## [5] "Yrs.service"   "Sex"           "Salary"

Ve bieu do histogram theo luong va so luong

# Count histogram of Salary using the default binning.
ggplot(datt, aes(x = Salary)) +
  geom_histogram(fill = "mediumpurple", colour = "seashell1")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Trang tri

# Same count histogram, with a black-and-white theme plus a custom title
# and axis labels.
ggplot(datt, aes(x = Salary)) +
  geom_histogram(fill = "mediumpurple", colour = "mediumpurple4") +
  theme_bw() +
  labs(title = "Phân bố lương", x = "Mức lương", y = "Số lượng")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Ve histogram theo Density

# Density-scaled histogram of Salary with a kernel density curve on top.
# after_stat(density) replaces the `..density..` notation, which was
# deprecated in ggplot2 3.4.0.
p <- ggplot(datt, aes(x = Salary)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "mediumpurple",
    colour = "mediumpurple4"
  ) +
  theme_bw() +
  labs(title = "Histogram of salary", x = "salary", y = "density") +
  geom_density(colour = "coral")
p
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Ve 2 loai bieu do cung 1 cua so

library(gridExtra)
# Left panel: count histogram of Salary.
p1 <- ggplot(datt, aes(x = Salary)) +
  geom_histogram(fill = "mediumpurple", colour = "mediumpurple4") +
  theme_bw() +
  labs(title = "Histogram 1", x = "Salary", y = "Count")
# Right panel: density-scaled histogram with a kernel density curve.
# after_stat(density) replaces the `..density..` notation, which was
# deprecated in ggplot2 3.4.0.
p2 <- ggplot(datt, aes(x = Salary)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "mediumpurple",
    colour = "mediumpurple4"
  ) +
  theme_bw() +
  labs(title = "Histogram of salary", x = "salary", y = "density") +
  geom_density(colour = "coral")
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

p2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Draw both plots side by side in a single window.
grid.arrange(p1, p2, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Ve Histogram theo gioi tinh, cac cot xep canh nhau, bins=40, chia 40 cot

# Density-scaled histogram of Salary split by Sex: bars for the two groups
# are placed next to each other (position = "dodge") over 40 bins.
# after_stat(density) replaces the `..density..` notation, which was
# deprecated in ggplot2 3.4.0.
p2 <- ggplot(datt, aes(x = Salary, fill = Sex)) +
  geom_histogram(
    aes(y = after_stat(density)),
    colour = "mediumpurple4",
    position = "dodge",
    bins = 40
  ) +
  theme_bw() +
  labs(title = "Histogram of salary", x = "salary", y = "density")
p2

# Ve bieu do Density Luong cho 2 gioi

# Overlaid kernel density curves of Salary, one per Sex, semi-transparent
# so both groups stay visible.
# The title previously read "Histogram of salary" although this plot has no
# histogram layer; it now names the density plot that is actually drawn.
p2 <- ggplot(datt, aes(x = Salary, fill = Sex, colour = Sex)) +
  geom_density(alpha = 0.3) +
  theme_bw() +
  labs(title = "Density of salary", x = "salary", y = "density")
p2

# Ve bieu do Density Luong rieng cho 2 gioi bang facet_grid(Sex~.)

# Kernel density of Salary drawn in a separate panel (row) for each Sex
# via facet_grid(Sex ~ .).
# The title previously read "Histogram of salary" although this plot has no
# histogram layer; it now names the density plot that is actually drawn.
p2 <- ggplot(datt, aes(x = Salary, fill = Sex, colour = Sex)) +
  geom_density(alpha = 0.3) +
  theme_bw() +
  labs(title = "Density of salary", x = "salary", y = "density") +
  facet_grid(Sex ~ .)
p2

# Ve bieu do Density Luong cho 2 gioi, Trang tri voi theme(…)

# Overlaid kernel density curves of Salary by Sex, decorated with theme():
# bold blue axis titles, bold brown axis tick labels (x labels rotated 45
# degrees) and no legend.
# The title previously read "Histogram of salary" although this plot has no
# histogram layer; it now names the density plot that is actually drawn.
p2 <- ggplot(datt, aes(x = Salary, fill = Sex, colour = Sex)) +
  geom_density(alpha = 0.3) +
  theme_bw() +
  labs(title = "Density of salary", x = "salary", y = "density") +
  theme(
    axis.title.x = element_text(colour = "blue", size = 14, face = "bold"),
    axis.title.y = element_text(colour = "blue", size = 14, face = "bold"),
    axis.text.x = element_text(
      colour = "brown", angle = 45, vjust = 0.5, size = 10, face = "bold"
    ),
    axis.text.y = element_text(
      colour = "brown", vjust = 0.5, size = 10, face = "bold"
    ),
    legend.position = "none"
  )
p2

# Ve 2 bieu do histogram rieng cho nam va nu: facet_grid(Sex~.), bins=30 chia thanh 30 cot

library(ggplot2)
# Density-scaled histogram of Salary (30 bins) with a filled kernel density
# curve on top, drawn in a separate panel (row) for each Sex.
# after_stat(density) replaces the `..density..` notation, which was
# deprecated in ggplot2 3.4.0.
p2 <- ggplot(datt, aes(x = Salary)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "mediumpurple",
    colour = "mediumpurple4",
    bins = 30
  ) +
  theme_bw() +
  labs(title = "Histogram of salary", x = "salary", y = "density") +
  geom_density(alpha = 0.3, colour = "coral", fill = "violet") +
  facet_grid(Sex ~ .)
p2