# Load the salaries data set into `datt`.
# `na.strings` takes a character vector of tokens to be read as NA. The
# original passed the logical `T`; "NA" (read.csv's documented default) is
# spelled out here instead. `TRUE` replaces the reassignable shorthand `T`.
datt <- read.csv(
  "E:/CONG VIEC/Ky nang ngoai/Xu ly so lieu Bang R/Van Lang R and Machine Learning 2023/Thuc hanh ngay 1/Salaries.csv",
  header = TRUE,
  na.strings = "NA"
)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.3.1
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
# List the column names of the loaded data frame (captured output below).
colnames(datt)
## [1] "ID" "Rank" "Discipline" "Yrs.since.phd"
## [5] "Yrs.service" "Sex" "Salary"
# Plain count histogram of Salary.
# No `bins` is given, so stat_bin() falls back to 30 bins and prints the
# message captured below.
ggplot(datt, aes(x = Salary)) +
  geom_histogram(colour = "seashell1", fill = "mediumpurple")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Decorated version: darker bar outline, black-and-white theme, and a title
# plus axis labels.
# Still relies on the default of 30 bins (see the captured message below).
ggplot(datt, aes(x = Salary)) +
  geom_histogram(colour = "mediumpurple4", fill = "mediumpurple") +
  labs(title = "Phân bố lương", x = "Mức lương", y = "Số lượng") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of Salary on the density scale, with a kernel density curve
# drawn on top.
# - `after_stat(density)` replaces the `..density..` dot-dot notation, which
#   is deprecated since ggplot2 3.4.0.
# - `bins = 30` spells out the value stat_bin() was silently falling back to,
#   so the plot is unchanged and the "Pick better value" message goes away.
p <- ggplot(data = datt, aes(x = Salary)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    fill = "mediumpurple",
    colour = "mediumpurple4"
  ) +
  geom_density(colour = "coral") +
  theme_bw() +
  labs(title = "Histogram of salary", x = "salary", y = "density")
p
# Draw two kinds of plot in the same window using gridExtra::grid.arrange().
#   p1: count histogram of Salary.
#   p2: density-scale histogram of Salary with a density curve overlaid.
# - `after_stat(density)` replaces the deprecated `..density..` notation.
# - `bins = 30` makes the previously implicit stat_bin() default explicit, so
#   the plots are unchanged and the "Pick better value" messages go away.
library(gridExtra)
p1 <- ggplot(data = datt, aes(x = Salary)) +
  geom_histogram(
    bins = 30,
    fill = "mediumpurple",
    colour = "mediumpurple4"
  ) +
  theme_bw() +
  labs(title = "Histogram 1", x = "Salary", y = "Count")
p2 <- ggplot(data = datt, aes(x = Salary)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    fill = "mediumpurple",
    colour = "mediumpurple4"
  ) +
  geom_density(colour = "coral") +
  theme_bw() +
  labs(title = "Histogram of salary", x = "salary", y = "density")
# Show each plot on its own, then both in a single row of two columns.
p1
p2
grid.arrange(p1, p2, ncol = 2)
# Density-scale histogram of Salary split by Sex, with the bars of each group
# placed side by side (position = "dodge") and 40 bins.
# The original heading comment wrapped onto a second, uncommented line
# (`cot`), which R evaluated as an undefined symbol; the whole heading is now
# a comment.
# `after_stat(density)` replaces the deprecated `..density..` notation.
p2 <- ggplot(data = datt, aes(x = Salary, fill = Sex)) +
  geom_histogram(
    aes(y = after_stat(density)),
    colour = "mediumpurple4",
    position = "dodge",
    bins = 40
  ) +
  theme_bw() +
  labs(title = "Histogram of salary", x = "salary", y = "density")
p2
# Overlaid salary density curves, one per Sex; both the fill and the outline
# are mapped to Sex, and the fill is semi-transparent (alpha = 0.3).
p2 <- ggplot(datt, aes(x = Salary, colour = Sex, fill = Sex)) +
  geom_density(alpha = 0.3) +
  labs(title = "Histogram of salary", x = "salary", y = "density") +
  theme_bw()
p2
# Salary density drawn separately for each Sex: one facet row per level
# (same layout as the formula form `facet_grid(Sex ~ .)`).
p2 <- ggplot(datt, aes(x = Salary, colour = Sex, fill = Sex)) +
  geom_density(alpha = 0.3) +
  facet_grid(rows = vars(Sex)) +
  labs(title = "Histogram of salary", x = "salary", y = "density") +
  theme_bw()
p2
# Overlaid salary density curves per Sex, decorated through theme():
# bold blue axis titles, bold brown tick labels (x labels rotated 45 degrees),
# and the legend hidden.
# theme() is added after theme_bw() so these overrides are not reset by it.
p2 <- ggplot(datt, aes(x = Salary, colour = Sex, fill = Sex)) +
  geom_density(alpha = 0.3) +
  labs(title = "Histogram of salary", x = "salary", y = "density") +
  theme_bw() +
  theme(
    axis.title.x = element_text(colour = "blue", size = 14, face = "bold"),
    axis.title.y = element_text(colour = "blue", size = 14, face = "bold"),
    axis.text.x = element_text(
      colour = "brown",
      angle = 45,
      vjust = 0.5,
      size = 10,
      face = "bold"
    ),
    axis.text.y = element_text(
      colour = "brown",
      vjust = 0.5,
      size = 10,
      face = "bold"
    ),
    legend.position = "none"
  )
p2
# Separate density-scale histograms of Salary for each Sex (one facet row per
# level), each with a filled density curve overlaid; 50 bins.
# - The original heading wrapped onto an uncommented line, which was a parse
#   error; the whole heading is now a comment.
# - The heading asks for 50 bins but the code passed `bins = 30`; `bins` now
#   matches the stated intent.
# - `after_stat(density)` replaces the deprecated `..density..` notation.
library(ggplot2)
p2 <- ggplot(data = datt, aes(x = Salary)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "mediumpurple",
    colour = "mediumpurple4",
    bins = 50
  ) +
  geom_density(alpha = 0.3, colour = "coral", fill = "violet") +
  facet_grid(Sex ~ .) +
  theme_bw() +
  labs(title = "Histogram of salary", x = "salary", y = "density")
p2