# Load the gapminder data set (the gapminder package must be installed) ----
library(gapminder)
# Copy the data set into the workspace and preview its first six rows.
data("gapminder")
head(gapminder, n = 6L)
## # A tibble: 6 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
# Basic ggplot2 figure: life expectancy against GDP per capita ----
library(ggplot2)

# Start from the bare plot object and add one layer per step, printing the
# figure after each step to show how it builds up.
p <- ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp))
p <- p + geom_point()
p
p <- p + geom_line()
p
# The point layer is already on the plot, so only the smoother is added here;
# a second geom_point() would just draw every observation twice.
p <- p + geom_smooth()
p
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Scatter plot coloured by continent ----
p <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
)
p <- p + geom_point()
p
# Adjust the chart: add a loess smoother. Colour is mapped at plot level, so
# one curve is fitted per continent. The layer is added exactly once; stacking
# the identical layer again would refit and overdraw the same curves and emit
# every smoothing message twice.
p <- p + geom_smooth(method = "loess")
p
## `geom_smooth()` using formula = 'y ~ x'
# Put GDP per capita on a log10 x axis.
p <- p + scale_x_log10()
p
## `geom_smooth()` using formula = 'y ~ x'
# Second example: line plot, then scatter with a single pooled smoother ----
p <- ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp))
p <- p + geom_line()
p
# Rebuild the plot. Colour is mapped only inside the point layer, so the
# smoother below is fitted to all continents pooled together.
p <- ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp))
p <- p + geom_point(aes(color = continent))
# Add the loess smoother and the log10 x scale exactly once. Repeating this
# line stacks a duplicate smoother and makes ggplot2 report
# "Scale for x is already present" while it replaces the first scale.
p <- p + geom_smooth(method = "loess") + scale_x_log10()
p <- p + labs(x = "Log GDP per Capita", y = "Life Expectancy")
p
## `geom_smooth()` using formula = 'y ~ x'
# Finished chart: add a title and apply The Economist theme ----
library(ggthemes)

# Title styling kept in its own object so it can be re-applied after a
# complete theme has been added.
title_style <- theme(
  plot.title = element_text(lineheight = 0.8, face = "bold", hjust = 0.5)
)
p <- p +
  ggtitle("Association between GDP Per Capita and Life Expectancy") +
  title_style
# theme_economist() is a complete theme: adding it replaces every theme
# setting already on the plot, including the plot.title tweak above. The title
# style is therefore added again after it so the centred title survives.
p + theme_economist() + title_style
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Chart 64-2: set the title and both axis labels in a single labs() call ----
# Called on its own, labs() only builds and prints a labels object; it must be
# added to a plot to have any effect on a figure.
p + labs(
  title = "Association between GDP Per Capita and Life Expectancy",
  x = "Log GDP per Capita",
  y = "Life Expectancy"
)
# Charts (page 63): read the CHNS data ----
# The path can be looked up interactively with file.choose(); here it is
# passed to read.csv() as a fixed absolute path.
df <- read.csv(
  file = "D:\\R\\DỮ LIỆU THỰC HÀNH (TS Thạch gửi)\\CHNS data full.csv"
)
# Preview the first six rows of the data frame.
head(df, n = 6L)
## id whours wgroup dead fu.time gender age edu marital residence income occu
## 1 1 35 1 0 4 2 52 3 3 1 116000 1
## 2 2 48 3 0 4 1 36 3 2 1 25200 1
## 3 3 40 1 0 4 2 31 3 2 1 27000 1
## 4 4 48 3 0 4 2 51 2 2 1 27600 2
## 5 5 32 2 0 4 1 58 2 2 1 34800 2
## 6 6 40 1 0 4 1 42 3 2 1 77000 1
## smoking drinking height weight bmi sys1 sys2 sys3 dias1 dias2 dias3 tsf1
## 1 0 0 168 83.5 29.58 120 126 120 80 82 76 28
## 2 1 0 173 85.0 28.40 120 120 120 90 80 80 25
## 3 0 1 167 50.0 17.93 110 108 110 70 70 70 18
## 4 0 0 164 80.0 29.74 120 110 120 80 82 80 27
## 5 0 0 175 65.0 21.22 120 120 120 80 82 80 23
## 6 0 1 179 75.0 23.41 110 112 110 72 76 70 24
## tsf2 tsf3 uac hc wc
## 1 27 28 36 111 103
## 2 44 25 35 102 95
## 3 17 18 25 96 72
## 4 26 27 32 104 97
## 5 22 22 35 102 90
## 6 23 24 28 96 90
# Histogram of raw income ----
library(ggplot2)
p <- ggplot(data = df, mapping = aes(x = income))
# Default histogram; ggplot2 picks 30 bins when none are specified.
p + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 224 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Chart: the same default histogram drawn again.
p + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 224 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Blue bars with a white outline.
p + geom_histogram(fill = "blue", colour = "white ")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 224 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Histogram of income on a log scale ----
# NOTE(review): more rows are dropped here than for raw income (241 vs 224),
# presumably because log() of a non-positive income is not finite; confirm.
p <- ggplot(data = df, mapping = aes(x = log(income)))
p + geom_histogram(fill = "blue", colour = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Decorate the chart with axis labels.
p +
  geom_histogram(fill = "blue", colour = "white") +
  labs(x = "income(log scale)", y = "Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Fill the bars by gender and add a title.
p <- ggplot(data = df, mapping = aes(x = log(income), fill = factor(gender)))
p +
  geom_histogram(colour = "white") +
  labs(x = "Income log(scale)", y = "Frequency", title = "Phân bổ thu nhập")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Change the chart type: density of log income by gender ----
# geom_density() puts an estimated density on the y axis, not a count, so the
# axis is labelled "Density" rather than "Frequency".
p +
  geom_density() +
  labs(x = "Income log(scale)", y = "Density", title = "Phân bổ thu nhập")
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_density()`).
# Semi-transparent fill so the overlapping curves stay visible.
p +
  geom_density(alpha = 0.5) +
  labs(x = "Income log(scale)", y = "Density", title = "Phân bổ thu nhập")
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_density()`).
# Box plot of log income by gender ----
p <- ggplot(
  data = df,
  mapping = aes(x = factor(gender), y = log(income), colour = factor(gender))
)
p + geom_boxplot()
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Overlay the individual observations as faint jittered points.
p +
  geom_boxplot() +
  geom_jitter(alpha = 0.05)
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 224 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Correlation (scatter) plots, page 73 ----
# Plain scatter of log income against age.
p <- ggplot(data = df, mapping = aes(x = age, y = log(income)))
p + geom_point()
## Warning: Removed 226 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Same scatter coloured by gender, with axis labels and a title.
p <- ggplot(
  data = df,
  mapping = aes(x = age, y = log(income), colour = factor(gender))
)
p1 <- p +
  geom_point() +
  labs(x = "Tuổi", y = "Thu nhập (logarithm scale)") +
  ggtitle("Mối liên quan giữa thu nhập và tuổi theo giới tính")
p1
## Warning: Removed 226 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Add trend lines to the coloured scatter ----
# Default smoother (ggplot2 chooses the method and reports it below).
p2 <- p1 + geom_smooth()
p2
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 243 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 226 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Linear model with a cubic polynomial in age.
cubic_formula <- y ~ x + I(x^2) + I(x^3)
p3 <- p1 + geom_smooth(method = "lm", formula = cubic_formula)
p3
## Warning: Removed 243 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 226 rows containing missing values or values outside the scale range
## (`geom_point()`).