# Vẽ biểu đồ

## Lấy dữ liệu từ package gapminder (cần cài đặt trước: install.packages("gapminder"))

library(gapminder)
# Load the gapminder data set into the workspace and preview its first six
# rows.
data("gapminder")
head(gapminder, n = 6L)
## # A tibble: 6 × 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.

##ggp

library(ggplot2)

##GPP

# Scatter plot of life expectancy against GDP per capita.
p <- ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point()
p

# Overlay a line layer on the same plot object.
p <- p + geom_line()
p

# Add another point layer plus a smoother (method chosen by ggplot2, see the
# message printed below).
p <- p +
  geom_point() +
  geom_smooth()
p
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

### Bieu do

# Same scatter plot with colour mapped to continent at the plot level, so
# every later layer inherits the grouping.
p <- ggplot(
  gapminder,
  aes(x = gdpPercap, y = lifeExp, colour = continent)
) +
  geom_point()
p

# Loess smoother; it inherits the colour mapping and is therefore fitted
# separately for each continent.
p <- p + geom_smooth(method = "loess")
p
## `geom_smooth()` using formula = 'y ~ x'

### chinh bieu do

# `p` already carries a loess smoother from the step above, so it is only
# re-drawn here. Adding geom_smooth(method = "loess") a second time would fit
# and overplot an identical curve for every continent (the duplicated
# `geom_smooth()` message in the original output was the symptom).
p
## `geom_smooth()` using formula = 'y ~ x'

### chinh mau

# Switch the GDP axis to a log10 scale and redraw.
p <- p +
  scale_x_log10()
p
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

### mau 2

# Start over from a fresh plot object: life expectancy against GDP per capita
# drawn as a single line layer.
p <- ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_line()
p

# Scatter plot coloured by continent with one overall loess trend on a log10
# GDP axis. Colour is mapped only inside the point layer, so the smoother is
# fitted to all observations together rather than once per continent.
#
# The smoother and the x scale are each added exactly once. The original
# repeated that line, which stacked a second identical loess layer and made
# ggplot2 report "Scale for x is already present" while replacing the scale.
p <- ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp))
p <- p + geom_point(aes(color = continent))
p <- p + geom_smooth(method = "loess") + scale_x_log10()
p <- p + labs(x = "Log GDP per Capita", y = "Life Expectancy")
p
## `geom_smooth()` using formula = 'y ~ x'

### bieu do hoan thien

# Add the chart title and style it bold and horizontally centred. The plot is
# stored back into `p` without being drawn here.
p <- p +
  ggtitle("Association between GDP Per Capita and Life Expectancy") +
  theme(
    plot.title = element_text(lineheight = 0.8, face = "bold", hjust = 0.5)
  )

### Chart 64

library(ggthemes)
# theme_economist() is a complete theme: adding it replaces the theme settings
# already stored on `p`, including the bold, centred title styling configured
# earlier. Re-apply that styling after the complete theme so it is kept.
p +
  theme_economist() +
  theme(
    plot.title = element_text(lineheight = 0.8, face = "bold", hjust = 0.5)
  )
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

### Chart 64-2

# labs() on its own only builds and prints a labels object; it has to be added
# to a plot to have any effect. Attached to `p`, it sets the title and both
# axis labels in a single call (an alternative to ggtitle() plus labs()).
p +
  labs(
    title = "Association between GDP Per Capita and Life Expectancy",
    x = "Log GDP per Capita",
    y = "Life Expectancy"
  )

## BIỂU ĐỒ - PAGE 63

### file.choose() (link CHNSDATA)

### read.csv() (link CHNSDATA)

# Read the CHNS data from a local CSV file into `df`.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
df <- read.csv(file = "D:\\R\\DỮ LIỆU THỰC HÀNH (TS Thạch gửi)\\CHNS data full.csv")

###HEAD(DF)

# Preview the first six rows.
head(df, n = 6L)
##   id whours wgroup dead fu.time gender age edu marital residence income occu
## 1  1     35      1    0       4      2  52   3       3         1 116000    1
## 2  2     48      3    0       4      1  36   3       2         1  25200    1
## 3  3     40      1    0       4      2  31   3       2         1  27000    1
## 4  4     48      3    0       4      2  51   2       2         1  27600    2
## 5  5     32      2    0       4      1  58   2       2         1  34800    2
## 6  6     40      1    0       4      1  42   3       2         1  77000    1
##   smoking drinking height weight   bmi sys1 sys2 sys3 dias1 dias2 dias3 tsf1
## 1       0        0    168   83.5 29.58  120  126  120    80    82    76   28
## 2       1        0    173   85.0 28.40  120  120  120    90    80    80   25
## 3       0        1    167   50.0 17.93  110  108  110    70    70    70   18
## 4       0        0    164   80.0 29.74  120  110  120    80    82    80   27
## 5       0        0    175   65.0 21.22  120  120  120    80    82    80   23
## 6       0        1    179   75.0 23.41  110  112  110    72    76    70   24
##   tsf2 tsf3 uac  hc  wc
## 1   27   28  36 111 103
## 2   44   25  35 102  95
## 3   17   18  25  96  72
## 4   26   27  32 104  97
## 5   22   22  35 102  90
## 6   23   24  28  96  90

###LIBRARY(GGPLOT2)

library(ggplot2)
# Histogram of raw income using the default binning.
p <- ggplot(df, aes(income))
p + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 224 rows containing non-finite outside the scale range
## (`stat_bin()`).

###CHART

# Default histogram of income (ggplot2 falls back to 30 bins, see the message
# below).
p + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 224 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Same histogram with blue bars and white outlines. The outline colour is
# spelled "white", matching the later charts; the original literal carried a
# stray trailing space ("white ").
p + geom_histogram(fill = "blue", col = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 224 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Histogram of income on a natural-log scale, blue bars with white outlines.
p <- ggplot(df, aes(x = log(income)))
p + geom_histogram(fill = "blue", colour = "white")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_bin()`).

### Trang tri chart

# Same log-income histogram with explicit axis labels.
p +
  geom_histogram(fill = "blue", colour = "white") +
  labs(x = "income(log scale)", y = "Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Base plot for log income with the fill colour mapped to gender (treated as a
# categorical variable).
p <- ggplot(df, aes(x = log(income), fill = factor(gender)))

### Chart xanh đỏ

# Histogram of log income filled by gender, with white bar outlines, axis
# labels and a title.
p +
  geom_histogram(colour = "white") +
  labs(
    x = "Income log(scale)",
    y = "Frequency",
    title = "Phân bổ thu nhập"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_bin()`).

### chart doi type

# Kernel density of log income, one curve per gender (fill is mapped in `p`).
# geom_density() draws the estimated density on the y axis, not counts, so the
# axis is labelled "Density" rather than "Frequency".
p +
  geom_density() +
  labs(
    x = "Income log(scale)",
    y = "Density",
    title = "Phân bổ thu nhập"
  )
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_density()`).

# Same plot with semi-transparent fills so overlapping curves remain visible.
p +
  geom_density(alpha = 0.5) +
  labs(
    x = "Income log(scale)",
    y = "Density",
    title = "Phân bổ thu nhập"
  )
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_density()`).

### Biểu đồ thanh - page 69 - lỗi

# Thiếu `temp` - không chạy

## Biểu đồ hộp

###Chart biểu đồ hộp

 p <- ggplot(
  df,
  aes(x = factor(gender), y = log(income), colour = factor(gender))
)
# Box plot of log income for each gender, outlined in the gender colour.
p + geom_boxplot()
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Same box plot with the individual observations jittered on top, drawn
# almost transparent (alpha = 0.05).
p +
  geom_boxplot() +
  geom_jitter(alpha = 0.05)
## Warning: Removed 241 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 224 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Biểu đồ tương quan - page 73

### Chart tương quan

# Scatter plot of log income against age.
p <- ggplot(df, aes(x = age, y = log(income)))
p + geom_point()
## Warning: Removed 226 rows containing missing values or values outside the scale range
## (`geom_point()`).

 p <- ggplot(df, aes(x = age, y = log(income), colour = factor(gender)))
# Scatter plot of log income against age, coloured by gender, with axis labels
# and a title; kept in `p1` so later steps can add layers to it.
p1 <- p +
  geom_point() +
  labs(x = "Tuổi", y = "Thu nhập (logarithm scale)") +
  ggtitle("Mối liên quan giữa thu nhập và tuổi theo giới tính")
p1
## Warning: Removed 226 rows containing missing values or values outside the scale range
## (`geom_point()`).

### chart màu

# Add a smoother to the labelled scatter plot, leaving the choice of method to
# ggplot2 (reported in the message below); one curve per gender colour.
p2 <- p1 +
  geom_smooth()
p2
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 243 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 226 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Add a linear-model smoother with a cubic polynomial in age (x, x^2 and x^3
# terms) to the labelled scatter plot.
p3 <- p1 +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2) + I(x^3))
p3
## Warning: Removed 243 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 226 rows containing missing values or values outside the scale range
## (`geom_point()`).