library(gapminder)
data(gapminder)
head(gapminder)
## # A tibble: 6 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
dim(gapminder)
## [1] 1704 6
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
table1(~country, data=gapminder)
| Overall (n=1704) |
|
|---|---|
| country | |
| Afghanistan | 12 (0.7%) |
| Albania | 12 (0.7%) |
| Algeria | 12 (0.7%) |
| Angola | 12 (0.7%) |
| Argentina | 12 (0.7%) |
| Australia | 12 (0.7%) |
| Austria | 12 (0.7%) |
| Bahrain | 12 (0.7%) |
| Bangladesh | 12 (0.7%) |
| Belgium | 12 (0.7%) |
| Benin | 12 (0.7%) |
| Bolivia | 12 (0.7%) |
| Bosnia and Herzegovina | 12 (0.7%) |
| Botswana | 12 (0.7%) |
| Brazil | 12 (0.7%) |
| Bulgaria | 12 (0.7%) |
| Burkina Faso | 12 (0.7%) |
| Burundi | 12 (0.7%) |
| Cambodia | 12 (0.7%) |
| Cameroon | 12 (0.7%) |
| Canada | 12 (0.7%) |
| Central African Republic | 12 (0.7%) |
| Chad | 12 (0.7%) |
| Chile | 12 (0.7%) |
| China | 12 (0.7%) |
| Colombia | 12 (0.7%) |
| Comoros | 12 (0.7%) |
| Congo, Dem. Rep. | 12 (0.7%) |
| Congo, Rep. | 12 (0.7%) |
| Costa Rica | 12 (0.7%) |
| Cote d'Ivoire | 12 (0.7%) |
| Croatia | 12 (0.7%) |
| Cuba | 12 (0.7%) |
| Czech Republic | 12 (0.7%) |
| Denmark | 12 (0.7%) |
| Djibouti | 12 (0.7%) |
| Dominican Republic | 12 (0.7%) |
| Ecuador | 12 (0.7%) |
| Egypt | 12 (0.7%) |
| El Salvador | 12 (0.7%) |
| Equatorial Guinea | 12 (0.7%) |
| Eritrea | 12 (0.7%) |
| Ethiopia | 12 (0.7%) |
| Finland | 12 (0.7%) |
| France | 12 (0.7%) |
| Gabon | 12 (0.7%) |
| Gambia | 12 (0.7%) |
| Germany | 12 (0.7%) |
| Ghana | 12 (0.7%) |
| Greece | 12 (0.7%) |
| Guatemala | 12 (0.7%) |
| Guinea | 12 (0.7%) |
| Guinea-Bissau | 12 (0.7%) |
| Haiti | 12 (0.7%) |
| Honduras | 12 (0.7%) |
| Hong Kong, China | 12 (0.7%) |
| Hungary | 12 (0.7%) |
| Iceland | 12 (0.7%) |
| India | 12 (0.7%) |
| Indonesia | 12 (0.7%) |
| Iran | 12 (0.7%) |
| Iraq | 12 (0.7%) |
| Ireland | 12 (0.7%) |
| Israel | 12 (0.7%) |
| Italy | 12 (0.7%) |
| Jamaica | 12 (0.7%) |
| Japan | 12 (0.7%) |
| Jordan | 12 (0.7%) |
| Kenya | 12 (0.7%) |
| Korea, Dem. Rep. | 12 (0.7%) |
| Korea, Rep. | 12 (0.7%) |
| Kuwait | 12 (0.7%) |
| Lebanon | 12 (0.7%) |
| Lesotho | 12 (0.7%) |
| Liberia | 12 (0.7%) |
| Libya | 12 (0.7%) |
| Madagascar | 12 (0.7%) |
| Malawi | 12 (0.7%) |
| Malaysia | 12 (0.7%) |
| Mali | 12 (0.7%) |
| Mauritania | 12 (0.7%) |
| Mauritius | 12 (0.7%) |
| Mexico | 12 (0.7%) |
| Mongolia | 12 (0.7%) |
| Montenegro | 12 (0.7%) |
| Morocco | 12 (0.7%) |
| Mozambique | 12 (0.7%) |
| Myanmar | 12 (0.7%) |
| Namibia | 12 (0.7%) |
| Nepal | 12 (0.7%) |
| Netherlands | 12 (0.7%) |
| New Zealand | 12 (0.7%) |
| Nicaragua | 12 (0.7%) |
| Niger | 12 (0.7%) |
| Nigeria | 12 (0.7%) |
| Norway | 12 (0.7%) |
| Oman | 12 (0.7%) |
| Pakistan | 12 (0.7%) |
| Panama | 12 (0.7%) |
| Paraguay | 12 (0.7%) |
| Peru | 12 (0.7%) |
| Philippines | 12 (0.7%) |
| Poland | 12 (0.7%) |
| Portugal | 12 (0.7%) |
| Puerto Rico | 12 (0.7%) |
| Reunion | 12 (0.7%) |
| Romania | 12 (0.7%) |
| Rwanda | 12 (0.7%) |
| Sao Tome and Principe | 12 (0.7%) |
| Saudi Arabia | 12 (0.7%) |
| Senegal | 12 (0.7%) |
| Serbia | 12 (0.7%) |
| Sierra Leone | 12 (0.7%) |
| Singapore | 12 (0.7%) |
| Slovak Republic | 12 (0.7%) |
| Slovenia | 12 (0.7%) |
| Somalia | 12 (0.7%) |
| South Africa | 12 (0.7%) |
| Spain | 12 (0.7%) |
| Sri Lanka | 12 (0.7%) |
| Sudan | 12 (0.7%) |
| Swaziland | 12 (0.7%) |
| Sweden | 12 (0.7%) |
| Switzerland | 12 (0.7%) |
| Syria | 12 (0.7%) |
| Taiwan | 12 (0.7%) |
| Tanzania | 12 (0.7%) |
| Thailand | 12 (0.7%) |
| Togo | 12 (0.7%) |
| Trinidad and Tobago | 12 (0.7%) |
| Tunisia | 12 (0.7%) |
| Turkey | 12 (0.7%) |
| Uganda | 12 (0.7%) |
| United Kingdom | 12 (0.7%) |
| United States | 12 (0.7%) |
| Uruguay | 12 (0.7%) |
| Venezuela | 12 (0.7%) |
| Vietnam | 12 (0.7%) |
| West Bank and Gaza | 12 (0.7%) |
| Yemen, Rep. | 12 (0.7%) |
| Zambia | 12 (0.7%) |
| Zimbabwe | 12 (0.7%) |
vn = subset(gapminder, country=="Vietnam")
vn
## # A tibble: 12 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Vietnam Asia 1952 40.4 26246839 605.
## 2 Vietnam Asia 1957 42.9 28998543 676.
## 3 Vietnam Asia 1962 45.4 33796140 772.
## 4 Vietnam Asia 1967 47.8 39463910 637.
## 5 Vietnam Asia 1972 50.3 44655014 700.
## 6 Vietnam Asia 1977 55.8 50533506 714.
## 7 Vietnam Asia 1982 58.8 56142181 707.
## 8 Vietnam Asia 1987 62.8 62826491 821.
## 9 Vietnam Asia 1992 67.7 69940728 989.
## 10 Vietnam Asia 1997 70.7 76048996 1386.
## 11 Vietnam Asia 2002 73.0 80908147 1764.
## 12 Vietnam Asia 2007 74.2 85262356 2442.
# Tuổi thọ trung bình ở người Việt 1952 - 2007
plot(lifeExp ~ year, pch=16, col="blue", data=vn)
# Tăng trưởng tuổi thọ trung bì ## Mô hình hồi qui tuyến tính: ## LE = 𝛼 + 𝛽×Year + 𝜖 - Triển khai bằng
m = lm(lifeExp ~ year, data=vn)
summary(m)
##
## Call:
## lm(formula = lifeExp ~ year, data = vn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1884 -0.5840 0.1335 0.7396 1.7873
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.272e+03 4.349e+01 -29.25 5.10e-11 ***
## year 6.716e-01 2.197e-02 30.57 3.29e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.314 on 10 degrees of freedom
## Multiple R-squared: 0.9894, Adjusted R-squared: 0.9884
## F-statistic: 934.5 on 1 and 10 DF, p-value: 3.289e-11
library(visreg)
visreg(m)
# Hoán chuyển đơn vị X # Phương trình mô tả xu hướng tăng tuổi thọ: ## LE = 1272 + 0.672 x Year # Chú ý hằng số intercept = 1272. Ý nghĩa là nếu năm = 0 thì tuổi thọ trung bình là 1272! ## Vô nghĩa!
vn$Year = vn$year - 1952
m = lm(lifeExp ~ Year, data=vn); summary(m)
##
## Call:
## lm(formula = lifeExp ~ Year, data = vn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1884 -0.5840 0.1335 0.7396 1.7873
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.01008 0.71334 54.69 1.01e-13 ***
## Year 0.67162 0.02197 30.57 3.29e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.314 on 10 degrees of freedom
## Multiple R-squared: 0.9894, Adjusted R-squared: 0.9884
## F-statistic: 934.5 on 1 and 10 DF, p-value: 3.289e-11
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
plot(gdpPercap ~ year, pch=16, col="blue", data=vn)
m = lm(gdpPercap ~ year, data=vn)
summary(m)
##
## Call:
## lm(formula = gdpPercap ~ year, data = vn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -387.87 -267.13 -69.85 207.79 723.69
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -49382.260 11587.193 -4.262 0.00166 **
## year 25.461 5.853 4.350 0.00144 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 350 on 10 degrees of freedom
## Multiple R-squared: 0.6542, Adjusted R-squared: 0.6196
## F-statistic: 18.92 on 1 and 10 DF, p-value: 0.001444
m = lm(gdpPercap ~ year, data=vn)
visreg(m)
m = lm(gdpPercap ~ year + I(year^2), data=vn)
summary(m)
##
## Call:
## lm(formula = gdpPercap ~ year + I(year^2), data = vn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -212.84 -95.87 -24.78 103.39 223.31
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.228e+06 6.868e+05 6.156 0.000167 ***
## year -4.297e+03 6.939e+02 -6.192 0.000160 ***
## I(year^2) 1.092e+00 1.753e-01 6.228 0.000154 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 160.1 on 9 degrees of freedom
## Multiple R-squared: 0.9349, Adjusted R-squared: 0.9204
## F-statistic: 64.61 on 2 and 9 DF, p-value: 4.587e-06
# Mô hình tiên lượng thu nhập bình quân thích hợp?
m = lm(gdpPercap ~ year + I(year^2), data=vn)
visreg(m)
vn$pred = predict(m)
vn
## # A tibble: 12 x 8
## country continent year lifeExp pop gdpPercap Year pred
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Vietnam Asia 1952 40.4 26246839 605. 0 818.
## 2 Vietnam Asia 1957 42.9 28998543 676. 5 672.
## 3 Vietnam Asia 1962 45.4 33796140 772. 10 581.
## 4 Vietnam Asia 1967 47.8 39463910 637. 15 545.
## 5 Vietnam Asia 1972 50.3 44655014 700. 20 563.
## 6 Vietnam Asia 1977 55.8 50533506 714. 25 636.
## 7 Vietnam Asia 1982 58.8 56142181 707. 30 763.
## 8 Vietnam Asia 1987 62.8 62826491 821. 35 945.
## 9 Vietnam Asia 1992 67.7 69940728 989. 40 1181.
## 10 Vietnam Asia 1997 70.7 76048996 1386. 45 1472.
## 11 Vietnam Asia 2002 73.0 80908147 1764. 50 1818.
## 12 Vietnam Asia 2007 74.2 85262356 2442. 55 2218.
vn$knot = ifelse(vn$year>1992, 1, 0)
vn$diff = vn$year - 1992
vn$int = vn$diff * vn$knot
m = lm(gdpPercap ~ year + int, data=vn)
summary(m)
##
## Call:
## lm(formula = gdpPercap ~ year + int, data = vn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -106.291 -54.225 -5.144 41.823 133.329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11328.061 3963.982 -2.858 0.0188 *
## year 6.116 2.009 3.044 0.0139 *
## int 95.389 7.244 13.168 3.48e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 81.95 on 9 degrees of freedom
## Multiple R-squared: 0.9829, Adjusted R-squared: 0.9791
## F-statistic: 259.2 on 2 and 9 DF, p-value: 1.107e-08
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
plot_ly(vn, x=~year, y=~gdpPercap, type="scatter")%>% add_lines(x = ~year, y = fitted(m))
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.