This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Read file
# Load the US population data (columns used below: Population, Year, YearSq).
# The file path is built with file.path() rather than by calling setwd(), so
# reading the data no longer changes the session's working directory.
USPopulation <- read.csv(
  file = file.path("C:/Users/Yuan/Desktop/R/5", "USPOPULATION.csv"),
  header = TRUE,
  sep = ","
)

# Distribution of the response: a fine-grained and a coarse histogram.
hist(USPopulation$Population, breaks = 100, col = "black")
hist(USPopulation$Population, breaks = 10)

# Normality checks: normal Q-Q plot and Shapiro-Wilk test.
qqnorm(USPopulation$Population)
shapiro.test(USPopulation$Population)
##
## Shapiro-Wilk normality test
##
## data: USPopulation$Population
## W = 0.88825, p-value = 0.01742
# Pearson correlation matrix of every column, using complete rows only.
cor(USPopulation, method = "pearson", use = "complete.obs")
## Population Year YearSq
## Population 1.0000000 0.9589847 0.9630800
## Year 0.9589847 1.0000000 0.9998886
## YearSq 0.9630800 0.9998886 1.0000000
# Covariance matrix of every column, using complete rows only.
cov(USPopulation, use = "complete.obs")
## Population Year YearSq
## Population 7604.771 5430.495 20671769
## Year 5430.495 4216.667 15981167
## YearSq 20671769.252 15981166.667 60582115000
# Test whether the Pearson correlation between population and year is zero.
cor.test(USPopulation$Population, USPopulation$Year, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: USPopulation$Population and USPopulation$Year
## t = 15.13, df = 20, p-value = 2.052e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9021165 0.9831072
## sample estimates:
## cor
## 0.9589847
# Scatter of population (response, y-axis) against year (predictor, x-axis).
# abline() below draws Population as a linear function of Year, so Year has to
# be on the x-axis; with the axes the other way round the fitted line lies far
# outside the plotted region and never appears.
plot(USPopulation$Year, USPopulation$Population,
     xlab = "year", ylab = "population", pch = 21)
abline(lm(Population ~ Year, data = USPopulation))
# Simple linear regression of population on year.
# NOTE: the terms are spelled as USPopulation$... vectors rather than column
# names with data =, so this model always evaluates the original 22 rows.
model <- lm(USPopulation$Population ~ USPopulation$Year)
print(model)
##
## Call:
## lm(formula = USPopulation$Population ~ USPopulation$Year)
##
## Coefficients:
## (Intercept) USPopulation$Year
## -2345.855 1.288
# Residual summary, coefficient table and fit statistics for the simple
# regression stored in `model`.
summary(model)
##
## Call:
## lm(formula = USPopulation$Population ~ USPopulation$Year)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.26 -21.08 -10.12 19.54 51.55
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.346e+03 1.614e+02 -14.54 4.29e-12 ***
## USPopulation$Year 1.288e+00 8.512e-02 15.13 2.05e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.33 on 20 degrees of freedom
## Multiple R-squared: 0.9197, Adjusted R-squared: 0.9156
## F-statistic: 228.9 on 1 and 20 DF, p-value: 2.052e-12
# Scatter of population against year with the fitted regression line.
plot(USPopulation$Population ~ USPopulation$Year)
abline(model)

# Confidence intervals for the fitted mean at each observed year.
# The earlier call passed newdata = data.frame(Units = 4). `model` has no
# `Units` term (its terms are USPopulation$... vectors), so predict() could not
# use the new data, emitted a warning, and returned the in-sample fits anyway.
# Dropping the unused argument gives the same table without the warning.
# To predict at a new year, fit lm(Population ~ Year, data = USPopulation) and
# pass newdata = data.frame(Year = <value>).
predict(model, interval = "confidence")
## fit lwr upr
## 1 -40.577759 -62.360203 -18.795315
## 2 -27.699115 -47.982649 -7.415581
## 3 -14.820471 -33.653260 4.012317
## 4 -1.941828 -19.384058 15.500403
## 5 10.936816 -5.190620 27.064252
## 6 23.815460 8.906995 38.723924
## 7 36.694103 22.883392 50.504815
## 8 49.572747 36.707503 62.437991
## 9 62.451391 50.343602 74.559180
## 10 75.330034 63.754719 86.905350
## 11 88.208678 76.909005 99.508351
## 12 101.087322 89.787649 112.386995
## 13 113.965966 102.390650 125.541281
## 14 126.844609 114.736820 138.952398
## 15 139.723253 126.858009 152.588497
## 16 152.601897 138.791185 166.412608
## 17 165.480540 150.572076 180.389005
## 18 178.359184 162.231748 194.486620
## 19 191.237828 173.795597 208.680058
## 20 204.116471 185.283683 222.949260
## 21 216.995115 196.711581 237.278649
## 22 229.873759 208.091315 251.656203
# Residual diagnostics: residuals of the simple regression plotted against the
# observed population, with a horizontal reference line at zero.
model.res <- residuals(model)
plot(USPopulation$Population, model.res)
abline(h = 0)

# Influence diagnostics per observation (DFBETAS, DFFITS, covariance ratio,
# Cook's distance and hat values).
influence.measures(model)
## Influence measures of
## lm(formula = USPopulation$Population ~ USPopulation$Year) :
##
## dfb.1_ dfb.USP. dffit cov.r cook.d hat inf
## 1 0.82280 -0.80694 0.9428 0.885 0.380821 0.1700
## 2 0.51210 -0.50118 0.6027 1.054 0.172123 0.1474
## 3 0.29093 -0.28399 0.3544 1.161 0.063225 0.1270
## 4 0.13031 -0.12679 0.1661 1.215 0.014344 0.1090
## 5 0.01846 -0.01788 0.0250 1.221 0.000329 0.0932
## 6 -0.05422 0.05223 -0.0797 1.195 0.003334 0.0796
## 7 -0.08895 0.08499 -0.1469 1.153 0.011185 0.0683
## 8 -0.09375 0.08844 -0.1831 1.115 0.017162 0.0593
## 9 -0.08556 0.07891 -0.2152 1.073 0.023353 0.0525
## 10 -0.06010 0.05268 -0.2289 1.046 0.026157 0.0480
## 11 -0.02503 0.01758 -0.2238 1.043 0.024979 0.0457
## 12 0.01004 -0.01746 -0.2222 1.045 0.024647 0.0457
## 13 0.03923 -0.04573 -0.1987 1.074 0.019964 0.0480
## 14 0.06719 -0.07347 -0.2004 1.085 0.020362 0.0525
## 15 0.07748 -0.08253 -0.1709 1.122 0.014998 0.0593
## 16 0.12685 -0.13321 -0.2302 1.104 0.026879 0.0683
## 17 0.10601 -0.11033 -0.1684 1.163 0.014677 0.0796
## 18 -0.00864 0.00893 0.0125 1.222 0.000082 0.0932
## 19 -0.12736 0.13115 0.1718 1.213 0.015336 0.1090
## 20 -0.28155 0.28893 0.3605 1.158 0.065341 0.1270
## 21 -0.46854 0.47953 0.5766 1.072 0.158909 0.1474
## 22 -0.95312 0.97334 1.1372 0.752 0.510853 0.1700 *
# Quadratic trend: population regressed on year and year squared.
fit <- lm(Population ~ Year + YearSq, data = USPopulation)
summary(fit)
##
## Call:
## lm(formula = Population ~ Year + YearSq, data = USPopulation)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5642 -0.3134 0.5894 1.4263 4.7578
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.163e+04 6.395e+02 33.83 <2e-16 ***
## Year -2.405e+01 6.755e-01 -35.60 <2e-16 ***
## YearSq 6.684e-03 1.782e-04 37.51 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3 on 19 degrees of freedom
## Multiple R-squared: 0.9989, Adjusted R-squared: 0.9988
## F-statistic: 8864 on 2 and 19 DF, p-value: < 2.2e-16
library(usdm)
## Loading required package: sp
## Loading required package: raster
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:usdm':
##
## vif
# Variance inflation factors for the quadratic model.
# Both usdm and car export vif(); a bare vif() resolves to whichever package
# was attached last. Qualifying the call pins it to car's version, which is the
# one this script has been running, regardless of library() order.
car::vif(fit)
## Year YearSq
## 4489.781 4489.781
# Load the prediction data set. The path is built with file.path() instead of
# calling setwd(), so the working directory is left untouched.
PRED <- read.csv(
  file = file.path("C:/Users/Yuan/Desktop/R/5", "PRED.csv"),
  header = TRUE,
  sep = ","
)

# Draft (still disabled): prediction interval at Year = 1925 from a quadratic
# model. The earlier attempt most likely failed on the stray "+" inside
# data.frame(Year=1925, + YearSq=3705625) -- a copied console continuation
# prompt, which is a syntax error. The version below removes it and passes
# data = PRED instead of using attach()/detach().
# NOTE(review): assumes PRED has Population, Year and YearSq columns -- confirm
# before enabling.
#fit.lm <- lm(Population ~ Year + YearSq, data = PRED)
#newdata <- data.frame(Year = 1925, YearSq = 1925^2)
#predict(fit.lm, newdata, interval = "prediction")
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.