Author: “GE CHEN”

Date: “February 19, 2015”

Output: html_document

Assignment 01 Outline

RPI

1. Data

  1. The data is selected from 100+ datasets and is part of the U.S. Bureau of Statistics and U.S. Census data. It contains quarterly manufacturing sales and quarterly manufacturing workers' salary.
# Load the quarterly manufacturing data set and preview its first 14 rows.
# NOTE(review): the path points at the author's Desktop, so this only runs
# on a machine where that file exists.
data <- read.csv(file = "~/Desktop/All_manu.csv")
head(x = data, n = 14L)
##      Year All_manufacturing_Sales Workers_Salary
## 1  2003Q1                 1099292          668.3
## 2  2003Q2                 1070625          668.5
## 3  2003Q3                 1104430          669.6
## 4  2003Q4                 1119380          690.5
## 5  2004Q1                 1179628          674.7
## 6  2004Q2                 1218529          689.6
## 7  2004Q3                 1245589          707.3
## 8  2004Q4                 1288895          702.8
## 9  2005Q1                 1295928          705.6
## 10 2005Q2                 1318785          706.7
## 11 2005Q3                 1377748          713.5
## 12 2005Q4                 1417411          715.6
## 13 2006Q1                 1438882          739.6
## 14 2006Q2                 1449760          735.8
  1. Summarize the data
# Per-column summary statistics of the loaded data frame.
summary(object = data)
##       Year    All_manufacturing_Sales Workers_Salary 
##  2003Q1 : 1   Min.   :1070625         Min.   :649.2  
##  2003Q2 : 1   1st Qu.:1308566         1st Qu.:690.0  
##  2003Q3 : 1   Median :1455606         Median :715.6  
##  2003Q4 : 1   Mean   :1465914         Mean   :715.9  
##  2004Q1 : 1   3rd Qu.:1650530         3rd Qu.:744.5  
##  2004Q2 : 1   Max.   :1760628         Max.   :777.6  
##  (Other):41
# Compact view of the data frame's structure (dimensions and column types).
str(object = data)
## 'data.frame':    47 obs. of  3 variables:
##  $ Year                   : Factor w/ 47 levels "2003Q1","2003Q2",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ All_manufacturing_Sales: num  1099292 1070625 1104430 1119380 1179628 ...
##  $ Workers_Salary         : num  668 668 670 690 675 ...

2. Model

Using workers' quarterly salary as the independent variable and manufacturing sales as the dependent variable, fit a single-variable linear regression:

# Simple linear regression of manufacturing sales on workers' salary.
# Columns are resolved through `data =` rather than attach() or `data$...`
# terms, so nothing is added to the search path and the model terms keep
# plain column names.
model <- lm(All_manufacturing_Sales ~ Workers_Salary, data = data)

3. Plot the Data

  1. scatter plot
# Scatter plot of manufacturing sales against workers' salary. The columns
# are referenced through `data$...`, so no attach() call is needed here.
# NOTE(review): `bg` only affects point symbols 21-25, so it has no visible
# effect with pch = 12.
plot(
  data$Workers_Salary, data$All_manufacturing_Sales,
  xlab = "Workers_Salary-Million",
  ylab = "Manufacture Sales-Million",
  pch = 12, cex = 0.5, bg = "green",
  main = "Workers Salary and All Manufacturies Sales "
)

  1. scatter plot with regression line
# Same scatter plot, overlaid with the fitted regression line.
plot(
  data$Workers_Salary, data$All_manufacturing_Sales,
  xlab = "Workers_Salary-Million",
  ylab = "Manufacture Sales-Million",
  pch = 12, cex = 0.5, bg = "green",
  main = "Workers Salary and All Manufacturies Sales "
)
# coef() is the documented accessor; `model$coef` only works through partial
# matching of `$` on the "coefficients" element.
abline(coef(model), lwd = 2)

  1. Set up the hypothesis H0: b1 = 0 and plot the fitted line (coefficients b0 and b1) together with its 95% prediction interval
# 95% prediction interval for every observation (columns fit, lwr, upr).
fit <- predict(model, data, interval = "prediction", level = 0.95)
plot(
  data$Workers_Salary, data$All_manufacturing_Sales,
  xlab = "Workers_Salary-Million",
  ylab = "Manufacture Sales-Million",
  pch = 12, cex = 0.5, bg = "green",
  main = "Workers Salary and All Manufacturies Sales "
)
abline(coef(model), lwd = 2)
# The rows are in time order, not salary order, so the bands are drawn in
# increasing salary order; otherwise lines() zigzags back and forth.
salary_order <- order(data$Workers_Salary)
lines(data$Workers_Salary[salary_order], fit[salary_order, "lwr"], lty = 2)
lines(data$Workers_Salary[salary_order], fit[salary_order, "upr"], lty = 2)

fit
##        fit       lwr     upr
## 1  1263101  990737.1 1535466
## 2  1263953  991631.9 1536273
## 3  1268635  996550.5 1540720
## 4  1357606 1088974.8 1626237
## 5  1290346 1019284.7 1561407
## 6  1353774 1085035.6 1622513
## 7  1429123 1161821.7 1696423
## 8  1409966 1142437.2 1677495
## 9  1421886 1154509.7 1689262
## 10 1426568 1159242.6 1693894
## 11 1455516 1188374.9 1722657
## 12 1464455 1197328.2 1731583
## 13 1566622 1298194.5 1835050
## 14 1550446 1282401.8 1818490
## 15 1541506 1273645.6 1809367
## 16 1575136 1306479.8 1843793
## 17 1625794 1355400.5 1896188
## 18 1627071 1356625.5 1897517
## 19 1609192 1339438.4 1878945
## 20 1610043 1340258.6 1879828
## 21 1611746 1341898.6 1881594
## 22 1595995 1326701.5 1865289
## 23 1567048 1298609.2 1835487
## 24 1508302 1240944.2 1775660
## 25 1287792 1016616.1 1558967
## 26 1224363  949836.6 1498889
## 27 1182645  905392.8 1459897
## 28 1237560  963810.1 1511309
## 29 1181793  904481.6 1459105
## 30 1282258 1010828.6 1553687
## 31 1321847 1052065.0 1591630
## 32 1364843 1096405.4 1633280
## 33 1421034 1153648.6 1688420
## 34 1417203 1149771.3 1684635
## 35 1444448 1177261.5 1711634
## 36 1423589 1156231.4 1690946
## 37 1533844 1266124.2 1801563
## 38 1543209 1275315.0 1811103
## 39 1533418 1265705.9 1801130
## 40 1567899 1299438.5 1836360
## 41 1581096 1312268.6 1849923
## 42 1593867 1324643.1 1863091
## 43 1598975 1329581.4 1868369
## 44 1628348 1357850.1 1898846
## 45 1693905 1420176.6 1967634
## 46 1711785 1436994.8 1986574
## 47 1728387 1452544.1 2004229

4. Summarize and Interpret the Result

# Coefficient table, residual summary and fit statistics for the regression.
summary(object = model)
## 
## Call:
## lm(formula = data$All_manufacturing_Sales ~ data$Workers_Salary)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -238226 -115895     691  112216  222861 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1581822.7   397057.3  -3.984 0.000245 ***
## data$Workers_Salary     4257.0      553.9   7.685 9.95e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 131200 on 45 degrees of freedom
## Multiple R-squared:  0.5675, Adjusted R-squared:  0.5579 
## F-statistic: 59.06 on 1 and 45 DF,  p-value: 9.953e-10