This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print per-column summary statistics for the `cars` data set.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Packages used below: ggplot2 (density plots), car (qqPlot, residualPlot(s)),
# MASS (studres) and corrplot (correlation matrix plot).
library(ggplot2)
library(car)
library(MASS)
library(corrplot)

# Read the training and test sets. Forward slashes are used because a single
# backslash inside an R string starts an escape sequence, and "\I", "\T" and
# "\A" are not valid escapes; R accepts "/" as a path separator on Windows.
air_train <- read.csv(
  "E:/ISB/Term 2 - SA2/Assignment/airfares_train.csv",
  header = TRUE
)
air_test <- read.csv(
  "E:/ISB/Term 2 - SA2/Assignment/airfares_test.csv",
  header = TRUE
)

# NOTE(review): attach() is kept only in case code outside this section relies
# on bare column names. Everything visible here uses air_train$<column>, so
# consider removing it once that is confirmed.
attach(air_train)
colnames(air_train)

# Print the full training data frame.
air_train
# Row count of the training set and the R version in use.
nrow(air_train)
version

# Boxplots of the individual columns, laid out in panels via par(mfrow = ...).
# Each statement is on its own line so that the disabled NEW boxplot is the
# only thing the `#` comments out.
par(mfrow = c(1, 2))
boxplot(air_train$COUPON, main = "COUPON")
# boxplot(air_train$NEW, main = "NEW")
boxplot(air_train$HI, main = "HI")

par(mfrow = c(3, 2))
boxplot(air_train$S_INCOME, main = "S_INCOME")
boxplot(air_train$E_INCOME, main = "E_INCOME")
boxplot(air_train$S_POP, main = "S_POP")
boxplot(air_train$E_POP, main = "E_POP")
boxplot(air_train$DISTANCE, main = "DISTANCE")

par(mfrow = c(1, 2))
boxplot(air_train$FARE, main = "FARE")
boxplot(air_train$PAX, main = "PAX")
# Density of FARE on the raw scale. The aesthetic maps to a column of `dat`
# rather than to air_train$FARE, so the plot actually uses the data frame
# passed to ggplot().
dat <- data.frame(FARE = air_train$FARE)
ggplot(dat, aes(x = FARE)) +
  geom_density(fill = "violet")

# Density of FARE on the log scale.
dat <- data.frame(log_FARE = log(air_train$FARE))
ggplot(dat, aes(x = log_FARE)) +
  geom_density(fill = "violet")
# Number of simulated draws equals the number of training rows. The original
# hard-coded 510, which makes data.frame() fail with mismatched column lengths
# whenever the training set has a different number of rows.
n_obs <- nrow(air_train)

# Overlay the FARE density with draws from a normal distribution that has the
# same mean and standard deviation.
# NOTE(review): rnorm() is not seeded, so the simulated curve changes between
# runs; add set.seed() if reproducible figures are required.
norm <- rnorm(n_obs, mean = mean(air_train$FARE), sd = sd(air_train$FARE))
dat <- data.frame(
  cond = factor(rep(c("FARE", "Normal"), each = n_obs)),
  x = c(air_train$FARE, norm)
)
ggplot(dat, aes(x, fill = cond)) +
  geom_density(alpha = .3)

# Same comparison on the log scale: log(FARE) against normal draws matching
# the mean and standard deviation of log(FARE).
lnorm <- rnorm(
  n_obs,
  mean = mean(log(air_train$FARE)),
  sd = sd(log(air_train$FARE))
)
dat <- data.frame(
  cond = factor(rep(c("LFARE", "Log Normal"), each = n_obs)),
  x = c(log(air_train$FARE), lnorm)
)
ggplot(dat, aes(x, fill = cond)) +
  geom_density(alpha = .3)
# Keep only the numeric columns. vapply() always returns a logical vector
# (sapply()'s return type depends on its input), and drop = FALSE keeps the
# result a data frame even if only one column qualifies.
scplotdata <- air_train[
  ,
  vapply(air_train, is.numeric, logical(1)),
  drop = FALSE
]

# Scatterplot matrix of every pair of numeric columns.
pairs(scplotdata, col = "dodgerblue4", pch = 20)
library(corrplot)

# Pairwise correlations of the numeric columns, rounded to two decimals.
mcor <- round(cor(scplotdata), 2)
mcor

# Write the correlation plot to "Correlation Matrix2.jpg". The original opened
# a png() device for this .jpg filename and never closed it, so the file held
# PNG data under a JPEG name and later plots kept going to the same device.
# jpeg() matches the extension, and dev.off() flushes and closes the file.
jpeg(
  filename = "Correlation Matrix2.jpg",
  height = 1200,
  width = 1500,
  pointsize = 20
)
corrplot(mcor)
invisible(dev.off())
# NOTE(review): the original line was a bare `updateR`. Nothing in the visible
# code defines or loads an object of that name, so evaluating it would stop
# with "object 'updateR' not found". It is disabled here; if an R upgrade was
# intended (presumably installr::updateR() - confirm), run that interactively
# rather than from the analysis script.
# updateR
# Linear regression of FARE on the route-level predictors.
# Two fixes relative to the original call:
# - the NEW term referenced `airlines_train`, which is never defined; every
#   other term uses air_train;
# - columns are named in the formula and supplied through `data =` instead of
#   as air_train$<column> terms, so predict(model_1, newdata = air_test) can
#   find the predictors and coefficient names are plain column names.
model_1 <- lm(
  FARE ~ COUPON + NEW + HI + S_INCOME + E_INCOME + S_POP + E_POP +
    DISTANCE + PAX,
  data = air_train
)
summary(model_1)
# Residual diagnostics for model_1.
# NOTE(review): both plot titles say "Model_3" although the model plotted is
# model_1 - confirm which label is intended.

# Quantile-comparison plot of the model's residuals.
qqPlot(model_1, main = "QQ Plot of residuals: Model_3")

# Histogram of the studentized residuals with a standard normal density
# drawn on top for comparison.
residual1 <- studres(model_1)
hist(
  residual1,
  freq = FALSE,
  main = "Distribution of Studentized Residuals:Model_3"
)
# length.out is spelled out; the original `length =` relied on partial
# argument matching.
xfit1 <- seq(min(residual1), max(residual1), length.out = 40)
yfit1 <- dnorm(xfit1)
lines(xfit1, yfit1)

# Residual plots from car.
# NOTE(review): `id.n` is kept as written; newer car releases appear to expect
# `id = list(n = 5)` instead - confirm against the installed version.
residualPlot(model_1, id.n = 5)
residualPlots(model_1, id.n = 5)