airquality modeling using party and lm

library(party)
## Loading required package: survival
## Loading required package: splines
## Loading required package: grid
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: coin
## Loading required package: mvtnorm
## Loading required package: zoo
## Attaching package: 'zoo'
## The following object(s) are masked from 'package:base':
## 
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: strucchange
## Loading required package: vcd
## Loading required package: MASS
## Loading required package: colorspace
## Load the airquality data set and print per-column summaries
## (the output below reports missing values in Ozone and Solar.R).
data(airquality)
summary(airquality)
##      Ozone          Solar.R         Wind            Temp     
##  Min.   :  1.0   Min.   :  7   Min.   : 1.70   Min.   :56.0  
##  1st Qu.: 18.0   1st Qu.:116   1st Qu.: 7.40   1st Qu.:72.0  
##  Median : 31.5   Median :205   Median : 9.70   Median :79.0  
##  Mean   : 42.1   Mean   :186   Mean   : 9.96   Mean   :77.9  
##  3rd Qu.: 63.2   3rd Qu.:259   3rd Qu.:11.50   3rd Qu.:85.0  
##  Max.   :168.0   Max.   :334   Max.   :20.70   Max.   :97.0  
##  NA's   :37      NA's   :7                                   
##      Month           Day      
##  Min.   :5.00   Min.   : 1.0  
##  1st Qu.:6.00   1st Qu.: 8.0  
##  Median :7.00   Median :16.0  
##  Mean   :6.99   Mean   :15.8  
##  3rd Qu.:8.00   3rd Qu.:23.0  
##  Max.   :9.00   Max.   :31.0  
## 
## Histogram of the raw Ozone readings, drawn with fixed x and y axis ranges.
hist(airquality$Ozone, xlim = c(0, 200), ylim = c(0, 50))

plot of chunk unnamed-chunk-1

## Keep only the rows where Ozone (the response) was observed.
airq <- subset(airquality, !is.na(Ozone))
## Conditional inference tree for Ozone using every other column as predictor.
airct <- ctree(Ozone ~ ., data = airq)
## Tree predictions for the training rows. The new-data argument of predict()
## for a ctree fit is `newdata`; a `data` argument is swallowed by `...` and
## silently ignored, so it must not be used here.
## NOTE(review): the stored column appears to be a one-column matrix (the
## summary header further down reads "pred.Ozone.Ozone") -- left unchanged.
airq$pred.Ozone <- predict(airct, newdata = airq)
## default: boxplots
plot(airct)

plot of chunk unnamed-chunk-1

## change colors: hand the styling to the default terminal panel via tp_args
blue_boxes <- list(col = "blue", fill = hsv(2/3, 0.5, 1))
plot(airct, tp_args = blue_boxes)

plot of chunk unnamed-chunk-1

## equivalent to: build the boxplot panel explicitly and pass it to plot()
boxplot_panel <- node_boxplot(airct, col = "blue", fill = hsv(2/3, 0.5, 1))
plot(airct, terminal_panel = boxplot_panel)

plot of chunk unnamed-chunk-1

### very simple display; the mean is given in each terminal node
plot(airct, type = "simple")

plot of chunk unnamed-chunk-1

### density estimates drawn in the terminal nodes
plot(airct, terminal_panel = node_density)

plot of chunk unnamed-chunk-1

### histograms in the terminal nodes, with a fixed ymax and x scale
hist_panel <- node_hist(airct, ymax = 0.06, xscale = c(0, 250))
plot(airct, terminal_panel = hist_panel)

plot of chunk unnamed-chunk-1

## Compare the distribution of observed Ozone with the tree predictions
## stored in pred.Ozone.
summary(airq[, c("Ozone", "pred.Ozone")])
##      Ozone       pred.Ozone.Ozone
##  Min.   :  1.0   Min.   :18.48   
##  1st Qu.: 18.0   1st Qu.:18.48   
##  Median : 31.5   Median :31.14   
##  Mean   : 42.1   Mean   :42.13   
##  3rd Qu.: 63.2   3rd Qu.:81.63   
##  Max.   :168.0   Max.   :81.63
## Observed vs. tree-predicted Ozone, both axes on the same 0-200 range.
plot(airq$Ozone, airq$pred.Ozone, xlim = c(0, 200), ylim = c(0, 200))

plot of chunk unnamed-chunk-1

## Linear model of Ozone on the remaining columns among the first four
## (Ozone, Solar.R, Wind, Temp per the summary of airquality above).
## Restricting to columns 1:4 keeps Month, Day and the pred.Ozone column
## added earlier out of the `.` formula.
lm.result <- lm(Ozone ~ ., data = airq[, c(1:4)])
summary(lm.result)
## 
## Call:
## lm(formula = Ozone ~ ., data = airq[, c(1:4)])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -40.48 -14.22  -3.55  10.10  95.62 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -64.3421    23.0547   -2.79   0.0062 ** 
## Solar.R       0.0598     0.0232    2.58   0.0112 *  
## Wind         -3.3336     0.6544   -5.09  1.5e-06 ***
## Temp          1.6521     0.2535    6.52  2.4e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 21.2 on 107 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared: 0.606,   Adjusted R-squared: 0.595 
## F-statistic: 54.8 on 3 and 107 DF,  p-value: <2e-16
## Observed vs. lm-predicted Ozone, on the same 0-200 axis ranges used for
## the tree plot.
## NOTE(review): rows with a missing predictor presumably get NA predictions
## and are not drawn (the fit above reports 5 rows dropped) -- confirm.
plot(airq$Ozone, predict(lm.result, newdata = airq[, c(1:4)]), xlim = c(0, 200), 
    ylim = c(0, 200))

plot of chunk unnamed-chunk-1