library(ggplot2)
require(gridExtra)
library(stargazer)
setwd('~/desktop/Ostats')
options(digits=2)
nitrate <- read.table("Nitrate.txt", header=TRUE) ## Read in data
head(nitrate) ## Display first 6 rows of dataset
## RIVER COUNTRY DISCHARG RUNOFF PREC AREA DENSITY NO3 DEP NPREC
## 1 Adige Italy 2.2e+02 18.3 85 1220 102.0 67.0 1238 46.0
## 2 Amazon S_America 1.8e+05 24.8 181 7050000 1.0 3.0 121 2.1
## 3 Caragh Ireland 7.3e+00 45.6 105 160 7.2 3.6 86 2.6
## 4 Columbia USA 7.9e+03 11.8 99 670000 10.0 26.6 63 2.0
## 5 Danube Rumania 6.5e+03 8.1 58 805000 90.0 46.0 826 45.0
## 6 Delaware USA 3.4e+02 19.1 107 17600 100.0 61.0 852 25.0
names(nitrate) ## Retrieve names of variables from dataset
## [1] "RIVER" "COUNTRY" "DISCHARG" "RUNOFF" "PREC" "AREA"
## [7] "DENSITY" "NO3" "DEP" "NPREC"
## Add transformed variables to the data. We will consider log, sqrt, and third power.
nitrate <- cbind(nitrate,
DISCHARG.2=(nitrate$DISCHARG)^.5, # column DISCHARG.2 contains the sqrt DISCHARG values
DISCHARG.ln=log(nitrate$DISCHARG), # column DISCHARG.ln contains the natural logs of the DISCHARG values
DISCHARG.3=(nitrate$DISCHARG)^(1/3), # column DISCHARG.3 contains the DISCHARG values to the third power
RUNOFF.2=(nitrate$RUNOFF)^.5, # column RUNOFF.2 contains the sqrt RUNOFF values
RUNOFF.ln=log(nitrate$RUNOFF), # column RUNOFF.ln contains the natural logs of the RUNOFF values
RUNOFF.3=(nitrate$RUNOFF)^(1/3), # column RUNOFF.3 contains the RUNOFF values to the third power values
PREC.2=(nitrate$PREC)^.5, # column PREC.2 contains the sqrt PREC values
PREC.ln=log(nitrate$PREC), # column PREC.ln contains the natural logs of the PREC values
PREC.3=(nitrate$PREC)^(1/3), # column PREC.3 contains the PREC values to the third power
AREA.2=(nitrate$AREA)^.5, # column AREA.2 contains the sqrt AREA values
AREA.ln=log(nitrate$AREA), # column AREA.ln contains the natural logs of the AREA values
AREA.3=(nitrate$AREA)^(1/3), # column AREA.3 contains the AREA values to the third power
DENSITY.2=(nitrate$DENSITY)^.5, # column DENSITY.2 contains the sqrt DENSITY values
DENSITY.ln=log(nitrate$DENSITY), # column DENSITY.ln contains the natural logs of the DENSITY values
DENSITY.3=(nitrate$DENSITY)^(1/3), # column DENSITY.3 contains the DENSITY values to the third power
NO3.2=(nitrate$NO3)^.5, # column NO3.2 contains the sqrt NO3 values
NO3.ln=log(nitrate$NO3), # column NO3.ln contains the natural logs of the NO3 values
NO3.3=(nitrate$NO3)^(1/3), # column NO3.3 contains the NO3 values to the third power
DEP.2=(nitrate$DEP)^.5, # column DEP.2 contains the sqrt DEP values
DEP.ln=log(nitrate$DEP), # column DEP.ln contains the natural logs of the DEP values
DEP.3=(nitrate$DEP)^(1/3), # column DEP.3 contains the DEP values to the third power
NPREC.2=(nitrate$NPREC)^.5, # column NPREC.2 contains the sqrt NPREC values
NPREC.ln=log(nitrate$NPREC), # column NPREC.ln contains the natural logs of the NPREC values
NPREC.3=(nitrate$NPREC)^(1/3) # column NPREC.3 contains the NPREC values to the third power
)
## Nitrate
## Find the transformations hat best conform to the normality assumption
N <- ggplot(nitrate, aes(x=NO3)) +
geom_histogram(colour = "darkgreen", fill = "white") +
theme_bw()
N2 <- ggplot(nitrate, aes(x=NO3.2)) +
geom_histogram(colour = "darkgreen", fill = "white") +
theme_bw()
Nln <- ggplot(nitrate, aes(x=NO3.ln)) +
geom_histogram(colour = "darkgreen", fill = "white") +
theme_bw()
N3 <- ggplot(nitrate, aes(x=NO3.3)) +
geom_histogram(colour = "darkgreen", fill = "white") +
theme_bw()
grid.arrange(N, N2, Nln, N3, ncol=4)
## Nitrate Precipitation
NP <- ggplot(nitrate, aes(x=NPREC)) +
geom_histogram(colour = "darkblue", fill = "white") +
theme_bw()
NP2 <- ggplot(nitrate, aes(x=NPREC.2)) +
geom_histogram(colour = "darkblue", fill = "white") +
theme_bw()
NPln <- ggplot(nitrate, aes(x=NPREC.ln)) +
geom_histogram(colour = "darkblue", fill = "white") +
theme_bw()
NP3 <- ggplot(nitrate, aes(x=NPREC.3)) +
geom_histogram(colour = "darkblue", fill = "white") +
theme_bw()
grid.arrange(NP, NP2, NPln, NP3, ncol=4)
## Discharge
DG <- ggplot(nitrate, aes(x=DISCHARG)) +
geom_histogram(colour = "purple", fill = "white") +
theme_bw()
DG2 <- ggplot(nitrate, aes(x=DISCHARG.2)) +
geom_histogram(colour = "purple", fill = "white") +
theme_bw()
DGln <- ggplot(nitrate, aes(x=DISCHARG.ln)) +
geom_histogram(colour = "purple", fill = "white") +
theme_bw()
DG3 <- ggplot(nitrate, aes(x=DISCHARG.3)) +
geom_histogram(colour = "purple", fill = "white") +
theme_bw()
grid.arrange(DG, DG2, DGln, DG3, ncol=4)
## Runoff
R <- ggplot(nitrate, aes(x=RUNOFF)) +
geom_histogram(colour = "orange", fill = "white") +
theme_bw()
R2 <- ggplot(nitrate, aes(x=RUNOFF.2)) +
geom_histogram(colour = "orange", fill = "white") +
theme_bw()
Rln <- ggplot(nitrate, aes(x=RUNOFF.ln)) +
geom_histogram(colour = "orange", fill = "white") +
theme_bw()
R3 <- ggplot(nitrate, aes(x=RUNOFF.3)) +
geom_histogram(colour = "orange", fill = "white") +
theme_bw()
grid.arrange(R, R2, Rln, R3, ncol=4)
## Precipitation
P <- ggplot(nitrate, aes(x=PREC)) +
geom_histogram(colour = "darkred", fill = "white") +
theme_bw()
P2 <- ggplot(nitrate, aes(x=PREC.2)) +
geom_histogram(colour = "darkred", fill = "white") +
theme_bw()
Pln <- ggplot(nitrate, aes(x=PREC.ln)) +
geom_histogram(colour = "darkred", fill = "white") +
theme_bw()
P3 <- ggplot(nitrate, aes(x=PREC.3)) +
geom_histogram(colour = "darkred", fill = "white") +
theme_bw()
grid.arrange(P, P2, Pln, P3, ncol=4)
## Area
A <- ggplot(nitrate, aes(x=AREA)) +
geom_histogram(colour = "green", fill = "white") +
theme_bw()
A2 <- ggplot(nitrate, aes(x=AREA.2)) +
geom_histogram(colour = "green", fill = "white") +
theme_bw()
Aln <- ggplot(nitrate, aes(x=AREA.ln)) +
geom_histogram(colour = "green", fill = "white") +
theme_bw()
A3 <- ggplot(nitrate, aes(x=AREA.3)) +
geom_histogram(colour = "green", fill = "white") +
theme_bw()
grid.arrange(A, A2, Aln, A3, ncol=4)
## Density
Dn <- ggplot(nitrate, aes(x=DENSITY)) +
geom_histogram(colour = "black", fill = "white") +
theme_bw()
Dn2 <- ggplot(nitrate, aes(x=DENSITY.2)) +
geom_histogram(colour = "black", fill = "white") +
theme_bw()
Dnln <- ggplot(nitrate, aes(x=DENSITY.ln)) +
geom_histogram(colour = "black", fill = "white") +
theme_bw()
Dn3 <- ggplot(nitrate, aes(x=DENSITY.3)) +
geom_histogram(colour = "black", fill = "white") +
theme_bw()
grid.arrange(Dn, Dn2, Dnln, Dn3, ncol=4)
## Deposition
D <- ggplot(nitrate, aes(x=DEP)) +
geom_histogram(colour = "brown", fill = "white") +
theme_bw()
D2 <- ggplot(nitrate, aes(x=DEP.2)) +
geom_histogram(colour = "brown", fill = "white") +
theme_bw()
Dln <- ggplot(nitrate, aes(x=DEP.ln)) +
geom_histogram(colour = "brown", fill = "white") +
theme_bw()
D3 <- ggplot(nitrate, aes(x=DEP.3)) +
geom_histogram(colour = "brown", fill = "white") +
theme_bw()
grid.arrange(D, D2, Dln, D3, ncol=4)
theme_set(theme_minimal(20))
library(GGally)
set.seed(1836)
subset <- nitrate[,c(12,17,19,22,25,27,31,33)]
ggpairs(subset, diag=list(continuous="density"), axisLabels='none')
frame <- data.frame(nitrate[,c(12,17,19,22,25,27,31,33)])
cor(frame)
## DISCHARG.ln PREC.2 PREC.3 AREA.3 DENSITY.3 NO3.ln DEP.3
## DISCHARG.ln 1.00 0.285 0.276 0.798 -0.327 -0.38 -0.26
## PREC.2 0.28 1.000 0.998 -0.019 -0.044 -0.14 0.15
## PREC.3 0.28 0.998 1.000 -0.043 -0.025 -0.11 0.18
## AREA.3 0.80 -0.019 -0.043 1.000 -0.416 -0.43 -0.47
## DENSITY.3 -0.33 -0.044 -0.025 -0.416 1.000 0.90 0.68
## NO3.ln -0.38 -0.136 -0.112 -0.429 0.899 1.00 0.67
## DEP.3 -0.26 0.148 0.180 -0.472 0.678 0.67 1.00
## NPREC.ln -0.36 -0.381 -0.354 -0.376 0.650 0.69 0.85
## NPREC.ln
## DISCHARG.ln -0.36
## PREC.2 -0.38
## PREC.3 -0.35
## AREA.3 -0.38
## DENSITY.3 0.65
## NO3.ln 0.69
## DEP.3 0.85
## NPREC.ln 1.00
lm1 <- lm(NO3.ln~DISCHARG.ln+RUNOFF.3+PREC.2+AREA.3+DENSITY.3+NPREC.ln+DEP.3, data=nitrate)
stargazer(lm1,type="text",title="Regression Outputs", ci=TRUE, ci.level=0.95,single.row=TRUE)
##
## Regression Outputs
## ===============================================
## Dependent variable:
## ---------------------------
## NO3.ln
## -----------------------------------------------
## DISCHARG.ln 0.010 (-0.170, 0.190)
## RUNOFF.3 -0.440* (-0.920, 0.038)
## PREC.2 -0.093 (-0.380, 0.190)
## AREA.3 -0.004 (-0.014, 0.006)
## DENSITY.3 0.550*** (0.420, 0.670)
## NPREC.ln -0.450 (-1.800, 0.870)
## DEP.3 0.250 (-0.310, 0.820)
## Constant 2.800** (0.440, 5.100)
## -----------------------------------------------
## Observations 42
## R2 0.850
## Adjusted R2 0.810
## Residual Std. Error 0.590 (df = 34)
## F Statistic 27.000*** (df = 7; 34)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
p1 <- ggplot(nitrate, aes(AREA.3, NO3.ln)) +
geom_point() +
stat_smooth(method="loess")+
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
p2 <- ggplot(nitrate, aes(DISCHARG.ln, NO3.ln)) +
geom_point() +
stat_smooth(method="loess")+
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
p3 <- ggplot(nitrate, aes(PREC.2, NO3.ln)) +
geom_point() +
stat_smooth(method="loess")+
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
p4 <- ggplot(nitrate, aes(RUNOFF.3, NO3.ln)) +
geom_point() +
stat_smooth(method="loess")+
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
p5 <- ggplot(nitrate, aes(NPREC.ln, NO3.ln)) +
geom_point() +
stat_smooth(method="loess")+
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
p6 <- ggplot(nitrate, aes(DEP.ln, NO3.ln)) +
geom_point() +
stat_smooth(method="loess")+
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
p7 <- ggplot(nitrate, aes(DENSITY.3, NO3.ln)) +
geom_point() +
stat_smooth(method="loess")+
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
grid.arrange(p1, p2, p3, p4, p5, p6, p7, ncol=4, nrow=2)
resid <- resid(lm1)
r1 <- ggplot(nitrate, aes(AREA.3, resid)) +
geom_point() +
geom_hline(yintercept=0, col="red", linetype="dashed") +
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
r2 <- ggplot(nitrate, aes(DISCHARG.ln, resid)) +
geom_point() +
geom_hline(yintercept=0, col="red", linetype="dashed") +
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
r3 <- ggplot(nitrate, aes(PREC.2, resid)) +
geom_point() +
geom_hline(yintercept=0, col="red", linetype="dashed") +
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
r4 <- ggplot(nitrate, aes(RUNOFF.3, resid)) +
geom_point() +
geom_hline(yintercept=0, col="red", linetype="dashed") +
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
r5 <- ggplot(nitrate, aes(NPREC.ln, resid)) +
geom_point() +
geom_hline(yintercept=0, col="red", linetype="dashed") +
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
r6 <- ggplot(nitrate, aes(DEP.ln, resid)) +
geom_point() +
geom_hline(yintercept=0, col="red", linetype="dashed") +
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
r7 <- ggplot(nitrate, aes(DENSITY.3, resid)) +
geom_point() +
geom_hline(yintercept=0, col="red", linetype="dashed") +
theme_bw()+
theme(axis.title.x=element_text(size=9.5)) +
theme(axis.title.y=element_text(size=9.5))
grid.arrange(r1, r2, r3, r4, r5, r6, r7, ncol=4, nrow=2)
…