require(lmPerm)
require(ggplot2)
## Loading required package: ggplot2
x <- 1:1000
set.seed(1000)
y1 <- x*2+runif(1000,-100,100)
y1 <- y1+min(y1)
y2 <- 0.75*y1 + abs(rnorm(1000,50,10))
datos <- data.frame(x =c(x,x),y=c(y1,y2),tipo=factor(c(rep("A",1000),rep("B",1000))))
str(datos)
## 'data.frame': 2000 obs. of 3 variables:
## $ x : int 1 2 3 4 5 6 7 8 9 10 ...
## $ y : num -106.9 -18.7 -145.7 -28.3 -61.2 ...
## $ tipo: Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
summary(datos)
## x y tipo
## Min. : 1.0 Min. :-148.9 A:1000
## 1st Qu.: 250.8 1st Qu.: 394.6 B:1000
## Median : 500.5 Median : 818.4
## Mean : 500.5 Mean : 835.7
## 3rd Qu.: 750.2 3rd Qu.:1245.5
## Max. :1000.0 Max. :2010.4
ggplot(data=datos) +
geom_line(aes(x=x,y=y,color=tipo))
fit.lmp <- lmp(y~x*tipo,perm="Prob",data=datos,center=FALSE)
## [1] "Settings: unique SS "
All coefficients are in the lmp object
coef(fit.lmp)
## (Intercept) x tipo1 x:tipo1
## -37.6954176 1.7449803 -33.4742219 0.2484024
but the summary does not output the intercept:
summary(fit.lmp)
##
## Call:
## lmp(formula = y ~ x * tipo, data = datos, perm = "Prob", center = FALSE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -100.4308 -41.5240 0.8618 42.4212 101.7999
##
## Coefficients:
## Estimate Iter Pr(Prob)
## x 1.7450 51 1
## tipo1 -33.4742 5000 <2e-16 ***
## x:tipo1 0.2484 5000 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.2 on 1996 degrees of freedom
## Multiple R-Squared: 0.9903, Adjusted R-squared: 0.9903
## F-statistic: 6.793e+04 on 3 and 1996 DF, p-value: < 2.2e-16
while the summary of lm() does:
fit.lm <- lm(y~x*tipo, data=datos)
summary(fit.lm)
##
## Call:
## lm(formula = y ~ x * tipo, data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -100.431 -41.524 0.862 42.421 101.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -71.169640 3.240713 -21.96 <2e-16 ***
## x 1.993383 0.005609 355.40 <2e-16 ***
## tipoB 66.948444 4.583061 14.61 <2e-16 ***
## x:tipoB -0.496805 0.007932 -62.63 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.2 on 1996 degrees of freedom
## Multiple R-squared: 0.9903, Adjusted R-squared: 0.9903
## F-statistic: 6.793e+04 on 3 and 1996 DF, p-value: < 2.2e-16
also note that center=TRUE (so Intercept=0) is the default in lmp()
The help page states “Either permutation test p-values or the usual F-test p-values will be output”, but this is not the case for R-squared
While summary.lm() uses the names of factor levels, summary.lmp() merely adds an integer to the variable name, which is very confusing
Note that the coefficients calculated by lmp() and lm() are too different in this example:
coefficients(fit.lm)
## (Intercept) x tipoB x:tipoB
## -71.1696395 1.9933827 66.9484438 -0.4968049
coefficients(fit.lmp)
## (Intercept) x tipo1 x:tipo1
## -37.6954176 1.7449803 -33.4742219 0.2484024
This is because lmp is applying contr.sum rather than contr.treatment (thanks to Zheyuan Li in http://stackoverflow.com/questions/39291265/lmpermlmpyxf-center-true-vs-lmyxf-very-different-coefficients) For lm():
coefficients(fit.lm)[1:2] # lm coefficients for Level A
## (Intercept) x
## -71.169640 1.993383
coefficients(fit.lm)[1:2] + coefficients(fit.lm)[3:4] # lm coefficients for Level B
## (Intercept) x
## -4.221196 1.496578
Which is coincident with the contr.treatment scheme
contrasts(datos$tipo)
## B
## A 0
## B 1
attributes(fit.lm$qr$qr)$contrasts
## $tipo
## [1] "contr.treatment"
contr.treatment(2)
## 2
## 1 0
## 2 1
Verify on plot
ggplot(data=datos) +
geom_line(aes(x=x,y=y,color=tipo)) +
geom_abline(intercept=coefficients(fit.lm)[1], slope=coefficients(fit.lm)[2],color="red") +
geom_abline(intercept=coefficients(fit.lm)[1]+coefficients(fit.lm)[3],
slope=coefficients(fit.lm)[2]+coefficients(fit.lm)[4],color="blue")
contr.sum is used on lmp():
attributes(fit.lmp$model$ tipo)$contrasts #got it by digging using
## #1
## A 1
## B -1
contr.sum(2)
## [,1]
## 1 1
## 2 -1
coefficients(fit.lmp)[1:2] + coefficients(fit.lmp)[3:4] # lmp coefficients for Level A
## (Intercept) x
## -71.169640 1.993383
coefficients(fit.lmp)[1:2] - coefficients(fit.lmp)[3:4] # lmp coefficients for Level B
## (Intercept) x
## -4.221196 1.496578
Verify on plot
ggplot(data=datos) +
geom_line(aes(x=x,y=y,color=tipo)) +
geom_abline(intercept=coefficients(fit.lmp)[1],
slope=coefficients(fit.lmp)[2],color="black") +
geom_abline(intercept=coefficients(fit.lmp)[1]+coefficients(fit.lmp)[3],
slope=coefficients(fit.lmp)[2]+coefficients(fit.lmp)[4],color="red") +
geom_abline(intercept=coefficients(fit.lmp)[1]-coefficients(fit.lmp)[3],
slope=coefficients(fit.lmp)[2]-coefficients(fit.lmp)[4],color="blue")
Specifying the contrast scheme, coefficients are coincident
fit2.lmp <- lmp(y~x*tipo,perm="Prob",data=datos,center=FALSE,contrasts=list(tipo="contr.treatment"))
## [1] "Settings: unique SS "
coef(fit2.lmp)
## (Intercept) tipoB x x:tipoB
## -71.1696395 66.9484438 1.9933827 -0.4968049
coef(fit2.lm)
## (Intercept) x tipoB x:tipoB
## -71.1696395 1.9933827 66.9484438 -0.4968049
Iterations stop when the “estimated standard error of the estimated p is less than Ca*p“, where Ca defaults to 0.1 and can be tuned by the user. Nevertheless, that standard error is not returned.
With one specific data set, despite setting Ca to a very low number, I get sometimes very few iterations, e.g.
# Call:
# lmp(formula = DNDVIan ~ TenureZone * year, data = selcombi, perm = "Prob",
# center = FALSE, maxiter = 5000, Ca = 1e-09)
#
# Residuals:
# Min 1Q Median 3Q Max
# -22979.9 -5543.0 -450.6 6204.9 20359.2
#
# Coefficients:
# Estimate Iter Pr(Prob)
# TenureZone1 -10900.886 51 1
# year 154.179 5000 <2e-16 ***
# TenureZone1:year 5.593 51 1
While in other runs, with the same data and arguments:
# Call:
# lmp(formula = DNDVIan ~ TenureZone * year, data = selcombi, perm = "Prob",
# center = FALSE, maxiter = 5000, Ca = 1e-09)
#
# Residuals:
# Min 1Q Median 3Q Max
# -22979.9 -5543.0 -450.6 6204.9 20359.2
#
# Coefficients:
# Estimate Iter Pr(Prob)
# TenureZone1 -10900.886 5000 0.804
# year 154.179 5000 <2e-16 ***
# TenureZone1:year 5.593 5000 0.801