How to add a label to the points in a scatterplot
####################################################################################################
I really do not remember from where I took the original codes, sorry.
You can use these codes to add a label to the points in a scatterplot.
The codes are based on a creative use of the ‘text’ function.
The lables are from the ‘rownames’ of your dataset, but you can rename any column as ‘rownames’.
Here we go….
### call the library you need (the data are in this library)
library(car)
### the data
data(cars)
head(cars)
## speed dist
## 1 4 2
## 2 4 10
## 3 7 4
## 4 7 22
## 5 8 16
## 6 9 10
str(cars)
## 'data.frame': 50 obs. of 2 variables:
## $ speed: num 4 4 7 7 8 9 10 10 10 11 ...
## $ dist : num 2 10 4 22 16 10 18 26 34 17 ...
### the model
mod <- lm(dist ~speed, data=cars)
summary(mod)
##
## Call:
## lm(formula = dist ~ speed, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## speed 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
### outlier with car function
outlierTest(mod)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 49 3.184993 0.0025707 0.12853
######################################################################
### plot the model
######################################################################
plot(dist ~speed, col="lightblue", pch=19, cex=2,data=cars)
abline(mod, col="red", lwd=3)
### this add the labels to the points using their rownames
### font = 2 is bold
text(dist ~speed, labels=rownames(cars),data=cars, cex=0.9, font=2)
######################################################################
### plot the model
######################################################################
plot(dist ~speed, col="lightblue", pch=19, cex=2,data=cars)
abline(mod, col="red", lwd=3)
### this add the labels to the points using values in speed
text(dist ~speed, labels=speed,data=cars, cex=0.9, font=2)
######################################################################
### plot the model
######################################################################
plot(dist ~speed, col="lightblue", pch=19, cex=2,data=cars)
abline(mod, col="red", lwd=3)
### this add the labels to the points using values in dist
text(dist~speed, labels=dist,data=cars, cex=0.9, font=2)
######################### another example #########################
library(car)
data(mtcars)
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
### the model
mod <- lm(mpg~disp, data=mtcars)
summary(mod)
##
## Call:
## lm(formula = mpg ~ disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8922 -2.2022 -0.9631 1.6272 7.2305
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.599855 1.229720 24.070 < 2e-16 ***
## disp -0.041215 0.004712 -8.747 9.38e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.251 on 30 degrees of freedom
## Multiple R-squared: 0.7183, Adjusted R-squared: 0.709
## F-statistic: 76.51 on 1 and 30 DF, p-value: 9.38e-10
### outlier with car function
outlierTest(mod)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## Toyota Corolla 2.523971 0.017336 0.55475
######################################################################
### plot the model
######################################################################
plot(mpg~disp, col="lightblue", pch=19, cex=2,data=mtcars)
abline(mod, col="red", lwd=3)
### this add the labels to the points using their rownames
### font = 2 is bold
text(mpg~disp, labels=rownames(mtcars),data=mtcars, cex=0.9, font=2)
######################################################################
### plot the model
######################################################################
plot(mpg~disp, col="lightblue", pch=19, cex=2,data=mtcars)
abline(mod, col="red", lwd=3)
### this add the labels to the points using values in disp
### The “pos” option specifies the position of the text relative to the point.
### 1 = below
### 2 = left
### 3 = above
### 4 = right
text(mpg~disp, labels=disp,data=mtcars, cex=0.9, font=2, pos=4)
######################################################################
### plot the model (points with labels on the right)
######################################################################
plot(mpg~disp, col="lightblue", pch=19, cex=2,data=mtcars)
abline(mod, col="red", lwd=3)
### this add the labels to the points using values in cyl
### The “pos” option specifies the position of the text relative to the point.
### 1 = below
### 2 = left
### 3 = above
### 4 = right
text(mpg~disp, labels=cyl,data=mtcars, cex=0.9, font=2, pos=4)
######################################################################
### plot the model
######################################################################
### colors as in cyl, with transparency
addTrans <- function(color,trans)
{
# This function adds transparancy to a color.
# Define transparancy with an integer between 0 and 255
# 0 being fully transparant and 255 being fully visable
# Works with either color and trans a vector of equal length,
# or one of the two of length 1.
if (length(color)!=length(trans)&!any(c(length(color),length(trans))==1)) stop("Vector lengths not correct")
if (length(color)==1 & length(trans)>1) color <- rep(color,length(trans))
if (length(trans)==1 & length(color)>1) trans <- rep(trans,length(color))
num2hex <- function(x)
{
hex <- unlist(strsplit("0123456789ABCDEF",split=""))
return(paste(hex[(x-x%%16)/16+1],hex[x%%16+1],sep=""))
}
rgb <- rbind(col2rgb(color),trans)
res <- paste("#",apply(apply(rgb,2,num2hex),2,paste,collapse=""),sep="")
return(res)
}
plot(mpg~disp, col=addTrans(as.numeric(cyl),100), pch=19, cex=2,data=mtcars)
abline(mod, col="red", lwd=3)
### this add the labels to the points using values in cyl
text(mpg~disp, labels=cyl,data=mtcars, cex=1.5, font=2)
##############################################################################
##### more clear with paired plots
##############################################################################
par(mfrow=c(1,2))
### first plot (only the points)
plot(mpg~disp, col=addTrans(as.numeric(cyl),100), pch=19, cex=2,data=mtcars)
abline(mod, col="red", lwd=3)
### second plot (only the labels)
plot(mpg~disp, col="white", pch=19, cex=2,data=mtcars)
abline(mod, col="red", lwd=3)
### this add the labels to the points using values in cyl
text(mpg~disp, labels=cyl,data=mtcars, cex=0.9, font=2)
par(mfrow=c(1,1))
### done
##############################################################################
##### best
##############################################################################
### first plot (only the points)
plot(mpg~disp, col=addTrans(as.numeric(cyl),100), pch=19, cex=2,data=mtcars)
abline(mod, col="red", lwd=3)
### this add the labels to the points using values in cyl
text(mpg~disp, labels=cyl,data=mtcars, cex=0.9, font=2, pos=4)
I hope you’ve enjoyed this!
Have a nice day!