How to add a label to the points in a scatterplot

June 12, 2016

####################################################################################################

I really do not remember from where I took the original codes, sorry.

You can use these codes to add a label to the points in a scatterplot.

The codes are based on a creative use of the ‘text’ function.

The lables are from the ‘rownames’ of your dataset, but you can rename any column as ‘rownames’.

Here we go….

### call the library you need (the data are in this library)

library(car)

### the data

data(cars)

head(cars)
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
str(cars)
## 'data.frame':    50 obs. of  2 variables:
##  $ speed: num  4 4 7 7 8 9 10 10 10 11 ...
##  $ dist : num  2 10 4 22 16 10 18 26 34 17 ...
### the model

mod <- lm(dist ~speed, data=cars)

summary(mod)
## 
## Call:
## lm(formula = dist ~ speed, data = cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.069  -9.525  -2.272   9.215  43.201 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.5791     6.7584  -2.601   0.0123 *  
## speed         3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438 
## F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12
### outlier with car function

outlierTest(mod)
## 
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
##    rstudent unadjusted p-value Bonferonni p
## 49 3.184993          0.0025707      0.12853
######################################################################
### plot the model
######################################################################

plot(dist ~speed, col="lightblue", pch=19, cex=2,data=cars)

abline(mod, col="red", lwd=3)

### this add the labels to the points using their rownames
### font = 2 is bold

text(dist ~speed, labels=rownames(cars),data=cars, cex=0.9, font=2)

######################################################################
### plot the model
######################################################################

plot(dist ~speed, col="lightblue", pch=19, cex=2,data=cars)

abline(mod, col="red", lwd=3)

### this add the labels to the points using values in speed

text(dist ~speed, labels=speed,data=cars, cex=0.9, font=2)

######################################################################
### plot the model
######################################################################

plot(dist ~speed, col="lightblue", pch=19, cex=2,data=cars)

abline(mod, col="red", lwd=3)


### this add the labels to the points using values in dist

text(dist~speed, labels=dist,data=cars, cex=0.9, font=2)

######################### another example #########################

library(car)

data(mtcars)
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
### the model

mod <- lm(mpg~disp, data=mtcars) 

summary(mod)
## 
## Call:
## lm(formula = mpg ~ disp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8922 -2.2022 -0.9631  1.6272  7.2305 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 29.599855   1.229720  24.070  < 2e-16 ***
## disp        -0.041215   0.004712  -8.747 9.38e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.251 on 30 degrees of freedom
## Multiple R-squared:  0.7183, Adjusted R-squared:  0.709 
## F-statistic: 76.51 on 1 and 30 DF,  p-value: 9.38e-10
### outlier with car function

outlierTest(mod)
## 
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
##                rstudent unadjusted p-value Bonferonni p
## Toyota Corolla 2.523971           0.017336      0.55475
######################################################################
### plot the model
######################################################################

plot(mpg~disp, col="lightblue", pch=19, cex=2,data=mtcars)

abline(mod, col="red", lwd=3)

### this add the labels to the points using their rownames
### font = 2 is bold

text(mpg~disp, labels=rownames(mtcars),data=mtcars, cex=0.9, font=2)

######################################################################
### plot the model
######################################################################

plot(mpg~disp, col="lightblue", pch=19, cex=2,data=mtcars)

abline(mod, col="red", lwd=3)


### this add the labels to the points using values in disp
### The “pos” option specifies the position of the text relative to the point.  

### 1 = below
### 2 = left
### 3 = above
### 4 = right

text(mpg~disp, labels=disp,data=mtcars, cex=0.9, font=2, pos=4)

######################################################################
### plot the model (points with labels on the right)
######################################################################

plot(mpg~disp, col="lightblue", pch=19, cex=2,data=mtcars)

abline(mod, col="red", lwd=3)

### this add the labels to the points using values in cyl
### The “pos” option specifies the position of the text relative to the point.  

### 1 = below
### 2 = left
### 3 = above
### 4 = right

text(mpg~disp, labels=cyl,data=mtcars, cex=0.9, font=2, pos=4)

######################################################################
### plot the model
######################################################################

### colors as in cyl, with transparency

addTrans <- function(color,trans)
{
  # This function adds transparancy to a color.
  # Define transparancy with an integer between 0 and 255
  # 0 being fully transparant and 255 being fully visable
  # Works with either color and trans a vector of equal length,
  # or one of the two of length 1.

  if (length(color)!=length(trans)&!any(c(length(color),length(trans))==1)) stop("Vector lengths not correct")
  if (length(color)==1 & length(trans)>1) color <- rep(color,length(trans))
  if (length(trans)==1 & length(color)>1) trans <- rep(trans,length(color))

  num2hex <- function(x)
  {
    hex <- unlist(strsplit("0123456789ABCDEF",split=""))
    return(paste(hex[(x-x%%16)/16+1],hex[x%%16+1],sep=""))
  }
  rgb <- rbind(col2rgb(color),trans)
  res <- paste("#",apply(apply(rgb,2,num2hex),2,paste,collapse=""),sep="")
  return(res)
}


plot(mpg~disp, col=addTrans(as.numeric(cyl),100), pch=19, cex=2,data=mtcars)

abline(mod, col="red", lwd=3)


### this add the labels to the points using values in cyl

text(mpg~disp, labels=cyl,data=mtcars, cex=1.5, font=2)

##############################################################################
##### more clear with paired plots
##############################################################################

par(mfrow=c(1,2))

### first plot (only the points)

plot(mpg~disp, col=addTrans(as.numeric(cyl),100), pch=19, cex=2,data=mtcars)

abline(mod, col="red", lwd=3)


### second plot (only the labels)

plot(mpg~disp, col="white", pch=19, cex=2,data=mtcars)

abline(mod, col="red", lwd=3)


### this add the labels to the points using values in cyl

text(mpg~disp, labels=cyl,data=mtcars, cex=0.9, font=2)

par(mfrow=c(1,1))
### done



##############################################################################
##### best
##############################################################################


### first plot (only the points)

plot(mpg~disp, col=addTrans(as.numeric(cyl),100), pch=19, cex=2,data=mtcars)

abline(mod, col="red", lwd=3)

### this add the labels to the points using values in cyl

text(mpg~disp, labels=cyl,data=mtcars, cex=0.9, font=2, pos=4)

I hope you’ve enjoyed this!

Have a nice day!

Alt text

Twitter: @AntoViral

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents.

For more details on using R Markdown see http://rmarkdown.rstudio.com.