GEOG 515 - spring 15: Introduction to R

This is an introduction to R. It describes basic operations of data input/output, making graphs and simple statistics.

Getting Started

Some R packages (i.e., libraries) are part of the basic installation. Others can be downloaded from CRAN, which currently hosts over 1000 packages for various purposes. If you want to list all installed packages, type:

library()

A package is loaded into R using the library command, so to load the “diagram” package you should enter:

library(diagram)

To list the functions in the diagram package with brief descriptions, enter:

help(package=diagram)

To view the vignette, type:

vignette("diagram")  # plain ASCII quotes: typographic quotes are a syntax error in R

To look at the documentation for the function plotmat(), type:

?plotmat  # opens the help page of plotmat(); "??plotmat" would run a keyword search instead

Examples of the options() function

To view defaults, type:

options()

To see the current (default) number of digits, type:

getOption("digits")  # plain ASCII quotes: typographic quotes are a syntax error in R

If you want to change the maximum number of digits printed from 7 (default) to 10, type:

options(digits=10)

Using R as a calculator

sqrt(16)+6^3

## [1] 220

a <- 12^2  # store the result in a variable (use <- for assignment)
a          # typing the name prints the stored value

## [1] 144

Vectors

# Examples of creating vectors:
v1 <- c(2.5, 4, 7.3, 0.1)        # numeric vector from literal values
v2 <- c("A", "B", "C", "D")      # character vector
v3 <- -3:3                       # integer sequence from -3 to 3
v4 <- seq(0, 2, by = 0.3)        # fixed step of 0.3; the last value not above 2 is 1.8
v5 <- seq(0, 2, length.out = 6)  # six equally spaced values from 0 to 2
v6 <- rep(1:5, each = 2)         # each element repeated twice in a row
v7 <- rep(1:5, times = 2)        # the whole sequence repeated twice
v1

## [1] 2.5 4.0 7.3 0.1

v2

## [1] "A" "B" "C" "D"

v3

## [1] -3 -2 -1  0  1  2  3

v4

## [1] 0.0 0.3 0.6 0.9 1.2 1.5 1.8

v5

## [1] 0.0 0.4 0.8 1.2 1.6 2.0

v6

##  [1] 1 1 2 2 3 3 4 4 5 5

v7

##  [1] 1 2 3 4 5 1 2 3 4 5

Examples of referencing vector elements

x <-  c(4, 9,  2,  12,  11,  3)
x[4]

## [1] 12

x[1:3]

## [1] 4 9 2

x[c(2,5,6)]

## [1]  9 11  3

x[-3]

## [1]  4  9 12 11  3

x[-c(4,5)]

## [1] 4 9 2 3

x[x>4]   # keep only the elements greater than 4

## [1]  9 12 11

x        # subsetting returns a new vector; x itself is unchanged

## [1]  4  9  2 12 11  3

Examples of vector operations:

x <-  c(2, 4, 6, 0)
y <-  x^2 + 1
y

## [1]  5 17 37  1

x*y

## [1]  10  68 222   0

Example of matrix:

x <-  matrix(c(1,2,3,4,5,6), nrow=2,  ncol=3, byrow=TRUE, dimnames=list(rows=c("row1", "row2"), cols=c("col1", "col2", "col3")))
x

##       cols
## rows   col1 col2 col3
##   row1    1    2    3
##   row2    4    5    6

Example of list:

z <-  list(c(1,2,3), "Alisa", x)
z

## [[1]]
## [1] 1 2 3
## 
## [[2]]
## [1] "Alisa"
## 
## [[3]]
##       cols
## rows   col1 col2 col3
##   row1    1    2    3
##   row2    4    5    6

z <-  list(num=c(1,2,3), name="Alisa", account=x)
z

## $num
## [1] 1 2 3
## 
## $name
## [1] "Alisa"
## 
## $account
##       cols
## rows   col1 col2 col3
##   row1    1    2    3
##   row2    4    5    6

Example of dataframe

data()  # With no arguments, lists the available built-in datasets
data  <-  data.frame(chickwts)  # Store the built-in chickwts dataset as a data frame named "data"
data  # Typing the name prints the whole data frame

##    weight      feed
## 1     179 horsebean
## 2     160 horsebean
## 3     136 horsebean
## 4     227 horsebean
## 5     217 horsebean
## 6     168 horsebean
## 7     108 horsebean
## 8     124 horsebean
## 9     143 horsebean
## 10    140 horsebean
## 11    309   linseed
## 12    229   linseed
## 13    181   linseed
## 14    141   linseed
## 15    260   linseed
## 16    203   linseed
## 17    148   linseed
## 18    169   linseed
## 19    213   linseed
## 20    257   linseed
## 21    244   linseed
## 22    271   linseed
## 23    243   soybean
## 24    230   soybean
## 25    248   soybean
## 26    327   soybean
## 27    329   soybean
## 28    250   soybean
## 29    193   soybean
## 30    271   soybean
## 31    316   soybean
## 32    267   soybean
## 33    199   soybean
## 34    171   soybean
## 35    158   soybean
## 36    248   soybean
## 37    423 sunflower
## 38    340 sunflower
## 39    392 sunflower
## 40    339 sunflower
## 41    341 sunflower
## 42    226 sunflower
## 43    320 sunflower
## 44    295 sunflower
## 45    334 sunflower
## 46    322 sunflower
## 47    297 sunflower
## 48    318 sunflower
## 49    325  meatmeal
## 50    257  meatmeal
## 51    303  meatmeal
## 52    315  meatmeal
## 53    380  meatmeal
## 54    153  meatmeal
## 55    263  meatmeal
## 56    242  meatmeal
## 57    206  meatmeal
## 58    344  meatmeal
## 59    258  meatmeal
## 60    368    casein
## 61    390    casein
## 62    379    casein
## 63    260    casein
## 64    404    casein
## 65    318    casein
## 66    352    casein
## 67    359    casein
## 68    216    casein
## 69    222    casein
## 70    283    casein
## 71    332    casein

head(data)       # Print the  first few rows

##   weight      feed
## 1    179 horsebean
## 2    160 horsebean
## 3    136 horsebean
## 4    227 horsebean
## 5    217 horsebean
## 6    168 horsebean

tail(data)  # Print the  last few rows

##    weight   feed
## 66    352 casein
## 67    359 casein
## 68    216 casein
## 69    222 casein
## 70    283 casein
## 71    332 casein

names(data)     # Column names
# colnames(data); rownames(data)    # Column and row names

## [1] "weight" "feed"

dim(data)   # Dimension of  the  dataframe

## [1] 71  2

data[ ,c("weight",  "feed")]    # "weight" and "feed"  columns

##    weight      feed
## 1     179 horsebean
## 2     160 horsebean
## 3     136 horsebean
## 4     227 horsebean
## 5     217 horsebean
## 6     168 horsebean
## 7     108 horsebean
## 8     124 horsebean
## 9     143 horsebean
## 10    140 horsebean
## 11    309   linseed
## 12    229   linseed
## 13    181   linseed
## 14    141   linseed
## 15    260   linseed
## 16    203   linseed
## 17    148   linseed
## 18    169   linseed
## 19    213   linseed
## 20    257   linseed
## 21    244   linseed
## 22    271   linseed
## 23    243   soybean
## 24    230   soybean
## 25    248   soybean
## 26    327   soybean
## 27    329   soybean
## 28    250   soybean
## 29    193   soybean
## 30    271   soybean
## 31    316   soybean
## 32    267   soybean
## 33    199   soybean
## 34    171   soybean
## 35    158   soybean
## 36    248   soybean
## 37    423 sunflower
## 38    340 sunflower
## 39    392 sunflower
## 40    339 sunflower
## 41    341 sunflower
## 42    226 sunflower
## 43    320 sunflower
## 44    295 sunflower
## 45    334 sunflower
## 46    322 sunflower
## 47    297 sunflower
## 48    318 sunflower
## 49    325  meatmeal
## 50    257  meatmeal
## 51    303  meatmeal
## 52    315  meatmeal
## 53    380  meatmeal
## 54    153  meatmeal
## 55    263  meatmeal
## 56    242  meatmeal
## 57    206  meatmeal
## 58    344  meatmeal
## 59    258  meatmeal
## 60    368    casein
## 61    390    casein
## 62    379    casein
## 63    260    casein
## 64    404    casein
## 65    318    casein
## 66    352    casein
## 67    359    casein
## 68    216    casein
## 69    222    casein
## 70    283    casein
## 71    332    casein

data$weight     # Get the  column "weight"

##  [1] 179 160 136 227 217 168 108 124 143 140 309 229 181 141 260 203 148
## [18] 169 213 257 244 271 243 230 248 327 329 250 193 271 316 267 199 171
## [35] 158 248 423 340 392 339 341 226 320 295 334 322 297 318 325 257 303
## [52] 315 380 153 263 242 206 344 258 368 390 379 260 404 318 352 359 216
## [69] 222 283 332

data[1:5, ] # Get the  first five rows

##   weight      feed
## 1    179 horsebean
## 2    160 horsebean
## 3    136 horsebean
## 4    227 horsebean
## 5    217 horsebean

# Example of importing data into R

# NOTE(review): this path is specific to the author's machine; change it to
# the folder that holds your copy of the lab data before running.
setwd("C:/Liem/GEOG515/Spring15/Labs/")
getwd()  # Show the current working directory to confirm the change

## [1] "C:/Liem/GEOG515/Spring15/Labs"

# Read a tab-separated text file whose first line holds the column names.
# NOTE(review): "DuhramNCdata.txt" may be a misspelling of "Durham" - confirm
# the file name on disk before changing it.
data  <-  read.table("DuhramNCdata.txt", header=TRUE,  sep="\t")
str(data) # Gives  the  structure of  data

## 'data.frame':    193 obs. of  97 variables:
##  $ OBJECTID  : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Block_Grou: num  3.71e+11 3.71e+11 3.71e+11 3.71e+11 3.71e+11 ...
##  $ NO2_Hospit: num  105.6 166.5 215.2 152.6 61.7 ...
##  $ NO2_Asthma: num  2.166 3.414 3.424 2.429 0.824 ...
##  $ NO2_Asth_1: num  181.6 286.3 286.3 203.1 68.7 ...
##  $ O3_Acute_R: num  2.5 3.94 4.86 3.45 1.72 ...
##  $ O3_Mortali: num  8737 13772 16983 12048 5218 ...
##  $ O3_School_: num  1.151 1.815 1.793 1.272 0.433 ...
##  $ O3_School1: num  113.1 178.2 176 124.9 42.5 ...
##  $ PM25_Acute: num  1.337 2.107 2.619 1.858 0.977 ...
##  $ PM25_Morta: num  27121 42753 54756 38844 19114 ...
##  $ PM25_Work_: num  0.231 0.364 0.452 0.321 0.169 ...
##  $ PM25_Work1: num  38.8 61.1 76 53.9 28.3 ...
##  $ SO2_Asthma: num  0.1565 0.2467 0.2749 0.185 0.0663 ...
##  $ SO2_Asth_1: num  12.36 19.48 21.69 14.61 5.23 ...
##  $ SO2_Hospit: num  16 25.3 33.2 22.9 9.2 ...
##  $ POP5PCT   : num  27 34.4 73.2 22.2 45 ...
##  $ PCT5PCT   : num  2.14 1.82 2.83 1.14 5.23 ...
##  $ RB50_LAREA: num  238218 197189 409639 325096 412234 ...
##  $ RB50_LABGP: num  12.8 12.7 25.7 13.5 22.3 ...
##  $ RB50_IMPP : num  11.7 14.4 20.6 12.7 19.6 ...
##  $ RB50_FORP : num  82.1 79.7 68.2 85.1 69.5 ...
##  $ RB50_VEGP : num  86.5 85.6 79.3 87.3 79.9 ...
##  $ RB15_LAREA: num  73033 59555 130752 99220 123581 ...
##  $ RB15_LABGP: num  3.93 3.84 8.19 4.11 6.68 ...
##  $ RB15_IMPP : num  10.55 5.01 17.26 4.48 10.8 ...
##  $ RB15_FORP : num  85.4 91.8 73.1 94.7 78.5 ...
##  $ RB15_VEGP : num  89.5 95 82.7 95.5 88.9 ...
##  $ TREE_POP  : num  883 517 351 662 1127 ...
##  $ TREE_PCT  : num  60 63.1 56.9 53.5 52.5 ...
##  $ GREEN_POP : num  1000 601 433 809 1392 ...
##  $ GREEN_PCT : num  67.9 73.3 70.1 65.3 64.8 ...
##  $ IMP_POP   : num  393 210 181 391 744 ...
##  $ IMP_PCT   : num  26.7 25.6 29.4 31.6 34.7 ...
##  $ DRK_PCTIMP: num  75.6 80 66.6 78.3 50.6 ...
##  $ SUM_HOUSIN: num  582 727 1109 843 477 ...
##  $ SUM_POP10 : num  1261 1891 2585 1950 861 ...
##  $ under_1   : num  24 33 79 22 16 15 17 21 13 12 ...
##  $ under_1pct: num  1.9 1.75 3.06 1.13 1.86 ...
##  $ under_13  : num  211 389 564 283 143 181 136 186 131 117 ...
##  $ under_13pc: num  16.7 20.6 21.8 14.5 16.6 ...
##  $ over_70   : num  88 104 121 282 42 32 61 43 30 23 ...
##  $ over_70pct: num  6.98 5.5 4.68 14.46 4.88 ...
##  $ Density   : num  0.679 1.219 1.62 0.808 0.466 ...
##  $ PLx2_Pop  : num  634 800 988 668 571 470 305 613 179 122 ...
##  $ PLx2_Pct  : num  56.5 42.8 52.4 41.4 62.7 ...
##  $ NonWhite  : num  715 1597 1939 1071 568 ...
##  $ NonWt_Pct : num  56.7 84.5 75 54.9 66 ...
##  $ K12_COUNT : int  1 0 2 0 0 1 0 1 1 0 ...
##  $ DAY_COUNT : int  2 2 3 1 0 0 0 0 0 1 ...
##  $ K12_LOW   : int  0 999 0 999 999 0 999 1 0 999 ...
##  $ DAY_LOW   : int  1 0 0 0 999 999 999 999 999 0 ...
##  $ KGCSTOR   : num  8562261 7523911 6982028 9931958 7459169 ...
##  $ KGCSEQ    : num  347390 305261 283276 402961 302635 ...
##  $ DOLCSTOR  : num  672138 590627 548089 779659 585545 ...
##  $ DOLCSEQ   : num  27270 23963 22237 31632 23757 ...
##  $ maxtempred: num  -1.08 -1.13 -1.01 -0.95 -0.94 -0.88 -1.24 -0.6 -1.12 -1.21 ...
##  $ maxtempr_1: num  -2.01 -2.11 -1.92 -1.77 -1.74 -1.59 -2.26 -1.09 -2.09 -2.27 ...
##  $ CORemoval : num  181 159 148 210 158 ...
##  $ NO2Removal: num  866 761 706 1004 754 ...
##  $ O3Removal : num  6113 5371 4985 7090 5325 ...
##  $ PM25Remova: num  446 392 365 520 412 ...
##  $ SO2Removal: num  321 282 313 372 279 ...
##  $ PM10Remove: num  990 870 802 1140 772 ...
##  $ PM10Value : num  7023 6171 5689 8092 5480 ...
##  $ COValue   : num  274 241 223 318 239 ...
##  $ Change    : num  3.32 3.47 3.17 3 2.89 2.67 3.75 2.19 3.45 3.7 ...
##  $ Runoff    : num  22637 19617 18897 28081 21556 ...
##  $ TSSmed    : num  1234 1069 1030 1530 1175 ...
##  $ BODmed    : num  260 226 217 323 248 ...
##  $ CODmed    : num  1012 877 845 1255 964 ...
##  $ TPmed     : num  5.86 5.08 4.89 7.27 5.58 1.43 2.31 2.54 0.89 0.93 ...
##  $ SolPmed   : num  2.33 2.02 1.95 2.89 2.22 0.57 0.92 1.01 0.35 0.37 ...
##  $ TKNmed    : num  33.3 28.8 27.8 41.3 31.7 ...
##  $ NO2_3med  : num  12.1 10.5 10.1 15 11.5 ...
##  $ Cumed     : num  0.25 0.22 0.21 0.31 0.24 0.06 0.1 0.11 0.04 0.04 ...
##  $ TSSmean   : num  1775 1538 1482 2202 1690 ...
##  $ BODmean   : num  319 277 266 396 304 ...
##  $ CODmean   : num  1195 1036 998 1483 1138 ...
##  $ TPmean    : num  7.13 6.18 5.95 8.85 6.79 1.74 2.81 3.09 1.08 1.13 ...
##  $ SolPmean  : num  2.92 2.53 2.44 3.62 2.78 0.71 1.15 1.26 0.44 0.46 ...
##  $ TKNmean   : num  39.2 33.9 32.7 48.6 37.3 ...
##  $ NO23mean  : num  14.9 12.9 12.4 18.5 14.2 ...
##  $ Cumean    : num  0.31 0.26 0.26 0.38 0.29 0.07 0.12 0.13 0.05 0.05 ...
##  $ MTCSTOR   : num  8562 7524 6982 9932 7459 ...
##  $ MTCSEQ    : num  347 305 283 403 303 ...
##  $ Pop_Not   : num  929 594 695 815 445 ...
##  $ Pop_Suff  : num  277 370 691 425 320 ...
##  $ Pop_All   : num  1206 964 1386 1240 765 ...
##  $ Pct_All   : num  95.6 51 53.6 63.6 88.8 ...
##  $ Lane_PctN : num  52.3 56.3 25.4 44.6 46.8 ...
##  $ Lane_PctY : num  47.7 43.7 74.6 55.4 53.2 ...
##  $ POPPARK   : num  345.8 183.8 466.2 717.7 72.6 ...
##  $ POPNON    : num  915 1707 2119 1232 788 ...
##  $ PCTPARK   : num  27.42 9.72 18.03 36.8 8.43 ...
##  $ PCTNON    : num  72.6 90.3 82 63.2 91.6 ...
##  $ DWDBYBGDNC: num  91.5 137 186.7 141.3 62.4 ...

# Load data  using  file.choose()
# data  <-  read.table(file.choose(), header=TRUE,  sep="\t")

# Load a csv file:
# read.csv(file.choose(), dec='.', sep=',')

Simple summary statistics

dry <- c(77, 93, 92, 68, 88, 75, 100)
dry

## [1]  77  93  92  68  88  75 100

sum(dry)

## [1] 593

length(dry)

## [1] 7

mean(dry)

## [1] 84.71429

sum(dry)/length(dry) ## Checking

## [1] 84.71429

median(dry)

## [1] 88

sd(dry)

## [1] 11.54288

var(dry)

## [1] 133.2381

sd(dry)^2

## [1] 133.2381

sum((dry-mean(dry))^2) / (length(dry)-1) ## Checking

## [1] 133.2381

min(dry)

## [1] 68

max(dry)

## [1] 100

summary(dry)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   68.00   76.00   88.00   84.71   92.50  100.00

Basic Scatterplot

x <-  c(1:20)
y <-  c(1:10,9:0)
plot(x, y)

Add some features:

plot(x,  y, xlab="X label", ylab="Y label", main="my first graph in R",  pch=15,  col="red")

And more features:

plot(x, y,  main = "My Chart Title", type = "b", xlim = c(0,20), 
     ylim = c(0,11), xlab ="X", ylab = "Y", pch=16,   col = "red",       
     bty = "n", xaxs="i", yaxs = "i",   las=1) 
# Add text:
text(6,7, "My text", col = "green" )

# Distinguish between two separate groups (odd vs. even values of x).
# Reserve two lines of outer margin at the bottom before plotting:
# mtext(..., outer = TRUE) writes into the outer margin, which has zero
# width by default, so without this the annotation would not be visible.
old_par <- par(oma = c(2, 0, 0, 0))
plot(x, y, xlab = "X", ylab = "Y", main = "Y vs  X",
     pch = ifelse(x %% 2 == 1, 5, 19),
     col = ifelse(x %% 2 == 1, "red", "blue"))

## Example of outer margin annotation
my_date <- format(Sys.time(), "%m/%d/%y")   # today's date as mm/dd/yy
my_text <- "manuscript 1"
# adj = 0 puts the text at the left edge, adj = 1 at the right edge
mtext(my_text, side = 1, line = 0.75, cex = 0.7, outer = TRUE, adj = 0)
mtext(my_date, side = 1, line = 0.75, cex = 0.7, outer = TRUE, adj = 1)
par(old_par)  # restore the previous outer margins for the following plots

Basic Line Graphs

plot(x, y, type="l", lty=2, lwd=2,  col="blue")

# The  lines argument can be,  (1) two separate  vectors
# where one vector is the  x-coordinates and the  other   is the
# y-coordinates (2) a two-column  matrix or  (3) a two-element  list
# with  x and y components. 
plot(x,  y, type="n") 
lines(x, y, type="b")

plot(x,  y, type="n") 
lines(cbind(x,y),  type="l", lty=1, col="blue")

# If there   is only  one component then  the  argument is plotted against
# its index  (same with  plot and points)
plot(sort(x),  type="n")
lines(x, type="b", pch=8,  col="red")
lines(y,  type="l", lty=6, col="blue")

Example of adding lines in plot area

# Curve to draw: y = x^2 for the integers 0..100
x <- 0:100
y <- x^2

# Corners of a rectangle; the first corner is repeated at the end so that
# the outline drawn by lines() is closed
corner_x <- c(15, 35, 35, 15)
corner_y <- c(8300, 8300, 6500, 6500)
x_pts <- c(corner_x, corner_x[1])
y_pts <- c(corner_y, corner_y[1])

# Start with an empty plot that only sets up the axes and the title
plot(x, y, type = "n", main="Add Lines, Points, Arrows Examples")
# The curve itself, drawn as a solid black line
points(x, y, type = "l", lty = 1, col = "black")
# Light grey background grid
grid(lty = 1, col = "lightgrey")
# Horizontal and vertical reference lines
abline(h = 2500, col = "red")
abline(v = 50, col = "blue")
# Double-headed arrow (code = 3 draws a head at both ends)
arrows(65, 2400, 85, 2400, length = 0.1, code = 3, col="orange")
# Rectangle outline, then its corners as filled red dots
lines(x_pts, y_pts, col = "green", lty = 1)
points(x_pts, y_pts, pch = 19, col ="red", type = "p")

More Graphical Tools with R

# now we use the trees dataset in R
data<- trees
data

##    Girth Height Volume
## 1    8.3     70   10.3
## 2    8.6     65   10.3
## 3    8.8     63   10.2
## 4   10.5     72   16.4
## 5   10.7     81   18.8
## 6   10.8     83   19.7
## 7   11.0     66   15.6
## 8   11.0     75   18.2
## 9   11.1     80   22.6
## 10  11.2     75   19.9
## 11  11.3     79   24.2
## 12  11.4     76   21.0
## 13  11.4     76   21.4
## 14  11.7     69   21.3
## 15  12.0     75   19.1
## 16  12.9     74   22.2
## 17  12.9     85   33.8
## 18  13.3     86   27.4
## 19  13.7     71   25.7
## 20  13.8     64   24.9
## 21  14.0     78   34.5
## 22  14.2     80   31.7
## 23  14.5     74   36.3
## 24  16.0     72   38.3
## 25  16.3     77   42.6
## 26  17.3     81   55.4
## 27  17.5     82   55.7
## 28  17.9     80   58.3
## 29  18.0     80   51.5
## 30  18.0     80   51.0
## 31  20.6     87   77.0

The trees dataset has three fields: Girth, Height, and Volume. There are different ways to use the fields of the trees dataset: (1) attach the dataset as a data frame with attach(trees) (we learned this way last week); (2) use the “data=trees” syntax; (3) wrap the command in the “with” command; or (4) use the syntax dataset$field for each variable.

The following commands will produce the same result:

#Pay attention to the labels of the x and y axes.
plot(Height~Girth, data=trees)       # you do not need to "attach" "trees" before this command
with(trees, plot(Height~Girth))   # you do not need to "attach" "trees" before this command

plot(trees$Girth, trees$Height)

Now try this command:

plot(Height ~Girth, data=trees, xlab= "GIRTH", ylab= "HEIGHT",main=  "Scatter Plot\n Height vs. Girth of Black Cherry Trees")

# Compare the labels with those in the previous plot and pay attention to the plot title. (\n: go to the next line).

Now we use the swiss dataset. Try and observe the output:

swiss

##              Fertility Agriculture Examination Education Catholic
## Courtelary        80.2        17.0          15        12     9.96
## Delemont          83.1        45.1           6         9    84.84
## Franches-Mnt      92.5        39.7           5         5    93.40
## Moutier           85.8        36.5          12         7    33.77
## Neuveville        76.9        43.5          17        15     5.16
## Porrentruy        76.1        35.3           9         7    90.57
## Broye             83.8        70.2          16         7    92.85
## Glane             92.4        67.8          14         8    97.16
## Gruyere           82.4        53.3          12         7    97.67
## Sarine            82.9        45.2          16        13    91.38
## Veveyse           87.1        64.5          14         6    98.61
## Aigle             64.1        62.0          21        12     8.52
## Aubonne           66.9        67.5          14         7     2.27
## Avenches          68.9        60.7          19        12     4.43
## Cossonay          61.7        69.3          22         5     2.82
## Echallens         68.3        72.6          18         2    24.20
## Grandson          71.7        34.0          17         8     3.30
## Lausanne          55.7        19.4          26        28    12.11
## La Vallee         54.3        15.2          31        20     2.15
## Lavaux            65.1        73.0          19         9     2.84
## Morges            65.5        59.8          22        10     5.23
## Moudon            65.0        55.1          14         3     4.52
## Nyone             56.6        50.9          22        12    15.14
## Orbe              57.4        54.1          20         6     4.20
## Oron              72.5        71.2          12         1     2.40
## Payerne           74.2        58.1          14         8     5.23
## Paysd'enhaut      72.0        63.5           6         3     2.56
## Rolle             60.5        60.8          16        10     7.72
## Vevey             58.3        26.8          25        19    18.46
## Yverdon           65.4        49.5          15         8     6.10
## Conthey           75.5        85.9           3         2    99.71
## Entremont         69.3        84.9           7         6    99.68
## Herens            77.3        89.7           5         2   100.00
## Martigwy          70.5        78.2          12         6    98.96
## Monthey           79.4        64.9           7         3    98.22
## St Maurice        65.0        75.9           9         9    99.06
## Sierre            92.2        84.6           3         3    99.46
## Sion              79.3        63.1          13        13    96.83
## Boudry            70.4        38.4          26        12     5.62
## La Chauxdfnd      65.7         7.7          29        11    13.79
## Le Locle          72.7        16.7          22        13    11.22
## Neuchatel         64.4        17.6          35        32    16.92
## Val de Ruz        77.6        37.6          15         7     4.97
## ValdeTravers      67.6        18.7          25         7     8.65
## V. De Geneve      35.0         1.2          37        53    42.34
## Rive Droite       44.7        46.6          16        29    50.43
## Rive Gauche       42.8        27.7          22        29    58.33
##              Infant.Mortality
## Courtelary               22.2
## Delemont                 22.2
## Franches-Mnt             20.2
## Moutier                  20.3
## Neuveville               20.6
## Porrentruy               26.6
## Broye                    23.6
## Glane                    24.9
## Gruyere                  21.0
## Sarine                   24.4
## Veveyse                  24.5
## Aigle                    16.5
## Aubonne                  19.1
## Avenches                 22.7
## Cossonay                 18.7
## Echallens                21.2
## Grandson                 20.0
## Lausanne                 20.2
## La Vallee                10.8
## Lavaux                   20.0
## Morges                   18.0
## Moudon                   22.4
## Nyone                    16.7
## Orbe                     15.3
## Oron                     21.0
## Payerne                  23.8
## Paysd'enhaut             18.0
## Rolle                    16.3
## Vevey                    20.9
## Yverdon                  22.5
## Conthey                  15.1
## Entremont                19.8
## Herens                   18.3
## Martigwy                 19.4
## Monthey                  20.2
## St Maurice               17.8
## Sierre                   16.3
## Sion                     18.1
## Boudry                   20.3
## La Chauxdfnd             20.5
## Le Locle                 18.9
## Neuchatel                23.0
## Val de Ruz               20.0
## ValdeTravers             19.5
## V. De Geneve             18.0
## Rive Droite              18.2
## Rive Gauche              19.3

plot(swiss)

Now try and compare the two graphs below:

plot(~ Fertility + Education + Catholic, data = swiss)

pairs(~ Fertility + Education + Catholic, data = swiss)

Try these to see how to add a fitted curve to a scatter plot:

plot(trees$Height,trees$Girth)
 lines(lowess(trees$Girth~trees$Height))

Plot two scatter plots on the same graph:

plot(trees$Volume~trees$Height,col="green",xlab="Height",ylab="Girth/Volume")
points(trees$Girth~trees$Height,col="red")   # "points" adds a new series to an existing graph

Barplot

Now we use the VADeaths dataset (Death Rates in Virginia (1940)). Type the following functions and compare the graphs (delete each graph before typing the next function).

barplot(VADeaths, main="Death Rates in Virginia (1940)")

barplot(VADeaths,col=rainbow(5), main="Death Rates in Virginia \n(1940)")

barplot(VADeaths,col= heat.colors(5), main="Death Rates in Virginia \n(1940)")

# Grouped bars: one colour per age group (one per row of VADeaths). The same
# palette is reused for the legend so that every swatch matches its bars.
age_cols <- rainbow(nrow(VADeaths))
barplot(VADeaths, beside = TRUE, col = age_cols)
legend("topleft", legend = rownames(VADeaths), cex = 1.0, bty = "n",
       fill = age_cols)

Delete the graph and try:

# Grey bars: build the grey palette explicitly (one shade per row of VADeaths)
# and pass it to both barplot() and legend(), so the legend swatches match
# the bars instead of showing unrelated rainbow colours.
bar_cols <- gray.colors(nrow(VADeaths))
barplot(VADeaths, beside = TRUE, col = bar_cols)
# bty = "o" draws a box around the legend (the documented values are "o" and "n")
legend("topleft", legend = rownames(VADeaths), cex = 0.7, bty = "o",
       fill = bar_cols)
# font = 4 renders the title in bold italic
title(main = list("Death Rates in Virginia (1940)", font = 4))

Boxplot

A box plot is the standard box-and-whiskers plot. Usage: boxplot(x, …), where x can be a vector or a formula (y ~ grp) in which grp is a factor. Now we use the chickwts dataset, which has two fields: weight and feed.

boxplot(chickwts$weight,col="blue")

boxplot(weight ~ feed, data=chickwts, col="red")

# One colour per feed type; a fixed rainbow(4) would be recycled, giving two
# pairs of feeds the same colour
boxplot(weight ~ feed, data = chickwts,
        col = rainbow(nlevels(chickwts$feed)), horizontal = TRUE)

Histogram

The function hist() computes a histogram of the given data values. Usage: hist(x, …), where x is the vector of values to be shown in the histogram.

hist(chickwts$weight, plot=FALSE)

## $breaks
## [1] 100 150 200 250 300 350 400 450
## 
## $counts
## [1]  7 10 16 12 17  7  2
## 
## $density
## [1] 0.0019718310 0.0028169014 0.0045070423 0.0033802817 0.0047887324
## [6] 0.0019718310 0.0005633803
## 
## $mids
## [1] 125 175 225 275 325 375 425
## 
## $xname
## [1] "chickwts$weight"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

hist(chickwts$weight, plot=TRUE)

# Compute the histogram bins without drawing anything
h <- hist(chickwts$weight, plot = FALSE)

# Compute the frequency polygon: the bin midpoints, padded with one extra
# point one bin width beyond each end where the density is set to zero
diffBreaks <- h$mids[2] - h$mids[1]
xx <- c(h$mids[1] - diffBreaks, h$mids, tail(h$mids, 1) + diffBreaks)
yy <- c(0, h$density, 0)
# Draw the histogram on the density scale so that it is comparable with yy;
# xlim is widened to make room for the two padding points
hist(chickwts$weight, probability = TRUE, xlim = range(xx), border = "gray",
     col = "green", xlab = "weight", main = "Histogram of Chicken Weight")
# Add the frequency polygon
lines(xx, yy, lwd = 2, col = "blue")
# Add the density curve if you want
lines(density(chickwts$weight), col = "red")

Pie charts

The function pie(x, …) draws a pie chart of the values in x. Example:

# Share of total sales for each flavour
sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12)
# The labels get their own variable name: calling it `names` would reuse the
# name of the base function names() and make later code confusing
flavors <- c("Blueberry", "Cherry", "Apple", "Boston Cream", "Other",
             "Vanilla Cream")
# One rainbow colour per slice; slices are drawn clockwise
pie(sales, labels = flavors, col = rainbow(length(flavors)), clockwise = TRUE)

Line Chart

We will use the Orange dataset in R

Orange

##    Tree  age circumference
## 1     1  118            30
## 2     1  484            58
## 3     1  664            87
## 4     1 1004           115
## 5     1 1231           120
## 6     1 1372           142
## 7     1 1582           145
## 8     2  118            33
## 9     2  484            69
## 10    2  664           111
## 11    2 1004           156
## 12    2 1231           172
## 13    2 1372           203
## 14    2 1582           203
## 15    3  118            30
## 16    3  484            51
## 17    3  664            75
## 18    3 1004           108
## 19    3 1231           115
## 20    3 1372           139
## 21    3 1582           140
## 22    4  118            32
## 23    4  484            62
## 24    4  664           112
## 25    4 1004           167
## 26    4 1231           179
## 27    4 1372           209
## 28    4 1582           214
## 29    5  118            30
## 30    5  484            49
## 31    5  664            81
## 32    5 1004           125
## 33    5 1231           142
## 34    5 1372           174
## 35    5 1582           177

# Convert the Tree factor to the tree numbers it is labelled with.
# Orange$Tree is an ordered factor whose levels are not in numeric order
# (see ?Orange), so as.numeric() alone would return the internal level codes
# and the lines and legend would be attached to the wrong trees. Going
# through as.character() keeps the real labels.
Orange$Tree <- as.numeric(as.character(Orange$Tree))
ntrees <- max(Orange$Tree)

# get the range for the x and y axis
xrange <- range(Orange$age)
yrange <- range(Orange$circumference)

# set up an empty plot that spans both ranges
plot(xrange, yrange, type = "n", xlab = "Age (days)",
     ylab = "Circumference (mm)")
colors <- rainbow(ntrees)
linetype <- seq_len(ntrees)
# exactly one plotting symbol per tree, starting at symbol 18
plotchar <- seq(18, by = 1, length.out = ntrees)

# add one line per tree, each with its own line type, colour and symbol
for (i in seq_len(ntrees)) {
  tree <- subset(Orange, Tree == i)
  lines(tree$age, tree$circumference, type = "b", lwd = 1.5,
        lty = linetype[i], col = colors[i], pch = plotchar[i])
}

# add a title and subtitle
title("Tree Growth", "(created by Pokemon)")

# add a legend in the top-left corner of the plotting region
legend(xrange[1], yrange[2], legend = seq_len(ntrees), cex = 0.8,
       col = colors, pch = plotchar, lty = linetype, title = "Tree")