This is an introduction to R. It describes basic operations of data input/output, making graphs and simple statistics.
Some R packages (i.e., libraries) are part of the basic installation. Others can be downloaded from CRAN, which currently hosts over 1000 packages for various purposes. If you want to list all installed packages, type: library()
A package is loaded into R using the library command, so to load the “diagram” package you should enter: library(diagram)
To list the functions in the diagram package with brief descriptions, enter: library(help = "diagram")
To view the vignette, type: vignette("diagram")
To look at the documentation for the function plotmat(), type: ?plotmat
To view defaults, type: options()
To know the default of the number of digits, type: getOption("digits")
If you want to change the maximum number of digits printed from 7 (default) to 10, type: options(digits = 10)
# R as a calculator: a square root plus a power
sqrt(16) + 6^3
## [1] 220
# Store a result in a variable; `<-` is the idiomatic assignment operator
a <- 12^2
# Typing the name of a variable prints its value
a
## [1] 144
# Examples of creating vectors:
v1 <- c(2.5, 4, 7.3, 0.1)        # numeric vector
v2 <- c("A", "B", "C", "D")      # character vector
v3 <- -3:3                       # consecutive integers from -3 to 3
v4 <- seq(0, 2, by = 0.3)        # fixed step of 0.3, stopping before exceeding 2
# Argument spelled out in full (length.out) instead of relying on partial
# matching of `len`
v5 <- seq(0, 2, length.out = 6)  # six equally spaced values from 0 to 2
v6 <- rep(1:5, each = 2)         # each element repeated twice in a row
v7 <- rep(1:5, times = 2)        # the whole sequence repeated twice
v1
## [1] 2.5 4.0 7.3 0.1
v2
## [1] "A" "B" "C" "D"
v3
## [1] -3 -2 -1 0 1 2 3
v4
## [1] 0.0 0.3 0.6 0.9 1.2 1.5 1.8
v5
## [1] 0.0 0.4 0.8 1.2 1.6 2.0
v6
## [1] 1 1 2 2 3 3 4 4 5 5
v7
## [1] 1 2 3 4 5 1 2 3 4 5
# Subsetting a vector with square brackets
x <- c(4, 9, 2, 12, 11, 3)
x[4]             # the fourth element
## [1] 12
x[1:3]           # elements one to three
## [1] 4 9 2
x[c(2, 5, 6)]    # elements picked by position
## [1] 9 11 3
x[-3]            # everything except the third element
## [1] 4 9 12 11 3
x[-c(4, 5)]      # everything except the fourth and fifth elements
## [1] 4 9 2 3
x[x > 4]         # only the elements greater than 4
## [1] 9 12 11
x                # x itself was not modified by the subsetting above
## [1] 4 9 2 12 11 3
# Arithmetic on vectors works element by element
x <- c(2, 4, 6, 0)
y <- x^2 + 1
y
## [1] 5 17 37 1
x * y
## [1] 10 68 222 0
# A 2 x 3 matrix filled row by row, with names for both dimensions
x <- matrix(
  c(1, 2, 3, 4, 5, 6),
  nrow = 2,
  ncol = 3,
  byrow = TRUE,
  dimnames = list(
    rows = c("row1", "row2"),
    cols = c("col1", "col2", "col3")
  )
)
x
## cols
## rows col1 col2 col3
## row1 1 2 3
## row2 4 5 6
# A list can hold components of different types; here they are unnamed
z <- list(c(1, 2, 3), "Alisa", x)
z
## [[1]]
## [1] 1 2 3
##
## [[2]]
## [1] "Alisa"
##
## [[3]]
## cols
## rows col1 col2 col3
## row1 1 2 3
## row2 4 5 6
# The same list with named components
z <- list(
  num = c(1, 2, 3),
  name = "Alisa",
  account = x
)
z
## $num
## [1] 1 2 3
##
## $name
## [1] "Alisa"
##
## $account
## cols
## rows col1 col2 col3
## row1 1 2 3
## row2 4 5 6
# List the datasets that are available in the installed packages
data()
# Store the built-in chickwts dataset in a data frame called "data"
data <- data.frame(chickwts)
# Typing the name of the data frame prints all of its rows
data
## weight feed
## 1 179 horsebean
## 2 160 horsebean
## 3 136 horsebean
## 4 227 horsebean
## 5 217 horsebean
## 6 168 horsebean
## 7 108 horsebean
## 8 124 horsebean
## 9 143 horsebean
## 10 140 horsebean
## 11 309 linseed
## 12 229 linseed
## 13 181 linseed
## 14 141 linseed
## 15 260 linseed
## 16 203 linseed
## 17 148 linseed
## 18 169 linseed
## 19 213 linseed
## 20 257 linseed
## 21 244 linseed
## 22 271 linseed
## 23 243 soybean
## 24 230 soybean
## 25 248 soybean
## 26 327 soybean
## 27 329 soybean
## 28 250 soybean
## 29 193 soybean
## 30 271 soybean
## 31 316 soybean
## 32 267 soybean
## 33 199 soybean
## 34 171 soybean
## 35 158 soybean
## 36 248 soybean
## 37 423 sunflower
## 38 340 sunflower
## 39 392 sunflower
## 40 339 sunflower
## 41 341 sunflower
## 42 226 sunflower
## 43 320 sunflower
## 44 295 sunflower
## 45 334 sunflower
## 46 322 sunflower
## 47 297 sunflower
## 48 318 sunflower
## 49 325 meatmeal
## 50 257 meatmeal
## 51 303 meatmeal
## 52 315 meatmeal
## 53 380 meatmeal
## 54 153 meatmeal
## 55 263 meatmeal
## 56 242 meatmeal
## 57 206 meatmeal
## 58 344 meatmeal
## 59 258 meatmeal
## 60 368 casein
## 61 390 casein
## 62 379 casein
## 63 260 casein
## 64 404 casein
## 65 318 casein
## 66 352 casein
## 67 359 casein
## 68 216 casein
## 69 222 casein
## 70 283 casein
## 71 332 casein
head(data) # Print the first few rows
## weight feed
## 1 179 horsebean
## 2 160 horsebean
## 3 136 horsebean
## 4 227 horsebean
## 5 217 horsebean
## 6 168 horsebean
tail(data) # Print the last few rows
## weight feed
## 66 352 casein
## 67 359 casein
## 68 216 casein
## 69 222 casein
## 70 283 casein
## 71 332 casein
names(data) # Column names
## [1] "weight" "feed"
# Also available: colnames(data); rownames(data) # Column and row names
dim(data) # Dimension of the data frame: number of rows, number of columns
## [1] 71 2
# Select columns by name; the empty row index before the comma keeps every row
data[ ,c("weight", "feed")] # "weight" and "feed" columns
## weight feed
## 1 179 horsebean
## 2 160 horsebean
## 3 136 horsebean
## 4 227 horsebean
## 5 217 horsebean
## 6 168 horsebean
## 7 108 horsebean
## 8 124 horsebean
## 9 143 horsebean
## 10 140 horsebean
## 11 309 linseed
## 12 229 linseed
## 13 181 linseed
## 14 141 linseed
## 15 260 linseed
## 16 203 linseed
## 17 148 linseed
## 18 169 linseed
## 19 213 linseed
## 20 257 linseed
## 21 244 linseed
## 22 271 linseed
## 23 243 soybean
## 24 230 soybean
## 25 248 soybean
## 26 327 soybean
## 27 329 soybean
## 28 250 soybean
## 29 193 soybean
## 30 271 soybean
## 31 316 soybean
## 32 267 soybean
## 33 199 soybean
## 34 171 soybean
## 35 158 soybean
## 36 248 soybean
## 37 423 sunflower
## 38 340 sunflower
## 39 392 sunflower
## 40 339 sunflower
## 41 341 sunflower
## 42 226 sunflower
## 43 320 sunflower
## 44 295 sunflower
## 45 334 sunflower
## 46 322 sunflower
## 47 297 sunflower
## 48 318 sunflower
## 49 325 meatmeal
## 50 257 meatmeal
## 51 303 meatmeal
## 52 315 meatmeal
## 53 380 meatmeal
## 54 153 meatmeal
## 55 263 meatmeal
## 56 242 meatmeal
## 57 206 meatmeal
## 58 344 meatmeal
## 59 258 meatmeal
## 60 368 casein
## 61 390 casein
## 62 379 casein
## 63 260 casein
## 64 404 casein
## 65 318 casein
## 66 352 casein
## 67 359 casein
## 68 216 casein
## 69 222 casein
## 70 283 casein
## 71 332 casein
# The $ operator extracts a single column as a vector
data$weight # Get the column "weight"
## [1] 179 160 136 227 217 168 108 124 143 140 309 229 181 141 260 203 148
## [18] 169 213 257 244 271 243 230 248 327 329 250 193 271 316 267 199 171
## [35] 158 248 423 340 392 339 341 226 320 295 334 322 297 318 325 257 303
## [52] 315 380 153 263 242 206 344 258 368 390 379 260 404 318 352 359 216
## [69] 222 283 332
# A row range before the comma and nothing after it keeps all columns
data[1:5, ] # Get the first five rows
## weight feed
## 1 179 horsebean
## 2 160 horsebean
## 3 136 horsebean
## 4 227 horsebean
## 5 217 horsebean
# Set the working directory, i.e. the folder in which R looks for files that
# are given by a relative path (this path is machine-specific)
setwd("C:/Liem/GEOG515/Spring15/Labs/")
# Show the current working directory
getwd()
## [1] "C:/Liem/GEOG515/Spring15/Labs"
# Read a tab-separated text file whose first line holds the column names
# NOTE(review): the file name is spelled "Duhram" - confirm it matches the file on disk
data <- read.table("DuhramNCdata.txt", header=TRUE, sep="\t")
str(data) # Gives the structure of data
## 'data.frame': 193 obs. of 97 variables:
## $ OBJECTID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Block_Grou: num 3.71e+11 3.71e+11 3.71e+11 3.71e+11 3.71e+11 ...
## $ NO2_Hospit: num 105.6 166.5 215.2 152.6 61.7 ...
## $ NO2_Asthma: num 2.166 3.414 3.424 2.429 0.824 ...
## $ NO2_Asth_1: num 181.6 286.3 286.3 203.1 68.7 ...
## $ O3_Acute_R: num 2.5 3.94 4.86 3.45 1.72 ...
## $ O3_Mortali: num 8737 13772 16983 12048 5218 ...
## $ O3_School_: num 1.151 1.815 1.793 1.272 0.433 ...
## $ O3_School1: num 113.1 178.2 176 124.9 42.5 ...
## $ PM25_Acute: num 1.337 2.107 2.619 1.858 0.977 ...
## $ PM25_Morta: num 27121 42753 54756 38844 19114 ...
## $ PM25_Work_: num 0.231 0.364 0.452 0.321 0.169 ...
## $ PM25_Work1: num 38.8 61.1 76 53.9 28.3 ...
## $ SO2_Asthma: num 0.1565 0.2467 0.2749 0.185 0.0663 ...
## $ SO2_Asth_1: num 12.36 19.48 21.69 14.61 5.23 ...
## $ SO2_Hospit: num 16 25.3 33.2 22.9 9.2 ...
## $ POP5PCT : num 27 34.4 73.2 22.2 45 ...
## $ PCT5PCT : num 2.14 1.82 2.83 1.14 5.23 ...
## $ RB50_LAREA: num 238218 197189 409639 325096 412234 ...
## $ RB50_LABGP: num 12.8 12.7 25.7 13.5 22.3 ...
## $ RB50_IMPP : num 11.7 14.4 20.6 12.7 19.6 ...
## $ RB50_FORP : num 82.1 79.7 68.2 85.1 69.5 ...
## $ RB50_VEGP : num 86.5 85.6 79.3 87.3 79.9 ...
## $ RB15_LAREA: num 73033 59555 130752 99220 123581 ...
## $ RB15_LABGP: num 3.93 3.84 8.19 4.11 6.68 ...
## $ RB15_IMPP : num 10.55 5.01 17.26 4.48 10.8 ...
## $ RB15_FORP : num 85.4 91.8 73.1 94.7 78.5 ...
## $ RB15_VEGP : num 89.5 95 82.7 95.5 88.9 ...
## $ TREE_POP : num 883 517 351 662 1127 ...
## $ TREE_PCT : num 60 63.1 56.9 53.5 52.5 ...
## $ GREEN_POP : num 1000 601 433 809 1392 ...
## $ GREEN_PCT : num 67.9 73.3 70.1 65.3 64.8 ...
## $ IMP_POP : num 393 210 181 391 744 ...
## $ IMP_PCT : num 26.7 25.6 29.4 31.6 34.7 ...
## $ DRK_PCTIMP: num 75.6 80 66.6 78.3 50.6 ...
## $ SUM_HOUSIN: num 582 727 1109 843 477 ...
## $ SUM_POP10 : num 1261 1891 2585 1950 861 ...
## $ under_1 : num 24 33 79 22 16 15 17 21 13 12 ...
## $ under_1pct: num 1.9 1.75 3.06 1.13 1.86 ...
## $ under_13 : num 211 389 564 283 143 181 136 186 131 117 ...
## $ under_13pc: num 16.7 20.6 21.8 14.5 16.6 ...
## $ over_70 : num 88 104 121 282 42 32 61 43 30 23 ...
## $ over_70pct: num 6.98 5.5 4.68 14.46 4.88 ...
## $ Density : num 0.679 1.219 1.62 0.808 0.466 ...
## $ PLx2_Pop : num 634 800 988 668 571 470 305 613 179 122 ...
## $ PLx2_Pct : num 56.5 42.8 52.4 41.4 62.7 ...
## $ NonWhite : num 715 1597 1939 1071 568 ...
## $ NonWt_Pct : num 56.7 84.5 75 54.9 66 ...
## $ K12_COUNT : int 1 0 2 0 0 1 0 1 1 0 ...
## $ DAY_COUNT : int 2 2 3 1 0 0 0 0 0 1 ...
## $ K12_LOW : int 0 999 0 999 999 0 999 1 0 999 ...
## $ DAY_LOW : int 1 0 0 0 999 999 999 999 999 0 ...
## $ KGCSTOR : num 8562261 7523911 6982028 9931958 7459169 ...
## $ KGCSEQ : num 347390 305261 283276 402961 302635 ...
## $ DOLCSTOR : num 672138 590627 548089 779659 585545 ...
## $ DOLCSEQ : num 27270 23963 22237 31632 23757 ...
## $ maxtempred: num -1.08 -1.13 -1.01 -0.95 -0.94 -0.88 -1.24 -0.6 -1.12 -1.21 ...
## $ maxtempr_1: num -2.01 -2.11 -1.92 -1.77 -1.74 -1.59 -2.26 -1.09 -2.09 -2.27 ...
## $ CORemoval : num 181 159 148 210 158 ...
## $ NO2Removal: num 866 761 706 1004 754 ...
## $ O3Removal : num 6113 5371 4985 7090 5325 ...
## $ PM25Remova: num 446 392 365 520 412 ...
## $ SO2Removal: num 321 282 313 372 279 ...
## $ PM10Remove: num 990 870 802 1140 772 ...
## $ PM10Value : num 7023 6171 5689 8092 5480 ...
## $ COValue : num 274 241 223 318 239 ...
## $ Change : num 3.32 3.47 3.17 3 2.89 2.67 3.75 2.19 3.45 3.7 ...
## $ Runoff : num 22637 19617 18897 28081 21556 ...
## $ TSSmed : num 1234 1069 1030 1530 1175 ...
## $ BODmed : num 260 226 217 323 248 ...
## $ CODmed : num 1012 877 845 1255 964 ...
## $ TPmed : num 5.86 5.08 4.89 7.27 5.58 1.43 2.31 2.54 0.89 0.93 ...
## $ SolPmed : num 2.33 2.02 1.95 2.89 2.22 0.57 0.92 1.01 0.35 0.37 ...
## $ TKNmed : num 33.3 28.8 27.8 41.3 31.7 ...
## $ NO2_3med : num 12.1 10.5 10.1 15 11.5 ...
## $ Cumed : num 0.25 0.22 0.21 0.31 0.24 0.06 0.1 0.11 0.04 0.04 ...
## $ TSSmean : num 1775 1538 1482 2202 1690 ...
## $ BODmean : num 319 277 266 396 304 ...
## $ CODmean : num 1195 1036 998 1483 1138 ...
## $ TPmean : num 7.13 6.18 5.95 8.85 6.79 1.74 2.81 3.09 1.08 1.13 ...
## $ SolPmean : num 2.92 2.53 2.44 3.62 2.78 0.71 1.15 1.26 0.44 0.46 ...
## $ TKNmean : num 39.2 33.9 32.7 48.6 37.3 ...
## $ NO23mean : num 14.9 12.9 12.4 18.5 14.2 ...
## $ Cumean : num 0.31 0.26 0.26 0.38 0.29 0.07 0.12 0.13 0.05 0.05 ...
## $ MTCSTOR : num 8562 7524 6982 9932 7459 ...
## $ MTCSEQ : num 347 305 283 403 303 ...
## $ Pop_Not : num 929 594 695 815 445 ...
## $ Pop_Suff : num 277 370 691 425 320 ...
## $ Pop_All : num 1206 964 1386 1240 765 ...
## $ Pct_All : num 95.6 51 53.6 63.6 88.8 ...
## $ Lane_PctN : num 52.3 56.3 25.4 44.6 46.8 ...
## $ Lane_PctY : num 47.7 43.7 74.6 55.4 53.2 ...
## $ POPPARK : num 345.8 183.8 466.2 717.7 72.6 ...
## $ POPNON : num 915 1707 2119 1232 788 ...
## $ PCTPARK : num 27.42 9.72 18.03 36.8 8.43 ...
## $ PCTNON : num 72.6 90.3 82 63.2 91.6 ...
## $ DWDBYBGDNC: num 91.5 137 186.7 141.3 62.4 ...
# Load data using file.choose()
# data <- read.table(file.choose(), header=TRUE, sep="\t")
# Load a csv file:
# read.csv(file.choose(), dec='.', sep=',')
# Descriptive statistics for a small numeric sample
dry <- c(77, 93, 92, 68, 88, 75, 100)
dry
## [1] 77 93 92 68 88 75 100
# Total and number of observations
sum(dry)
## [1] 593
length(dry)
## [1] 7
# The mean, and the same value computed by hand as a check
mean(dry)
## [1] 84.71429
sum(dry) / length(dry)
## [1] 84.71429
median(dry)
## [1] 88
# Standard deviation and variance; the variance is the squared standard deviation
sd(dry)
## [1] 11.54288
var(dry)
## [1] 133.2381
sd(dry)^2
## [1] 133.2381
# Variance by hand: sum of squared deviations divided by (n - 1)
sum((dry - mean(dry))^2) / (length(dry) - 1)
## [1] 133.2381
# Extremes and the five-number summary plus the mean
min(dry)
## [1] 68
max(dry)
## [1] 100
summary(dry)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 68.00 76.00 88.00 84.71 92.50 100.00
# Two simple vectors to plot against each other
x <- 1:20
y <- c(1:10, 9:0)
# Default scatter plot
plot(x, y)
# Axis labels, a title, filled square symbols and a colour
plot(x, y, xlab = "X label", ylab = "Y label", main = "my first graph in R",
     pch = 15, col = "red")
# Points joined by lines (type = "b"), fixed axis limits, no box around the
# plot (bty = "n"), axes that start exactly at the limits (xaxs/yaxs = "i")
# and horizontal tick labels (las = 1)
plot(x, y, main = "My Chart Title", type = "b", xlim = c(0, 20),
     ylim = c(0, 11), xlab = "X", ylab = "Y", pch = 16, col = "red",
     bty = "n", xaxs = "i", yaxs = "i", las = 1)
# Add text:
text(6, 7, "My text", col = "green")
# Reserve two lines of outer margin at the bottom before drawing the next
# plot: mtext(..., outer = TRUE) writes into the outer margin, which has zero
# width by default, so without this the annotation below would not be visible.
old_par <- par(oma = c(2, 0, 0, 0))
# Distinguish between two separate groups
plot(x, y, xlab = "X", ylab = "Y", main = "Y vs X",
     pch = ifelse(x %% 2 == 1, 5, 19),
     col = ifelse(x %% 2 == 1, "red", "blue"))
## Example of Outer Margin annotation
my_date <- format(Sys.time(), "%m/%d/%y")
my_text <- "manuscript 1"
# Left-aligned (adj = 0) and right-aligned (adj = 1) text in the bottom margin
mtext(my_text, side = 1, line = .75, cex = 0.7, outer = TRUE, adj = 0)
mtext(my_date, side = 1, line = .75, cex = 0.7, outer = TRUE, adj = 1)
# Restore the previous outer margin so that later plots are unaffected
par(old_par)
# A thick, dashed, blue line plot
plot(x, y, type = "l", lty = 2, lwd = 2, col = "blue")
# lines() accepts its coordinates in three forms:
#   (1) two separate vectors, one for the x- and one for the y-coordinates,
#   (2) a two-column matrix, or
#   (3) a two-element list with x and y components.
# Form (1): set up an empty plot, then add points joined by lines
plot(x, y, type = "n")
lines(x, y, type = "b")
# Form (2): a two-column matrix built with cbind()
plot(x, y, type = "n")
lines(cbind(x, y), lty = 1, col = "blue")
# If there is only one component then the argument is plotted against
# its index (same with plot and points)
plot(sort(x), type = "n")
lines(x, type = "b", pch = 8, col = "red")
lines(y, lty = 6, col = "blue")
# A parabola, plus the corners of a closed rectangle (the first corner is
# repeated at the end so that the outline closes)
x <- 0:100
y <- x^2
x_pts <- c(15, 35, 35, 15, 15)
y_pts <- c(8300, 8300, 6500, 6500, 8300)
# Empty plot that only sets up the axes and the title
plot(x, y, type = "n", main = "Add Lines, Points, Arrows Examples")
# The curve itself, drawn as a solid black line
lines(x, y, col = "black", lty = 1)
# Background grid
grid(col = "lightgrey", lty = 1)
# Horizontal and vertical reference lines
abline(h = 2500, col = "red")
abline(v = 50, col = "blue")
# Double-headed arrow (code = 3) with short heads
arrows(65, 2400, 85, 2400, code = 3, col = "orange", length = 0.1)
# Rectangle outline in green with its corners marked as filled red circles
lines(x_pts, y_pts, lty = 1, col = "green")
points(x_pts, y_pts, col = "red", pch = 19)
# Now we use the trees dataset that ships with R
# (this replaces the previous contents of the variable "data")
data<- trees
# Print the whole data frame
data
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## 7 11.0 66 15.6
## 8 11.0 75 18.2
## 9 11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 18 13.3 86 27.4
## 19 13.7 71 25.7
## 20 13.8 64 24.9
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
## 31 20.6 87 77.0
The trees dataset has three fields: Girth, Height, and Volume. There are different ways to use the fields of the trees dataset: (1) attach the dataset as a data frame with attach(trees) (we learned this way last week); (2) use the “data=trees” syntax; (3) wrap the command in the “with” command; or (4) use the syntax dataset$field for each variable.
The following commands will produce the same result:
# Pay attention to the labels of the x and y axes: they are taken from the
# expressions used in each call.
plot(Height~Girth, data=trees) # you do not need to "attach" "trees" before this command
with(trees, plot(Height~Girth)) # you do not need to "attach" "trees" before this command
# Here each column is referred to explicitly with the dataset$field syntax
plot(trees$Girth, trees$Height)
Now try this command:
# The same scatter plot with explicit axis labels and a two-line title
plot(
  Height ~ Girth,
  data = trees,
  xlab = "GIRTH",
  ylab = "HEIGHT",
  main = "Scatter Plot\n Height vs. Girth of Black Cherry Trees"
)
# Compare the labels with those in the previous plot and pay attention to the
# plot title ("\n" moves the rest of the title to the next line).
Now we use the swiss dataset. Try and observe the output:
# Print the whole swiss data frame
swiss
## Fertility Agriculture Examination Education Catholic
## Courtelary 80.2 17.0 15 12 9.96
## Delemont 83.1 45.1 6 9 84.84
## Franches-Mnt 92.5 39.7 5 5 93.40
## Moutier 85.8 36.5 12 7 33.77
## Neuveville 76.9 43.5 17 15 5.16
## Porrentruy 76.1 35.3 9 7 90.57
## Broye 83.8 70.2 16 7 92.85
## Glane 92.4 67.8 14 8 97.16
## Gruyere 82.4 53.3 12 7 97.67
## Sarine 82.9 45.2 16 13 91.38
## Veveyse 87.1 64.5 14 6 98.61
## Aigle 64.1 62.0 21 12 8.52
## Aubonne 66.9 67.5 14 7 2.27
## Avenches 68.9 60.7 19 12 4.43
## Cossonay 61.7 69.3 22 5 2.82
## Echallens 68.3 72.6 18 2 24.20
## Grandson 71.7 34.0 17 8 3.30
## Lausanne 55.7 19.4 26 28 12.11
## La Vallee 54.3 15.2 31 20 2.15
## Lavaux 65.1 73.0 19 9 2.84
## Morges 65.5 59.8 22 10 5.23
## Moudon 65.0 55.1 14 3 4.52
## Nyone 56.6 50.9 22 12 15.14
## Orbe 57.4 54.1 20 6 4.20
## Oron 72.5 71.2 12 1 2.40
## Payerne 74.2 58.1 14 8 5.23
## Paysd'enhaut 72.0 63.5 6 3 2.56
## Rolle 60.5 60.8 16 10 7.72
## Vevey 58.3 26.8 25 19 18.46
## Yverdon 65.4 49.5 15 8 6.10
## Conthey 75.5 85.9 3 2 99.71
## Entremont 69.3 84.9 7 6 99.68
## Herens 77.3 89.7 5 2 100.00
## Martigwy 70.5 78.2 12 6 98.96
## Monthey 79.4 64.9 7 3 98.22
## St Maurice 65.0 75.9 9 9 99.06
## Sierre 92.2 84.6 3 3 99.46
## Sion 79.3 63.1 13 13 96.83
## Boudry 70.4 38.4 26 12 5.62
## La Chauxdfnd 65.7 7.7 29 11 13.79
## Le Locle 72.7 16.7 22 13 11.22
## Neuchatel 64.4 17.6 35 32 16.92
## Val de Ruz 77.6 37.6 15 7 4.97
## ValdeTravers 67.6 18.7 25 7 8.65
## V. De Geneve 35.0 1.2 37 53 42.34
## Rive Droite 44.7 46.6 16 29 50.43
## Rive Gauche 42.8 27.7 22 29 58.33
## Infant.Mortality
## Courtelary 22.2
## Delemont 22.2
## Franches-Mnt 20.2
## Moutier 20.3
## Neuveville 20.6
## Porrentruy 26.6
## Broye 23.6
## Glane 24.9
## Gruyere 21.0
## Sarine 24.4
## Veveyse 24.5
## Aigle 16.5
## Aubonne 19.1
## Avenches 22.7
## Cossonay 18.7
## Echallens 21.2
## Grandson 20.0
## Lausanne 20.2
## La Vallee 10.8
## Lavaux 20.0
## Morges 18.0
## Moudon 22.4
## Nyone 16.7
## Orbe 15.3
## Oron 21.0
## Payerne 23.8
## Paysd'enhaut 18.0
## Rolle 16.3
## Vevey 20.9
## Yverdon 22.5
## Conthey 15.1
## Entremont 19.8
## Herens 18.3
## Martigwy 19.4
## Monthey 20.2
## St Maurice 17.8
## Sierre 16.3
## Sion 18.1
## Boudry 20.3
## La Chauxdfnd 20.5
## Le Locle 18.9
## Neuchatel 23.0
## Val de Ruz 20.0
## ValdeTravers 19.5
## V. De Geneve 18.0
## Rive Droite 18.2
## Rive Gauche 19.3
# Plotting a whole data frame draws a matrix of scatter plots, one for every pair of columns
plot(swiss)
Now try and compare the two graphs below:
# A one-sided formula restricts the display to the listed variables
plot(~ Fertility + Education + Catholic, data = swiss)
# pairs() is the dedicated function for a scatter-plot matrix
pairs(~ Fertility + Education + Catholic, data = swiss)
Try these to see how to add a fitted curve to a scatter plot:
# Scatter plot with Height on the x-axis and Girth on the y-axis
plot(trees$Height,trees$Girth)
# lowess() computes a smoothed curve of Girth against Height; lines() adds it to the existing plot
lines(lowess(trees$Girth~trees$Height))
Plot two scatter plots on the same graph:
# First series: Volume against Height in green; this call sets up the axes
plot(trees$Volume~trees$Height,col="green",xlab="Height",ylab="Girth/Volume")
# Second series: Girth against Height in red, drawn on the same axes
points(trees$Girth~trees$Height,col="red") # "points" adds a new series to an existing graph
Now we use the VADeaths dataset (Death Rates in Virginia (1940)). Type the following functions and compare the graphs (delete each graph before typing the next function).
# Stacked bars: one bar per population group (column of VADeaths), one
# segment per age group (row of VADeaths)
barplot(VADeaths, main = "Death Rates in Virginia (1940)")
# One colour per age group, i.e. per row of the matrix
age_cols <- rainbow(nrow(VADeaths))
barplot(VADeaths, col = age_cols, main = "Death Rates in Virginia \n(1940)")
barplot(VADeaths, col = heat.colors(nrow(VADeaths)),
        main = "Death Rates in Virginia \n(1940)")
# Grouped bars (beside = TRUE): the bars inside each group are the age
# groups, so the bars and the legend need one colour per row. With fewer
# colours than rows the palette is recycled and two age groups share a colour.
barplot(VADeaths, beside = TRUE, col = age_cols)
legend("topleft", legend = rownames(VADeaths), cex = 1.0, bty = "n",
       fill = age_cols)
Delete the graph and try:
# One colour per age group (row of VADeaths), shared by the bars and the
# legend so that the legend keys actually match the bars
age_cols <- rainbow(nrow(VADeaths))
barplot(VADeaths, beside = TRUE, col = age_cols)
# bty = "o" draws a box around the legend ("o" and "n" are the documented values)
legend("topleft", legend = rownames(VADeaths), cex = 0.7, bty = "o",
       fill = age_cols)
# Add the title afterwards; font = 4 is bold italic
title(main = list("Death Rates in Virginia (1940)", font = 4))
A box plot is the standard box-and-whiskers plot. Usage: boxplot(x, …), where x can be a vector or a formula (y ~ grp) in which grp is a factor. Now we use the chickwts dataset, which has two fields: weight and feed.
# A single box for all chicken weights
boxplot(chickwts$weight, col = "blue")
# One box per feed type
boxplot(weight ~ feed, data = chickwts, col = "red")
# Horizontal boxes with one colour per feed type; the palette size follows
# the number of feed levels so that no two boxes share a colour
feed_cols <- rainbow(nlevels(chickwts$feed))
boxplot(weight ~ feed, data = chickwts, col = feed_cols, horizontal = TRUE)
The function hist() computes a histogram of the given data values. Usage: hist(x, …), where x is the vector of values to be shown in the histogram.
# With plot = FALSE, hist() only returns the computed histogram object
hist(chickwts$weight, plot = FALSE)
## $breaks
## [1] 100 150 200 250 300 350 400 450
##
## $counts
## [1] 7 10 16 12 17 7 2
##
## $density
## [1] 0.0019718310 0.0028169014 0.0045070423 0.0033802817 0.0047887324
## [6] 0.0019718310 0.0005633803
##
## $mids
## [1] 125 175 225 275 325 375 425
##
## $xname
## [1] "chickwts$weight"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# With plot = TRUE (the default) the histogram is drawn
hist(chickwts$weight, plot = TRUE)
# Keep the histogram object to reuse its midpoints and densities
h <- hist(chickwts$weight, plot = FALSE)
# compute the frequency polygon: the bin midpoints, extended by one bin width
# on each side where the polygon drops to zero
diffBreaks <- h$mids[2] - h$mids[1]
xx <- c(h$mids[1] - diffBreaks, h$mids, tail(h$mids, 1) + diffBreaks)
yy <- c(0, h$density, 0)
# draw the histogram on the density scale (freq = FALSE) so that the polygon
# and the density curve share its y-axis
hist(chickwts$weight, freq = FALSE, xlim = range(xx), border = "gray",
     col = "green", xlab = "weight", main = "Histogram of Chicken Weight")
# adds the frequency polygon
lines(xx, yy, lwd = 2, col = "blue")
# adds the density curve if you want
lines(density(chickwts$weight), col = "red")
The function pie(x, …) draws a pie chart of the values in x. Example:
# Share of sales for each flavour (the shares add up to 1)
sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12)
# Labels kept in `flavors` rather than `names`, so that the variable does not
# mask the base function names()
flavors <- c("Blueberry", "Cherry", "Apple", "Boston Cream", "Other",
             "Vanilla Cream")
# One slice and one colour per flavour, drawn clockwise
pie(sales, labels = flavors, col = rainbow(length(flavors)), clockwise = TRUE)
We will use the Orange dataset in R
# Print the whole Orange data frame
Orange
## Tree age circumference
## 1 1 118 30
## 2 1 484 58
## 3 1 664 87
## 4 1 1004 115
## 5 1 1231 120
## 6 1 1372 142
## 7 1 1582 145
## 8 2 118 33
## 9 2 484 69
## 10 2 664 111
## 11 2 1004 156
## 12 2 1231 172
## 13 2 1372 203
## 14 2 1582 203
## 15 3 118 30
## 16 3 484 51
## 17 3 664 75
## 18 3 1004 108
## 19 3 1231 115
## 20 3 1372 139
## 21 3 1582 140
## 22 4 118 32
## 23 4 484 62
## 24 4 664 112
## 25 4 1004 167
## 26 4 1231 179
## 27 4 1372 209
## 28 4 1582 214
## 29 5 118 30
## 30 5 484 49
## 31 5 664 81
## 32 5 1004 125
## 33 5 1231 142
## 34 5 1372 174
## 35 5 1582 177
# Convert the Tree factor to numeric tree IDs for convenience.
# The conversion goes through as.character(): as.numeric() applied directly to
# a factor returns the internal level codes, and the levels of Orange$Tree are
# ordered 3 < 1 < 5 < 2 < 4, so the codes would not equal the tree labels and
# every curve would be shown under the wrong tree number.
Orange$Tree <- as.numeric(as.character(Orange$Tree))
ntrees <- max(Orange$Tree)
# get the range for the x and y axis
xrange <- range(Orange$age)
yrange <- range(Orange$circumference)
# set up an empty plot that spans the full range of the data
plot(xrange, yrange, type = "n", xlab = "Age (days)",
     ylab = "Circumference (mm)")
# one colour, line type and plotting symbol per tree
colors <- rainbow(ntrees)
linetype <- seq_len(ntrees)
plotchar <- seq(18, length.out = ntrees)
# add one growth curve per tree
for (i in seq_len(ntrees)) {
  tree <- subset(Orange, Tree == i)
  lines(tree$age, tree$circumference, type = "b", lwd = 1.5,
        lty = linetype[i], col = colors[i], pch = plotchar[i])
}
# add a title and subtitle
title("Tree Growth", "(created by Pokemon)")
# add a legend in the top-left corner of the plotting region
legend(xrange[1], yrange[2], legend = seq_len(ntrees), cex = 0.8, col = colors,
       pch = plotchar, lty = linetype, title = "Tree")