# Use <- to assign a value to a variable
# Numbers
dusp_course <- 11
# Text
city_name <- "Cambridge"
# Boolean (true or false)
sanctuary_city <- TRUE
# Putting a variable on a line by itself will display the contents of that
# variable no matter what type it is. The lines starting with ## show the
# output that R prints.
dusp_course
## [1] 11
city_name
## [1] "Cambridge"
sanctuary_city
## [1] TRUE
# A traditional mathematical function, square root
x <- 27
y <- sqrt(x)
y
## [1] 5.196152
# We still have x from before
x
## [1] 27
# Multiply by 3 and store the result in y (this overwrites the square root)
y <- x * 3
y
## [1] 81
# Put the result of a calculation into the variable x
x <- 308 * 0.415
x
## [1] 127.82
# Round to the nearest whole number (0 digits from the decimal point) and
# store the result in y. Note the assignment arrow `<-`: a bare `<` would only
# compare y with the rounded value and leave y unchanged.
y <- round(x, digits = 0)
y
## [1] 128
# dusp_course was created earlier and currently holds a number
dusp_course
## [1] 11
# Convert the number to text and keep the result in a separate variable;
# the quotes in the output show that it is now a character string
dusp_course2 <- as.character(dusp_course)
dusp_course2
## [1] "11"
# paste0() glues pieces of text together without adding a separator
dusp_text <- "At MIT, DUSP is course: "
fulltext <- paste0(
  dusp_text,
  dusp_course2
)
fulltext
## [1] "At MIT, DUSP is course: 11"
# c() combines several values into a single vector
x <- c(5, 3, 8, 7, 3)
x
## [1] 5 3 8 7 3
# A vector can hold text as well as numbers
states <- c("MA", "UT", "MD", "OH", "ND")
states
## [1] "MA" "UT" "MD" "OH" "ND"
# x is still the numeric vector created above
x
## [1] 5 3 8 7 3
# Arithmetic is applied to every item: here each one is multiplied by 10
x * 10
## [1] 50 30 80 70 30
# The colon says make a list of whole numbers between 2 and 10
t <- 2:10
t
## [1] 2 3 4 5 6 7 8 9 10
# Even numbers can be typed out by hand ...
w <- c(2, 4, 6, 8)
# ... or generated with seq(): from 2 up to 100 in steps of 2
w2 <- seq(from = 2, to = 100, by = 2)
w2
## [1] 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34
## [18] 36 38 40 42 44 46 48 50 52 54 56 58 60 62 64 66 68
## [35] 70 72 74 76 78 80 82 84 86 88 90 92 94 96 98 100
# How many items does the vector x contain?
length(x)
## [1] 5
# Average (mean) of the values
m <- mean(x)
m
## [1] 5.2
# Total of the values (this replaces the sequence stored in t above)
t <- sum(x)
t
## [1] 26
# Share of the total contributed by each value in the vector
p <- x / t
p
## [1] 0.1923077 0.1153846 0.3076923 0.2692308 0.1153846
# x and states were both created above. Both vectors need to contain the
# same number of items for the next step.
x
## [1] 5 3 8 7 3
# Attach the states as names to the items of x
names(x) <- states
x
## MA UT MD OH ND
## 5 3 8 7 3
# Now that the values have names associated with them, the names will show up
# on the x-axis as labels
barplot(x)
# `rivers` is a dataset built into R. It is a vector of numbers:
# the length (in miles) of 141 "major" rivers in North America, as compiled by the US Geological Survey.
rivers
## [1] 735 320 325 392 524 450 1459 135 465 600 330 336 280 315
## [15] 870 906 202 329 290 1000 600 505 1450 840 1243 890 350 407
## [29] 286 280 525 720 390 250 327 230 265 850 210 630 260 230
## [43] 360 730 600 306 390 420 291 710 340 217 281 352 259 250
## [57] 470 680 570 350 300 560 900 625 332 2348 1171 3710 2315 2533
## [71] 780 280 410 460 260 255 431 350 760 618 338 981 1306 500
## [85] 696 605 250 411 1054 735 233 435 490 310 460 383 375 1270
## [99] 545 445 1885 380 300 380 377 425 276 210 800 420 350 360
## [113] 538 1100 1205 314 237 610 360 540 1038 424 310 300 444 301
## [127] 268 620 215 652 900 525 246 360 529 500 720 270 430 671
## [141] 1770
# A "dataframe" is best pictured as an Excel spreadsheet made of columns and rows.
# Here the vector becomes a one-column dataframe whose column is named "rivers".
mydata <- data.frame(rivers = rivers)
mydata
## rivers
## 1 735
## 2 320
## 3 325
## 4 392
## 5 524
## 6 450
## 7 1459
## 8 135
## 9 465
## 10 600
## 11 330
## 12 336
## 13 280
## 14 315
## 15 870
## 16 906
## 17 202
## 18 329
## 19 290
## 20 1000
## 21 600
## 22 505
## 23 1450
## 24 840
## 25 1243
## 26 890
## 27 350
## 28 407
## 29 286
## 30 280
## 31 525
## 32 720
## 33 390
## 34 250
## 35 327
## 36 230
## 37 265
## 38 850
## 39 210
## 40 630
## 41 260
## 42 230
## 43 360
## 44 730
## 45 600
## 46 306
## 47 390
## 48 420
## 49 291
## 50 710
## 51 340
## 52 217
## 53 281
## 54 352
## 55 259
## 56 250
## 57 470
## 58 680
## 59 570
## 60 350
## 61 300
## 62 560
## 63 900
## 64 625
## 65 332
## 66 2348
## 67 1171
## 68 3710
## 69 2315
## 70 2533
## 71 780
## 72 280
## 73 410
## 74 460
## 75 260
## 76 255
## 77 431
## 78 350
## 79 760
## 80 618
## 81 338
## 82 981
## 83 1306
## 84 500
## 85 696
## 86 605
## 87 250
## 88 411
## 89 1054
## 90 735
## 91 233
## 92 435
## 93 490
## 94 310
## 95 460
## 96 383
## 97 375
## 98 1270
## 99 545
## 100 445
## 101 1885
## 102 380
## 103 300
## 104 380
## 105 377
## 106 425
## 107 276
## 108 210
## 109 800
## 110 420
## 111 350
## 112 360
## 113 538
## 114 1100
## 115 1205
## 116 314
## 117 237
## 118 610
## 119 360
## 120 540
## 121 1038
## 122 424
## 123 310
## 124 300
## 125 444
## 126 301
## 127 268
## 128 620
## 129 215
## 130 652
## 131 900
## 132 525
## 133 246
## 134 360
## 135 529
## 136 500
## 137 720
## 138 270
## 139 430
## 140 671
## 141 1770
# Count the rows in the dataframe
nrow(mydata)
## [1] 141
# Show the first six rows (what head() gives when no count is supplied)
head(mydata)
## rivers
## 1 735
## 2 320
## 3 325
## 4 392
## 5 524
## 6 450
# Show the first ten rows
head(mydata, n = 10)
## rivers
## 1 735
## 2 320
## 3 325
## 4 392
## 5 524
## 6 450
## 7 1459
## 8 135
## 9 465
## 10 600
# Show the last six rows
tail(mydata)
## rivers
## 136 500
## 137 720
## 138 270
## 139 430
## 140 671
## 141 1770
# Summary statistics (minimum, quartiles, median, mean, maximum) per column
summary(mydata)
## rivers
## Min. : 135.0
## 1st Qu.: 310.0
## Median : 425.0
## Mean : 591.2
## 3rd Qu.: 680.0
## Max. :3710.0
# Pass a single column to the histogram function and let the function
# decide on the number of buckets by itself
hist(mydata$rivers)
# Ask for a number of buckets yourself
# (hist() treats a single number given as `breaks` as a suggestion)
hist(mydata$rivers, breaks = 4)
# Create a new column with kilometers instead of miles
mydata[["riverskm"]] <- mydata[["rivers"]] * 1.609
head(mydata)
## rivers riverskm
## 1 735 1182.615
## 2 320 514.880
## 3 325 522.925
## 4 392 630.728
## 5 524 843.116
## 6 450 724.050
# The water quality for each river is evaluated on a scale of 1 to 100.
# Let's assign a random value for each river. We need one random number per
# row of mydata (141 rivers), so the count is taken from the dataframe itself.
# replace = TRUE lets the same value be drawn more than once. Because the
# numbers are random, your values will differ from the ones shown below.
s <- sample(1:100, nrow(mydata), replace = TRUE)
s
## [1] 8 5 68 30 40 18 34 92 37 59 64 84 81 83 2 65 19 22 13 37 7 11 8
## [24] 51 54 97 34 4 12 21 15 22 28 61 66 14 76 48 79 80 51 87 84 18 86 42
## [47] 5 12 39 7 38 83 81 55 7 85 17 46 85 31 82 97 50 41 54 74 36 88 8
## [70] 80 57 24 4 12 74 23 71 69 97 85 69 39 66 94 79 56 73 1 89 95 99 2
## [93] 96 12 27 81 66 62 44 84 64 28 87 78 28 33 43 23 9 28 76 79 28 91 76
## [116] 40 33 25 33 15 9 88 5 83 9 9 51 41 23 42 82 24 75 17 2 9 48 35
## [139] 24 93 3
# Make a new column in the dataframe with water quality
mydata$quality <- s
head(mydata)
## rivers riverskm quality
## 1 735 1182.615 8
## 2 320 514.880 5
## 3 325 522.925 68
## 4 392 630.728 30
## 5 524 843.116 40
## 6 450 724.050 18
# We want to investigate whether there is a correlation between the length of a river and its water quality.
# To do that, we want to make a scatter plot of the river lengths and the water quality.
plot(mydata$rivers, mydata$quality)