# Modifying basic properties of the plot
# Scatter plot of chirp rate against temperature from `crickets`; the points
# are drawn as red, semi-transparent squares and the axes/title are relabelled.
ggplot(crickets, aes(x = temp, y = rate)) +
  geom_point(
    color = "red",
    size = 2,
    alpha = .3,
    shape = "square"
  ) +
  labs(
    x = "Temperature",
    y = "Chirp rate",
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)"
  )
# Learn more about the options for geom_point()
# with ?geom_point

# Adding another layer
# Same scatter plot with default points, plus a linear-model ("lm") fit line
# drawn without its standard-error band (se = FALSE).
ggplot(crickets, aes(x = temp, y = rate)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    se = FALSE
  ) +
  labs(
    x = "Temperature",
    y = "Chirp rate",
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)"
  )
# Include breaks
# A long expression can continue on the next line as long as the unfinished
# line ends with an operator (here the trailing `+`).
(1 + 2 + 3) + (4 + 5 + 6) + ## notice the indentation after the first line
  (7 + 8 + 9) + (10 + 11 + 12) + (13 + 14 + 15) + (16 + 17 + 18)
#> [1] 171
# This is a comment
# Comments don't get evaluated as code by R

## Problem #1
1 + 2 + 3 # sum of 1, 2, and 3
#> [1] 6
## Problem #2
1 - 2 - 3 # difference of 1, 2, then 3
#> [1] -4
## Problem #3
# A quoted value is a character string and is printed back as-is.
"hello"
#> [1] "hello"
## Problem #4
# 4+5+7
## Problem #5
# hello
# Build five equal-length vectors (one value per individual) and combine them
# into a single tibble, one column per vector.
# tibble() must already be available (e.g. via library(tidyverse)).

# creating an object named "student.names" that contains 4 names
student.names <- c("matt", "remi", "wendy", "craig")
# creating an object named "fav.color"
fav.color <- c("red", "purple", "teal", "yellow")
# creating an object named "age"
age <- c(29, 2, 24, 15)
# height in centimeters
height <- c(117.8, 38.1, 170.18, 30.48)
# human or cat?
species <- c("human", "cat", "human", "cat")

data <- tibble(student.names, fav.color, age, height, species)
# calculating the mean of numbers 1 through 5
# by hand: add the five values, then divide by how many there are
(1 + 2 + 3 + 4 + 5) / 5
#> [1] 3
# method 1
# 1:5 builds the vector 1, 2, 3, 4, 5, which mean() then averages
mean(1:5)
#> [1] 3
# the colon indicates 'through' as in 1 through 5

# method 2: typing all the numbers manually
# The values must be wrapped in c() so they reach mean() as ONE vector.
# mean(1, 2, 3, 4, 5) averages only its first argument (the other values are
# matched to mean()'s remaining parameters) and therefore returns 1, not 3.
mean(c(1, 2, 3, 4, 5))
#> [1] 3

# method 3: store the numbers in an object first, then average the object
Numbers <- c(1, 2, 3, 4, 5)
mean(Numbers)
# There are two major camps of coding syntax: tidyverse and base R.
# A data set is a group of related variables: a variable represents a column
# and a row represents a unique observation.
# Base R datasets are called data frames or df.
# Tidyverse datasets are called tibbles or tbl - they have all properties of
# data frames and more.
# Pipes are a shortcut tool (Ctrl + Shift + M) that the tidyverse uses for more
# efficient coding. Hitting Enter after a pipe will auto-indent. The pipe is
# derived from the package magrittr, but tidyverse loads this.
# There are many structural types of data, e.g. numerical, letters,
# boolean (true/false), categorical. We can also have datasets.
# VECTOR: a data structure which contains a single type of values, e.g. all
# letter characters. If you combine multiple vectors together you get a
# dataset. Each column within a dataset is a vector, e.g. name, hair colour,
# age, human true/false.

## defining 4 objects, all of which are vectors
# a vector containing names
Names <- c("Sam", "Tina", "Alex")
# a vector containing character values for "Hair Color"
Hair <- c("brown", "black", "blonde")
# a vector containing numeric values for age
Age <- c(24, 41, 2)
# a vector containing true/false to denote human
Human <- c(TRUE, FALSE, TRUE)

## executing four lines of code to view these objects' definitions in console
Names
#> [1] "Sam" "Tina" "Alex"
Hair
#> [1] "brown" "black" "blonde"
Age
#> [1] 24 41 2
Human
#> [1] TRUE FALSE TRUE
## defining an object named mydataset
# this dataset combines the four vectors previously defined
# (each vector becomes one column; tibble() must already be available)
mydataset <- tibble(Names, Hair, Age, Human)

## viewing the dataset in the console
mydataset
#> # A tibble: 3 × 4
#>   Names Hair     Age Human
#>   <chr> <chr>  <dbl> <lgl>
#> 1 Sam   brown     24 TRUE
#> 2 Tina  black     41 FALSE
#> 3 Alex  blonde     2 TRUE
# Atomic vector types: character (a string, e.g. letters or numbers in quotes),
# numeric, integer, logical, complex. A vector of mixed values defaults to the
# least specialised type.
# Ordered factors are cut, color and clarity.
# 6 variables are numeric: carat, depth, table, x, y, z.
# 1 variable has an integer structure: price.
# Putting a ? in front of a built-in data set brings up its help page.
?diamonds
Chapter 6 Wendy Huynh
Basic Data Management
# mutate() can be used to create variables based on
# existing variables.
# Here three new columns are added, each holding one constant value repeated
# on every row: a number, a character string and a logical.
diamonds %>%
  mutate(
    JustOne = 1,
    Values = "something",
    Simple = TRUE
  )
# A tibble: 53,940 × 13
carat cut color clarity depth table price x y z JustOne Values
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <chr>
1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 1 somet…
2 0.21 Premi… E SI1 59.8 61 326 3.89 3.84 2.31 1 somet…
3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 1 somet…
4 0.29 Premi… I VS2 62.4 58 334 4.2 4.23 2.63 1 somet…
5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 1 somet…
6 0.24 Very … J VVS2 62.8 57 336 3.94 3.96 2.48 1 somet…
7 0.24 Very … I VVS1 62.3 57 336 3.95 3.98 2.47 1 somet…
8 0.26 Very … H SI1 61.9 55 337 4.07 4.11 2.53 1 somet…
9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 1 somet…
10 0.23 Very … H VS1 59.4 61 338 4 4.05 2.39 1 somet…
# ℹ 53,930 more rows
# ℹ 1 more variable: Simple <lgl>
# Add a column `price200` holding each diamond's price minus 200; the result
# is printed, not saved, so `diamonds` itself is left unchanged.
mutate(diamonds, price200 = price - 200)
# A tibble: 53,940 × 11
carat cut color clarity depth table price x y z price200
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 126
2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 126
3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 127
4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63 134
5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 135
6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48 136
7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47 136
8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53 137
9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 137
10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39 138
# ℹ 53,930 more rows
# If saving this as a data set, give it a new name like
# diamonds.new so you don't contaminate the original data.
Nesting Functions
# We can use other functions inside mutate() to create
# new variables. This is nesting, where a function such
# as mean() nests inside e.g. mutate().
library(tidyverse)

# Each statistic is computed over the whole price column, so every row of the
# result carries the same value in m, sd and med.
diamonds %>%
  mutate(
    m = mean(price),    # calc mean price
    sd = sd(price),     # calcs sd
    med = median(price) # calc median
  )
# A tibble: 53,940 × 13
carat cut color clarity depth table price x y z m sd
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 3933. 3989.
2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 3933. 3989.
3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 3933. 3989.
4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63 3933. 3989.
5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 3933. 3989.
6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48 3933. 3989.
7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47 3933. 3989.
8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53 3933. 3989.
9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 3933. 3989.
10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39 3933. 3989.
# ℹ 53,930 more rows
# ℹ 1 more variable: med <dbl>
# A tibble: 437 × 29
PID county state area poptotal popdensity popwhite popblack popamerindian
<int> <chr> <chr> <dbl> <int> <dbl> <int> <int> <int>
1 561 ADAMS IL 0.052 66090 1271. 63917 1702 98
2 562 ALEXAN… IL 0.014 10626 759 7054 3496 19
3 563 BOND IL 0.022 14991 681. 14477 429 35
4 564 BOONE IL 0.017 30806 1812. 29344 127 46
5 565 BROWN IL 0.018 5836 324. 5264 547 14
6 566 BUREAU IL 0.05 35688 714. 35157 50 65
7 567 CALHOUN IL 0.017 5322 313. 5298 1 8
8 568 CARROLL IL 0.027 16805 622. 16519 111 30
9 569 CASS IL 0.024 13437 560. 13384 16 8
10 570 CHAMPA… IL 0.058 173025 2983. 146506 16559 331
# ℹ 427 more rows
# ℹ 20 more variables: popasian <int>, popother <int>, percwhite <dbl>,
# percblack <dbl>, percamerindan <dbl>, percasian <dbl>, percother <dbl>,
# popadults <int>, perchsd <dbl>, percollege <dbl>, percprof <dbl>,
# poppovertyknown <int>, percpovertyknown <dbl>, percbelowpoverty <dbl>,
# percchildbelowpovert <dbl>, percadultpoverty <dbl>,
# percelderlypoverty <dbl>, inmetro <int>, category <chr>, …
# mutate() is used to create the new column avg.pop.den
# mean(popdensity) calculates from the entire data set
# calculating for the entire data set, so same value for each row
# NOTE(review): the three notes above describe an avg.pop.den example whose
# code is not visible here - confirm they belong to an earlier chunk.
?midwest

# massive drum roll: I did the line below all by myself
# Same idea applied to area: avg.area is mean(area) taken over the whole data
# set, so it is identical on every row.
midwest %>%
  mutate(avg.area = mean(area))
# A tibble: 437 × 29
PID county state area poptotal popdensity popwhite popblack popamerindian
<int> <chr> <chr> <dbl> <int> <dbl> <int> <int> <int>
1 561 ADAMS IL 0.052 66090 1271. 63917 1702 98
2 562 ALEXAN… IL 0.014 10626 759 7054 3496 19
3 563 BOND IL 0.022 14991 681. 14477 429 35
4 564 BOONE IL 0.017 30806 1812. 29344 127 46
5 565 BROWN IL 0.018 5836 324. 5264 547 14
6 566 BUREAU IL 0.05 35688 714. 35157 50 65
7 567 CALHOUN IL 0.017 5322 313. 5298 1 8
8 568 CARROLL IL 0.027 16805 622. 16519 111 30
9 569 CASS IL 0.024 13437 560. 13384 16 8
10 570 CHAMPA… IL 0.058 173025 2983. 146506 16559 331
# ℹ 427 more rows
# ℹ 20 more variables: popasian <int>, popother <int>, percwhite <dbl>,
# percblack <dbl>, percamerindan <dbl>, percasian <dbl>, percother <dbl>,
# popadults <int>, perchsd <dbl>, percollege <dbl>, percprof <dbl>,
# poppovertyknown <int>, percpovertyknown <dbl>, percbelowpoverty <dbl>,
# percchildbelowpovert <dbl>, percadultpoverty <dbl>,
# percelderlypoverty <dbl>, inmetro <int>, category <chr>, avg.area <dbl>