RNotes

# Useful Libraries
library(tidyverse)
library(rvest)
library(stringr)
library(robotstxt) 
library(googleVis) # Map
library(editData) # To edit data frames.
library(gapminder) # You can get data from WB or gapminder.
library(jtools) # New way to summarize data.
library(XML) # Gets HTML tables from the web.
library(leaflet.extras) # Extra tools for leaflet.

# Load several packages in one go.
# The vector is deliberately not called `library`, so it does not mask the
# name of the base function that is applied to it below.
packages <- c(
  "dplyr", "bitops", "RCurl", "rJava", "xlsx", "zipcode", "ggmap", "ggplot2",
  "RColorBrewer", "readr", "tidyr", "DT", "knitr", "reshape2", "map"
)
# NOTE(review): "map" may be meant to be "maps" - confirm the package name.
# character.only = TRUE makes library() treat each element as a package name;
# invisible() suppresses the printed list that lapply() returns.
invisible(lapply(packages, library, character.only = TRUE))

Useful R Commands

head() will give you the first 6
list() will show the data list
print()
glimpse() gives a glimpse of the data

variable.names() gives the names of the columns.
names() gives the names of the columns as well.

runif(n, min, max) gives n random numbers between the low (min) and high (max) parameters
s=seq(from=1,to=5, by=1/03) gives sequence

Operation
setwd("") changes the working directory. Take out "c:" and change \ to /. Don't forget the "".
getwd() shows the working directory
dir() lists the files in the directory
class() gives the class of the data, e.g. matrix, data frame, list. rm() removes objects from the workspace.
attach() makes the columns of a data frame available by name in the main console. library() loads packages; use install.packages() to get them.
object.size() checks the size of a data frame.
as.numeric() converts to numeric.
as.logical() converts to TRUE or FALSE.
rm(list=ls()) deletes everything in the current environment.
remove.packages("acs") uninstalls a package.
install.packages("installr"); library(installr) installs and loads installr. updateR() updates R.

Import into R
x<-c( "csv", "csv" )
read.csv( , head = TRUE) if it has a header.

Export out of R print(object.size(a), units = "Mb") change a to the data set or matrix
write.csv(X, file = "name of new csv") creates csv file
Inf infinity (a constant, not a function)

Data Manipulation
cbind() binds another vector into the matrix as a column.
rbind() binds rows
sort(x, decreasing = FALSE) sorts the data in a vector/column
t() transposes data
as.matrix() converts data frames into matrices, which take up less space.

Use square brackets to subset, e.g. [ ,2] shows the second column.

# Subset Data
x<-c("a","b")
x[1]

# Remove rows
is.na(x)
myData[-c(2, 4, 6), ]

subset(dataframe, A==B & E!=0) this would subset data

# select variables v1, v2, v3
myvars <- c("v1", "v2", "v3")
newdata <- mydata[myvars]
newdata <- mydata[c(-3,-5)] # exclude 3rd and 5th variable
newdata <- mydata[1:5,] # first 5 observations
newdata <- mydata[which(mydata$gender=='F' & mydata$age > 65), ] # based on variable values

Using subset function

# Keep the rows where Victim Age is 0 and only the date and time columns.
# A column name containing a space must be wrapped in backticks, and the row
# condition must use `==` (comparison). Written as 'Victim Age' = 0 it is
# parsed as a named argument, so no row filter is applied at all.
z <- subset(x, `Victim Age` == 0,
            select = c('Date Occurred', 'Time Occurred'))

# Keep the rows where age is at least 20 or below 10, and only ID and Weight.
newdata <- subset(mydata, age >= 20 | age < 10,
                  select = c(ID, Weight))

To deal with NA’s
x <- All[complete.cases(All), ] would take out all the NA’s in rows, x would be with no NAs
na.fail() Stop if any missing values are encountered
na.omit() Drop out any rows with missing values anywhere in them and forgets them forever.
na.exclude() Drop out rows with missing values, but keeps track of where they were (so that when you make predictions, for example, you end up with a vector whose length is that of the original response.)
na.pass() Take no action.
is.na(x) gives what has NA
na.rm=TRUE would remove NAs from analysis like mean, variance, etc.
which(is.na(All), arr.ind=TRUE) gives the row and column positions of every NA.

Descriptive Stats

mean()
sd()
var()
min()
max()
median()
range()
quantile()
coefficients()
slope()
intercept()
———————————————————————————————————————————–

Plotting the data

library(ggplot2)

# Base R (built-in) plot function.
# Scatterplot of total population (x) against median age (y) from `cen2010`.
plot(
  cen2010$`Total Population`, cen2010$`Median Age`,
  type = "p",          # "p" points, "l" lines, "b" both, "c", "o", "h" histogram-like, "s" steps
  main = "Scaterplot", # title of the plot
  sub = "Bottom",      # subtitle of the plot
  xlab = "X Axis",     # x-axis title
  ylab = "Y Axis",     # y-axis title
  col = "red",
  lwd = 2,             # line thickness: higher is thicker, 1 is thin
  lty = 6,             # line type; only affects plot types that draw lines
  asp = NA             # y/x aspect ratio; NA (the default) lets R choose
)

# lines() and abline() add to an existing plot, so they are called after
# plot() rather than being passed to it as arguments.
# NOTE(review): sd() returns a single number, so lines() receives only one
# point here - confirm what was meant to be drawn.
lines(sd(cen2010$`Median Age`), col = "black", lwd = 8, lty = 8)
# h = horizontal lines, v = vertical lines.
# NOTE(review): h is given the whole column, so one line is drawn per value;
# perhaps mean() of the column was intended - confirm.
abline(h = cen2010$`Median Age`, col = "red", lwd = 8, lty = 8)

This reads data, such as tables, straight from HTML.

# Read tables straight from an HTML page.
library(XML)
library(RCurl)
library(rlist)

# Download the raw page source.
# NOTE(review): ssl.verifypeer = FALSE switches off SSL certificate
# verification for this request - confirm this is intended.
theurl <- getURL(
  "https://en.wikipedia.org/wiki/Brazil_national_football_team",
  .opts = list(ssl.verifypeer = FALSE)
)

# Parse every HTML table on the page, then drop the entries that are NULL.
tables <- list.clean(readHTMLTable(theurl), fun = is.null, recursive = FALSE)

# Number of rows in each remaining table.
n.rows <- unlist(lapply(tables, nrow))

Data Frames

#Data frames
name <- c("Anne", "Pete", "Frank", "Julian", "Catch")
age <- c(28, 30, 21, 39, 35)
child <- c(FALSE, TRUE, TRUE, FALSE, TRUE) 

setDT() # would set dataframe into data table

People<- data.frame(name, age, child, stringsAsFactors = FALSE)
People
People[3,]

# add/remove column
dataframe$column = C() # dataframe is the name of the data frame, then $ and the column name; assigning a vector adds the column
dataframe$column <- NULL # removes the column

which(putdataframe[,] >22), ]
# Sort the rows of mydata by column e1 (ascending); use order(-mydata$e1) for
# descending. The trailing comma matters: without it the index selects
# columns instead of rows.
mydata[order(mydata$e1), ]

# See the data. Pass the data frame itself, not its name in quotes: a quoted
# name is just a one-element character vector.
head(mydata, n = 5) # gives the first 5 rows of mydata
tail(mydata, n = 5) # gives the last 5 rows of mydata
names() # gives name
colnames() # would only give colnames
rownames() # would only give row names
ncol() # would give the number of columns
nrow() # would give number of rows.

#add new column. must have same type of number.
weight<-c(74, 63, 68, 55, 56)
cbind(People, weight) #would add it as a column

# Add a new row. The new data frame must have the same columns as People
# (name, age, child); an extra column makes rbind() stop with an error.
tom <- data.frame(name = "Tom", age = 37, child = FALSE)
rbind(People, tom)

# filter by columns, e.g. people who are 20 and are female.
c<- filter(df2, Date.Occurred == '2014-01-01 ; 2014-12-31') # use this to get certain values of df.

summary(A)           # summarise data frame
apply(A, 1, mean)    # calculate row means: same as "rowMeans(A)"
apply(A, 2, mean)    # calculate column means: same as "colMeans(A)"

# Merge multiple CSV files: put the files in one folder and pass its path.
###########################################################################
# multMerge: read every CSV file in a folder and merge them into one data frame.
#
# Arguments:
#   mypath  - path of the folder that holds the CSV files.
#   pattern - regular expression selecting the files to read (matched
#             case-insensitively). The default keeps only names ending in
#             ".csv", so other files and sub-folders in the same folder are
#             not passed to read.csv().
#
# Returns a data frame built by merging the files one after another with
# merge(all = TRUE), so rows without a match in another file are kept.
# Returns NULL (with a warning) when no file matches.
multMerge <- function(mypath, pattern = "\\.csv$") {
  filenames <- list.files(
    path = mypath,
    pattern = pattern,
    full.names = TRUE,
    ignore.case = TRUE
  )
  if (length(filenames) == 0) {
    warning("No files matching '", pattern, "' found in: ", mypath,
            call. = FALSE)
    return(NULL)
  }
  datalist <- lapply(filenames, function(csv_file) {
    read.csv(file = csv_file, header = TRUE, stringsAsFactors = FALSE)
  })
  Reduce(function(x, y) merge(x, y, all = TRUE), datalist)
}
#########################################
mydata31 <- multMerge("/Users/marre/Documents/OptionData") # put the folder path in "" and this combines the csv files.
write.csv(mydata31, file = "mydata312")

#### ----- Merging two data frames with merge()
# Pass the data frame objects themselves; quoted names are only character
# strings, so merge() would combine the two strings instead of the data.
x <- merge(my_data, my_data3)
rbind()

Summarize the data

# https://cran.r-project.org/web/packages/jtools/vignettes/summ.html#effect_plot
library(jtools) # These are the functions
summ() # Write the model name in the parentheses.

summary() gives a summary of the data for the individual object that was fed into it, e.g. OLS or panel models
str(his.mobi) is like summary.

# Changing formats for one column. 
mtcars$am <- as.factor(mtcars$am)

This would create a table

Results

Table i

dfgdf

Table ii

dfgfd

Leaflet

# There have been problems with addLegend. Use the code below.
# Draws circle markers for the incidents in `shoot` on a tile map and adds a
# colour legend.
# NOTE(review): `shoot`, `pal` and `b` are not defined in this snippet; they
# must already exist before this runs.
leaflet() %>%
  addTiles() %>%
  # Centre the view on the given longitude/latitude at zoom level 9.
  setView(lng = -118.2437, lat = 34.0522, zoom = 9)%>%
  # One marker per incident, coloured by incident type through `pal`.
  addCircleMarkers(lng = shoot$APPROX_LONGITUDE, lat = shoot$APPROX_LATITUDE, 
             radius = 3,
             color = pal(shoot$INCIDENT.TYPE),
             popup = b) %>% 
  # addLegend is called with an explicit leaflet:: prefix.
  # NOTE(review): presumably to avoid a clash with another package's addLegend - confirm.
  leaflet::addLegend(position = "bottomright", pal = pal, values = shoot$INCIDENT.TYPE)

```