IMPORTING DATA

URL data

library(RCurl)
# Fetch the CSV as one text string, then parse that text into a data frame.
covid_raw <- getURL(
  "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv"
)

covid.csv <- read.csv(text = covid_raw)

covid_raw=getURL("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv")

covid.csv=read.csv(text=covid_raw)

CSV data

1)Easy way read.csv(file.choose(),header = TRUE)

2)Path method read.csv(file="path",header=TRUE)

3)Table method ## Comma separated read.table(file.choose(),header=TRUE,sep=",")

txt data

  1. read.delim(file.choose(),header=TRUE)

2)Table method read.table(file.choose(),header=TRUE,sep="\t")

EXPORTING DATA

  1. Saving in current working directory

write.table(data in environment,file="maho.csv",sep=",")

write.table(data in environment,file="maho.csv",sep=",",row.names=FALSE)

2)Saving in other than working directory

write.table(data in environment,file="path",sep=",",row.names=FALSE)

3)Saving csv file write.csv(data1,file="path/name of the new file",row.names = FALSE)

4)Saving txt file

write.table(data1,file="path/name of the new file",row.names = FALSE, sep=" ")

Insight about the dataframe

# Number of rows and columns.
dim(mtcars)
## [1] 32 11
# First rows, then last rows.
head(mtcars)
tail(mtcars)
# Rows 3, 4 and 7 with every column.
mtcars[c(3, 4, 7), ]
# Rows 5 through 9.
mtcars[5:9, ]
# Every row except 5 through 9.
mtcars[-(5:9), ]
# Column names.
names(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"

Subset rows using column values

dplyr package

Usage: filter(.data, ..., .preserve = FALSE)

Examples:

filter(dataset,columnname=="specific value")

filter(pickcount,PickCount=="924")

library(dplyr)

subset(dataframe,select=“colname”)

Creating a data frame

dataset$Column[i:j]

df_name1=df_name$`Primed? 1=Y`[21:42]

Working with data

# Remove every object from the current workspace.
rm(list=ls())

# To remove a single object instead: rm(name of the object)
# Reference a column explicitly through the data frame with `$`.
mean(mtcars$mpg)
## [1] 20.09062
# After attach(), the columns of mtcars can be referenced by bare name.
attach(mtcars)

mean(mpg)
## [1] 20.09062
# detach() undoes the attach; mtcars is attached again straight away because
# the calls below refer to mpg by bare name.
detach(mtcars)
attach(mtcars)

# Storage class and number of elements of one column.
class(mpg)
## [1] "numeric"
length(mpg)
## [1] 32
# Per-column summary of the whole data frame.
summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Build a 0/1 vector and store it as a factor in one step.
x <- as.factor(c(0, 1, 0, 1, 0, 0, 0, 1, 0, 1))

# A logical vector converts to numeric with TRUE -> 1 and FALSE -> 0.
a <- c(TRUE, FALSE, TRUE)

as.numeric(a)
## [1] 1 0 1
# Select one column by name; the result is still a data frame.
mtcars["hp"]
# All rows, columns 1 and 3.
mtcars[, c(1, 3)]
# Rows 1 to 3 of columns 1 to 5.
mtcars[1:3, 1:5]
# Keep only the rows whose mpg exceeds 21. The row mask has to be built from
# the column itself: `"mpg" > 21` compares the string "mpg" with 21 and yields
# a single TRUE, which selects the whole data frame unfiltered.
a <- mtcars[mtcars$mpg > 21, ]
a
# One logical flag per row: vs is 0 and am is 1. The columns are referenced
# through mtcars so this does not depend on an earlier attach(mtcars).
conditions <- mtcars$vs == 0 & mtcars$am == 1

# Append the flag as an extra column.
New_data <- cbind(mtcars, conditions)

New_data[1:5, ]

Working Directory

# Print the current working directory; the path shown is from the machine
# where these notes were generated.
getwd()
## [1] "/Users/metuhead/Desktop/R"

Set the working directory

  1. setwd("exact path")

  2. setwd("~missing path") setwd("~/Desktop/Midterm1")

  3. projectWD<-"/Users/metuhead/Desktop/FE 541- Applied Stat/Midterm1" setwd(projectWD)

  4. Use the menu Session/Set Working Directory

Save workspace image file

  1. Use save.image save.image("nameproject.Rdata")

  2. Use the menu Session/Save work space As

  3. Clear workspace

rm(list=ls())

  4. Loading work space image

load("nameproject.Rdata")

5)Loading the workspace image another way

load(file.choose())

6)Use the menu Session/Load Workspace

Rscript

1)To comment and uncomment all the lines in Rscript

Use the menu Code/Comment Uncomment Lines

2)Use tab key to complete commands

Type me and hit the Tab key; it will show the suggestions, e.g. mean

Installing packages

1)Use install.packages

install.packages("epiR")

2)Then use library()

You must call library() again in every new session

3)See all available packages

https://cran.r-project.org/

4)Menu Tools/Install Packages

Using the Apply Function

Apply functions are a set of loop functions in R

apply(X, MARGIN, FUN, ...)

Example:

# MARGIN = 2 runs the function over columns, giving one mean per column.
apply(mtcars, 2, mean)
##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500

Another way of finding column mean

colMeans(x = mtcars)
##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500
# Further apply() illustrations, left commented out:
# apply(X=mtcars,MARGIN=2,FUN=plot,type="l")

# plot(apply(X=mtcars,MARGIN=1,FUN=sum))

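Removing NA values in apply

Extra arguments are passed on to FUN, so add na.rm=TRUE: apply(X=mtcars,MARGIN=2,FUN=mean,na.rm=TRUE)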

Using the Tapply Function

tapply can be used to apply a function to subsets of a variable or vector

tapply(X, INDEX, FUN = NULL, ..., simplify = TRUE)

Example:

# attach() lets `weight` and `feed` below resolve to the chickwts columns.
attach(chickwts)
# Mean weight within each feed group.
tapply(X = weight, INDEX = feed, FUN = mean)
##    casein horsebean   linseed  meatmeal   soybean sunflower 
##  323.5833  160.2000  218.7500  276.9091  246.4286  328.9167
# simplify = FALSE keeps one list element per group instead of collapsing the
# result to a vector. The argument must be spelled `simplify`: a misspelling
# is forwarded to mean() through `...` and silently ignored.
# with(mtcars, ...) makes the column lookup independent of any earlier attach.
with(mtcars, tapply(hp, vs, mean, simplify = FALSE))
## $`0`
## [1] 189.7222
##
## $`1`
## [1] 91.35714
##
# Cross-check a single group by subsetting directly.
mean(weight[feed == "horsebean"])
## [1] 160.2
# Two grouping variables give a table of means: rows are vs, columns are am.
with(mtcars, tapply(X = hp, INDEX = list(vs, am), FUN = mean, simplify = TRUE))
##          0         1
## 0 194.1667 180.83333
## 1 102.1429  80.57143
# Cross-check the vs = 0, am = 0 cell.
with(mtcars, mean(hp[vs == 0 & am == 0]))
## [1] 194.1667