# Load the chocolate-bar ratings from the CSV file into a base data.frame.
cacao.csv <- read.csv(file = "flavors_of_cacao.csv")

Preview the data

# First six rows of the data.
head(cacao.csv, n = 6L)

# Last six rows of the data.
tail(cacao.csv, n = 6L)

Describe the data

# Compact overview: dimensions plus the type and first values of each column.
str(object = cacao.csv)

## 'data.frame':    1795 obs. of  9 variables:
##  $ Company...Maker.if.known.       : chr  "A. Morin" "A. Morin" "A. Morin" "A. Morin" ...
##  $ Specific.Bean.Origin.or.Bar.Name: chr  "Agua Grande" "Kpime" "Atsane" "Akata" ...
##  $ REF                             : int  1876 1676 1676 1680 1704 1315 1315 1315 1319 1319 ...
##  $ Review.Date                     : int  2016 2015 2015 2015 2015 2014 2014 2014 2014 2014 ...
##  $ Cocoa.Percent                   : chr  "63%" "70%" "70%" "70%" ...
##  $ Company.Location                : chr  "France" "France" "France" "France" ...
##  $ Rating                          : num  3.75 2.75 3 3.5 3.5 2.75 3.5 3.5 3.75 4 ...
##  $ Bean.Type                       : chr  " " " " " " " " ...
##  $ Broad.Bean.Origin               : chr  "Sao Tome" "Togo" "Togo" "Togo" ...

# Per-column summary (quantiles for numeric columns, length/class for text).
summary(object = cacao.csv)

##  Company...Maker.if.known. Specific.Bean.Origin.or.Bar.Name      REF      
##  Length:1795               Length:1795                      Min.   :   5  
##  Class :character          Class :character                 1st Qu.: 576  
##  Mode  :character          Mode  :character                 Median :1069  
##                                                             Mean   :1036  
##                                                             3rd Qu.:1502  
##                                                             Max.   :1952  
##   Review.Date   Cocoa.Percent      Company.Location       Rating     
##  Min.   :2006   Length:1795        Length:1795        Min.   :1.000  
##  1st Qu.:2010   Class :character   Class :character   1st Qu.:2.875  
##  Median :2013   Mode  :character   Mode  :character   Median :3.250  
##  Mean   :2012                                         Mean   :3.186  
##  3rd Qu.:2015                                         3rd Qu.:3.500  
##  Max.   :2017                                         Max.   :5.000  
##   Bean.Type         Broad.Bean.Origin 
##  Length:1795        Length:1795       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##

Create a contingency table

# Count bars per company location, most frequent location first.
sort(table(cacao.csv[["Company.Location"]]), decreasing = TRUE)

## 
##            U.S.A.            France            Canada              U.K. 
##               764               156               125                96 
##             Italy           Ecuador         Australia           Belgium 
##                63                54                49                40 
##       Switzerland           Germany           Austria             Spain 
##                38                35                26                25 
##          Colombia           Hungary         Venezuela            Brazil 
##                23                22                20                17 
##             Japan        Madagascar       New Zealand              Peru 
##                17                17                17                17 
##           Denmark           Vietnam         Guatemala          Scotland 
##                15                11                10                10 
##         Argentina        Costa Rica            Israel            Poland 
##                 9                 9                 9                 8 
##          Honduras         Lithuania Domincan Republic         Nicaragua 
##                 6                 6                 5                 5 
##       South Korea            Sweden         Amsterdam              Fiji 
##                 5                 5                 4                 4 
##           Ireland            Mexico       Netherlands       Puerto Rico 
##                 4                 4                 4                 4 
##          Sao Tome           Grenada           Iceland          Portugal 
##                 4                 3                 3                 3 
##         Singapore      South Africa           Bolivia             Chile 
##                 3                 3                 2                 2 
##           Finland         St. Lucia    Czech Republic           Eucador 
##                 2                 2                 1                 1 
##             Ghana             India        Martinique         Niacragua 
##                 1                 1                 1                 1 
##       Philippines            Russia          Suriname             Wales 
##                 1                 1                 1                 1

Select the data using indexing

# Select the 8th row (all columns).
cacao.csv[8, ]

# Select the 9th column (all rows) and show its first six values.
head(cacao.csv[[9]])

## [1] "Sao Tome"  "Togo"      "Togo"      "Togo"      "Peru"      "Venezuela"

# Select the single value at row 8, column 9.
cacao.csv[8, 9]

## [1] "Venezuela"

Subset the data using subset()

# Find all chocolate bars with a rating of 4 or higher.
# head() limits the printout to the first few rows to save space.
head(subset(cacao.csv, Rating >= 4))

# Alternatively, select the rows with a logical test inside the index:
cacao.csv[cacao.csv$Rating >= 4, ]

# Or combine indexing with which(), which converts the logical test into row
# positions (and omits any NA results of the test):
cacao.csv[which(cacao.csv$Rating >= 4), ]

# Find all chocolate bars whose company is located in Italy or the U.S.A.
# The data spells the country "U.S.A." (see the Company.Location counts),
# so matching on "USA" would silently return the Italian rows only.
subset(cacao.csv, Company.Location %in% c("Italy", "U.S.A."))

# Find all chocolate bars that meet both criteria: rated 4 or higher AND
# made by a company located in Italy or the U.S.A.
subset(cacao.csv,
       (Rating >= 4) & (Company.Location %in% c("Italy", "U.S.A.")))

Import data from flat files using readr

library(readr)
# Read the comma-separated file with readr; the result is a tibble.
cacao_csv <- read_csv(file = "flavors_of_cacao.csv")

## Rows: 1795 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Company 
## (Maker-if known), Specific Bean Origin
## or Bar Name, Cocoa
## ...
## dbl (3): REF, Review
## Date, Rating
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Print the tibble read from the CSV file.
cacao_csv

# Read the tab-delimited flat file with readr.
cacao_tsv <- read_tsv(file = "flavors_of_cacao.txt")

## Rows: 1795 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Company 
## (Maker-if known), Specific Bean Origin
## or Bar Name, Cocoa
## ...
## dbl (3): REF, Review
## Date, Rating
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Import data from flat files using data.table

library(data.table)
# Read the CSV file with data.table's fread() and print the result.
cacaof <- fread(input = "flavors_of_cacao.csv")
cacaof

Import data from spreadsheets using readxl

library(readxl)
# List the worksheet names available in the workbook.
excel_sheets(path = "penguins.xlsx")

## [1] "Torgersen Island" "Biscoe Island"    "Dream Island"

# Read each worksheet by name; cells containing the text "NA" become missing.
torgersen <- read_excel(path = "penguins.xlsx", sheet = "Torgersen Island",
                        na = "NA")
torgersen

biscoe <- read_excel(path = "penguins.xlsx", sheet = "Biscoe Island",
                     na = "NA")
biscoe

dream <- read_excel(path = "penguins.xlsx", sheet = "Dream Island",
                    na = "NA")
dream

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:data.table':
## 
##     between, first, last

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Stack the three per-sheet tables into a single data set.
penguins <- bind_rows(torgersen, biscoe, dream)
penguins

# Cross-tabulate species (rows) against island (columns) to see where
# each species lived.
table(penguins[["species"]], penguins[["island"]])

##            
##             Biscoe Dream Torgersen
##   Adelie        44    56        52
##   Chinstrap      0    68         0
##   Gentoo       124     0         0

Extract data from local files (flat files, Excel spreadsheets)

Preview the data

Describe the data

Create a contingency table

Select the data using indexing

Subset the data using subset()

Import data from flat files using readr

Import data from flat files using data.table

Import data from spreadsheets using readxl