# Load the cacao ratings file from the working directory into a data frame.
cacao.csv <- read.csv(file = "flavors_of_cacao.csv")

Preview the data

# Show the first and the last six rows (six is the default for head()/tail()).
head(cacao.csv, n = 6L)
tail(cacao.csv, n = 6L)

Describe the data

# Compact overview: dimensions plus the type and first values of each column.
str(object = cacao.csv)
## 'data.frame':    1795 obs. of  9 variables:
##  $ Company...Maker.if.known.       : chr  "A. Morin" "A. Morin" "A. Morin" "A. Morin" ...
##  $ Specific.Bean.Origin.or.Bar.Name: chr  "Agua Grande" "Kpime" "Atsane" "Akata" ...
##  $ REF                             : int  1876 1676 1676 1680 1704 1315 1315 1315 1319 1319 ...
##  $ Review.Date                     : int  2016 2015 2015 2015 2015 2014 2014 2014 2014 2014 ...
##  $ Cocoa.Percent                   : chr  "63%" "70%" "70%" "70%" ...
##  $ Company.Location                : chr  "France" "France" "France" "France" ...
##  $ Rating                          : num  3.75 2.75 3 3.5 3.5 2.75 3.5 3.5 3.75 4 ...
##  $ Bean.Type                       : chr  " " " " " " " " ...
##  $ Broad.Bean.Origin               : chr  "Sao Tome" "Togo" "Togo" "Togo" ...
# Per-column summary: quantiles and mean for numeric columns,
# length/class/mode for character columns.
summary(object = cacao.csv)
##  Company...Maker.if.known. Specific.Bean.Origin.or.Bar.Name      REF      
##  Length:1795               Length:1795                      Min.   :   5  
##  Class :character          Class :character                 1st Qu.: 576  
##  Mode  :character          Mode  :character                 Median :1069  
##                                                             Mean   :1036  
##                                                             3rd Qu.:1502  
##                                                             Max.   :1952  
##   Review.Date   Cocoa.Percent      Company.Location       Rating     
##  Min.   :2006   Length:1795        Length:1795        Min.   :1.000  
##  1st Qu.:2010   Class :character   Class :character   1st Qu.:2.875  
##  Median :2013   Mode  :character   Mode  :character   Median :3.250  
##  Mean   :2012                                         Mean   :3.186  
##  3rd Qu.:2015                                         3rd Qu.:3.500  
##  Max.   :2017                                         Max.   :5.000  
##   Bean.Type         Broad.Bean.Origin 
##  Length:1795        Length:1795       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

Create a contingency table

# Count the bars per company location and list the most frequent first.
# NOTE(review): the counts expose misspelled duplicates in the raw data
# (e.g. "Eucador", "Niacragua", "Domincan Republic") that are tallied
# separately from the correctly spelled countries.
location_counts <- table(cacao.csv$Company.Location)
sort(location_counts, decreasing = TRUE)
## 
##            U.S.A.            France            Canada              U.K. 
##               764               156               125                96 
##             Italy           Ecuador         Australia           Belgium 
##                63                54                49                40 
##       Switzerland           Germany           Austria             Spain 
##                38                35                26                25 
##          Colombia           Hungary         Venezuela            Brazil 
##                23                22                20                17 
##             Japan        Madagascar       New Zealand              Peru 
##                17                17                17                17 
##           Denmark           Vietnam         Guatemala          Scotland 
##                15                11                10                10 
##         Argentina        Costa Rica            Israel            Poland 
##                 9                 9                 9                 8 
##          Honduras         Lithuania Domincan Republic         Nicaragua 
##                 6                 6                 5                 5 
##       South Korea            Sweden         Amsterdam              Fiji 
##                 5                 5                 4                 4 
##           Ireland            Mexico       Netherlands       Puerto Rico 
##                 4                 4                 4                 4 
##          Sao Tome           Grenada           Iceland          Portugal 
##                 4                 3                 3                 3 
##         Singapore      South Africa           Bolivia             Chile 
##                 3                 3                 2                 2 
##           Finland         St. Lucia    Czech Republic           Eucador 
##                 2                 2                 1                 1 
##             Ghana             India        Martinique         Niacragua 
##                 1                 1                 1                 1 
##       Philippines            Russia          Suriname             Wales 
##                 1                 1                 1                 1

Select the data using indexing

# Row 8 with every column (an empty column index keeps all columns).
cacao.csv[8, ]
# First values of column 9; selecting one column returns a plain vector.
head(cacao.csv[, 9])
## [1] "Sao Tome"  "Togo"      "Togo"      "Togo"      "Peru"      "Venezuela"
# The single cell at row 8, column 9.
cacao.csv[8, 9]
## [1] "Venezuela"

Subset the data using subset()

# Find all chocolate bars with a rating of at least 4.
# (Wrap any of these calls in head() to show only the first few rows.)
subset(cacao.csv, Rating >= 4)
# The same selection using a logical test as the row index.
# Unlike subset() and which(), a logical index returns all-NA rows where the
# test is NA; the three forms agree as long as Rating has no missing values.
cacao.csv[cacao.csv$Rating >= 4, ]
# The same selection again, with which() turning the test into row numbers.
cacao.csv[which(cacao.csv$Rating >= 4), ]
# Find all chocolate bars whose company is located in Italy or the USA.
# The data spell the country "U.S.A." (see the location table), so matching
# on "USA" would silently return no American rows.
subset(cacao.csv, Company.Location %in% c("Italy", "U.S.A."))
# Find all chocolate bars that meet both criteria above.
subset(
  cacao.csv,
  Rating >= 4 & Company.Location %in% c("Italy", "U.S.A.")
)

Import data from flat files using readr

library(readr)
# Read the same CSV with readr; it reports the column types it guessed.
cacao_csv <- read_csv(file = "flavors_of_cacao.csv")
## Rows: 1795 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Company 
## (Maker-if known), Specific Bean Origin
## or Bar Name, Cocoa
## ...
## dbl (3): REF, Review
## Date, Rating
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the data read from the CSV file.
cacao_csv
# Read the tab-delimited copy of the data.
cacao_tsv <- read_tsv(file = "flavors_of_cacao.txt")
## Rows: 1795 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (6): Company 
## (Maker-if known), Specific Bean Origin
## or Bar Name, Cocoa
## ...
## dbl (3): REF, Review
## Date, Rating
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Import data from flat files using data.table

library(data.table)
# Read the CSV with data.table's fread() and print the result.
cacaof <- fread(input = "flavors_of_cacao.csv")
cacaof

Import data from spreadsheets using readxl

library(readxl)
# List the worksheet names contained in the workbook.
excel_sheets(path = "penguins.xlsx")
## [1] "Torgersen Island" "Biscoe Island"    "Dream Island"
# Read each island's worksheet, treating the literal string "NA" as missing,
# and print each result.
torgersen <- read_excel(
  path = "penguins.xlsx", sheet = "Torgersen Island", na = "NA"
)
torgersen
biscoe <- read_excel(
  path = "penguins.xlsx", sheet = "Biscoe Island", na = "NA"
)
biscoe
dream <- read_excel(
  path = "penguins.xlsx", sheet = "Dream Island", na = "NA"
)
dream
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Stack the three island sheets into one table.
penguins <- bind_rows(torgersen, biscoe, dream)
penguins
# Cross-tabulate species against island to see where each species lived.
table(penguins[["species"]], penguins[["island"]])
##            
##             Biscoe Dream Torgersen
##   Adelie        44    56        52
##   Chinstrap      0    68         0
##   Gentoo       124     0         0