# install.packages('tinytex')
# tinytex::install_tinytex() # install TinyTeX
\[\int{\dfrac{5x^2}{3x+2}}dx\]
read.table, read.csv, for reading tabular data
readLines, for reading lines of a text file
source, for reading in R code files (inverse of dump)
dget, for reading in R code files (inverse of dput)
load, for reading in saved workspaces
unserialize, for reading single R objects in binary form
# Load the standings table from a CSV file (path is relative, so it is
# resolved against the current working directory), then show its structure.
team_standing <- read.csv(file = "team_standings.csv")
str(object = team_standing)
'data.frame': 32 obs. of 2 variables:
$ Standing: int 1 2 3 4 5 6 7 8 9 10 ...
$ Team : chr "Spain" "Netherlands" "Germany" "Uruguay" ...
# Show the full standings table, then only its first 10 and last 5 rows.
team_standing
head(x = team_standing, n = 10)
tail(x = team_standing, n = 5)

# Read the text file into a data frame with a single named column.
# NOTE(review): read.table() splits fields on whitespace by default, so this
# presumably depends on how the file is laid out; readLines() may be the
# better fit for free text -- confirm against the file.
little_mermaid <- read.table(
  file = "little_mermaid.txt",
  col.names = "The_Little_Mermaid"
)
little_mermaid
write.table, for writing tabular data to text files (e.g. CSV) or connections
writeLines, for writing character data line-by-line to a file or connection
dump, for dumping a textual representation of multiple R objects
dput, for outputting a textual representation of an R object
save, for saving an arbitrary number of R objects in binary format (possibly compressed) to a file.
serialize, for converting an R object into a binary format for outputting to a connection (or file).
# Compact overview of the iris data frame: size, column types, first values.
str(object = iris)
'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Print every row of iris.
iris

# Export iris to a CSV file; row.names = FALSE leaves out the row-name column.
write.csv(iris, file = "iris.csv", row.names = FALSE)
# Compact overview of the swiss data frame: size, column types, first values.
str(object = swiss)
'data.frame': 47 obs. of 6 variables:
$ Fertility : num 80.2 83.1 92.5 85.8 76.9 76.1 83.8 92.4 82.4 82.9 ...
$ Agriculture : num 17 45.1 39.7 36.5 43.5 35.3 70.2 67.8 53.3 45.2 ...
$ Examination : int 15 6 5 12 17 9 16 14 12 16 ...
$ Education : int 12 9 5 7 15 7 7 8 7 13 ...
$ Catholic : num 9.96 84.84 93.4 33.77 5.16 ...
$ Infant.Mortality: num 22.2 22.2 20.2 20.3 20.6 26.6 23.6 24.9 21 24.4 ...
# Round-trip swiss through a semicolon-delimited text file.
# Row names are not written, so they are absent from the file.
write.table(swiss, file = "swiss.txt", sep = ";", row.names = FALSE)

# Read the file back; header = TRUE takes column names from the first line.
swiss_v2 <- read.table("swiss.txt", header = TRUE, sep = ";")
swiss_v2
# Read only the first 100 data rows of the comma-separated election file so
# its layout can be inspected without loading the whole thing.
initial <- read.table(
  file = "president2022_05131447.csv",
  header = TRUE,
  sep = ",",
  nrows = 100
)

# Preview the first rows and the inferred column types.
head(x = initial)
str(object = initial)
'data.frame': 100 obs. of 23 variables:
$ PRECINCT_CODE : int 1010017 1010017 1010017 1010017 1010017 1010017 1010017 1010017 1010017 1010017 ...
$ CONTEST_CODE : int 199000 199000 199000 199000 199000 199000 199000 199000 199000 199000 ...
$ CANDIDATE_CODE : num 9.9e+09 9.9e+09 9.9e+09 9.9e+09 9.9e+09 ...
$ PARTY_CODE : int 35 180 36 3 38 22 182 16 1 180 ...
$ VOTES_AMOUNT : int 1 0 0 7 5 17 616 0 3 31 ...
$ TOTALIZATION_ORDER : int 4 1 2 3 5 6 7 8 9 10 ...
$ NUMBER_VOTERS : int 697 697 697 697 697 697 697 697 697 697 ...
$ UNDERVOTE : int 2 2 2 2 2 2 2 2 2 2 ...
$ OVERVOTE : int 15 15 15 15 15 15 15 15 15 15 ...
$ RECEPTION_DATE : chr "05/09/2022 - 08:07:08 PM" "05/09/2022 - 08:07:08 PM" "05/09/2022 - 08:07:08 PM" "05/09/2022 - 08:07:08 PM" ...
$ CONTEST_NAME : chr "PRESIDENT PHILIPPINES" "PRESIDENT PHILIPPINES" "PRESIDENT PHILIPPINES" "PRESIDENT PHILIPPINES" ...
$ CANDIDATE_NAME : chr "GONZALES, NORBERTO (PDSP)" "ABELLA, ERNIE (IND)" "DE GUZMAN, LEODY (PLM)" "DOMAGOSO, ISKO MORENO (AKSYON)" ...
$ PARTIES_NAME : chr "PARTIDO DEMOKRATIKO SOSYALISTA NG PILIPINAS" "INDEPENDENT" "PARTIDO LAKAS NG MASA" "AKSYON DEMOKRATIKO" ...
$ PARTIES_ALIAS : chr "PDSP" "IND" "PLM" "AKSYON" ...
$ REGION : chr "CAR" "CAR" "CAR" "CAR" ...
$ PROVINCE : chr "ABRA" "ABRA" "ABRA" "ABRA" ...
$ MUNICIPALITY : chr "BANGUED" "BANGUED" "BANGUED" "BANGUED" ...
$ BARANGAY : chr "AGTANGAO" "AGTANGAO" "AGTANGAO" "AGTANGAO" ...
$ CLUSTER : int 17 17 17 17 17 17 17 17 17 17 ...
$ CLUSTERTOTAL : int 783 783 783 783 783 783 783 783 783 783 ...
$ CLUSTERED_PRECINCTS: chr "0045A, 0045B, 0050A, 0050B" "0045A, 0045B, 0050A, 0050B" "0045A, 0045B, 0050A, 0050B" "0045A, 0045B, 0050A, 0050B" ...
$ POLLINGCENTER : chr "AGTANGAO ELEMENTARY SCHOOL, AGTANGAO, BANGUED, ABRA" "AGTANGAO ELEMENTARY SCHOOL, AGTANGAO, BANGUED, ABRA" "AGTANGAO ELEMENTARY SCHOOL, AGTANGAO, BANGUED, ABRA" "AGTANGAO ELEMENTARY SCHOOL, AGTANGAO, BANGUED, ABRA" ...
$ DISTRICT : chr "ABRA - LONE DISTRICT" "ABRA - LONE DISTRICT" "ABRA - LONE DISTRICT" "ABRA - LONE DISTRICT" ...
# Record the class of every column in the 100-row sample as a named
# character vector (names are the column names).
# vapply() is used instead of sapply() because its return type is fixed:
# it always yields one string per column and fails loudly if a column
# reports more than one class, rather than silently returning a list.
classes <- vapply(initial, class, character(1))
classes
PRECINCT_CODE CONTEST_CODE CANDIDATE_CODE PARTY_CODE VOTES_AMOUNT
"integer" "integer" "numeric" "integer" "integer"
TOTALIZATION_ORDER NUMBER_VOTERS UNDERVOTE OVERVOTE RECEPTION_DATE
"integer" "integer" "integer" "integer" "character"
CONTEST_NAME CANDIDATE_NAME PARTIES_NAME PARTIES_ALIAS REGION
"character" "character" "character" "character" "character"
PROVINCE MUNICIPALITY BARANGAY CLUSTER CLUSTERTOTAL
"character" "character" "character" "integer" "integer"
CLUSTERED_PRECINCTS POLLINGCENTER DISTRICT
"character" "character" "character"
library(tictoc)
# Time a full read of the election results file; tic()/toc() come from tictoc.
# NOTE(review): the `classes` vector computed from the 100-row sample is not
# passed here as colClasses -- confirm whether that was intended.
tic()
pres2022 <- read.csv(file = "president2022_05131447.csv")
toc()
25.92 sec elapsed
# Number of rows and columns in the full data set.
dim(pres2022)
[1] 1060080 23
# Column names of the full data set.
names(pres2022)
[1] "PRECINCT_CODE" "CONTEST_CODE" "CANDIDATE_CODE" "PARTY_CODE"
[5] "VOTES_AMOUNT" "TOTALIZATION_ORDER" "NUMBER_VOTERS" "UNDERVOTE"
[9] "OVERVOTE" "RECEPTION_DATE" "CONTEST_NAME" "CANDIDATE_NAME"
[13] "PARTIES_NAME" "PARTIES_ALIAS" "REGION" "PROVINCE"
[17] "MUNICIPALITY" "BARANGAY" "CLUSTER" "CLUSTERTOTAL"
[21] "CLUSTERED_PRECINCTS" "POLLINGCENTER" "DISTRICT"
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.4
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1
✔ readr   2.1.2      ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
# Total votes per candidate within Negros Oriental, largest total first.
pres2022 %>%
  filter(PROVINCE == "NEGROS ORIENTAL") %>%
  group_by(CANDIDATE_NAME) %>%
  # .groups = "drop" hands back an ungrouped result, so no ungroup() step
  summarise(VOTES_AMOUNT = sum(VOTES_AMOUNT), .groups = "drop") %>%
  arrange(desc(VOTES_AMOUNT))
# Rough memory estimate: 8 bytes per value x 23 columns x 1060080 rows.
# This treats every cell as an 8-byte number; the data also has character
# columns, so the figure is only a back-of-the-envelope guide.
paste(8 * 23 * 1060080, "bytes")
[1] "195054720 bytes"
# Convert the byte estimate to megabytes (2^20 bytes each).
paste(195054720 / 2^20, "MB")
[1] "186.018676757812 MB"