Abstract
TO BE COMPLETED
library(tidyverse)
library(magrittr)
library(fst)
# Load the photo directory table (PD) and the long-format EXIF table (ED).
PD <- read_fst(path = "PhotoDir.fst")
ED <- read_fst(path = "ExifData.fst")
# NOTE(review): the sourced script is not visible here; judging by its join
# messages it works on PD/ED to drop duplicate photos -- confirm.
source("RemoveDuplicatePhotos.R")
## Joining, by = c("date_dir", "fc_dir", "photoFile", "is_in_NFS")
## Joining, by = "ID"
# Number of photos; reused below as the expected per-field record count.
np <- nrow(PD)
cat(
  paste(
    "N. of photo:", np,
    "Average n. of fields per photo:", nrow(ED)/np
  )
)
## N. of photo: 11505 Average n. of fields per photo: 169.51890482399
# Strip leading/trailing whitespace from field names and values.
ED <- mutate(ED, field = trimws(field), value = trimws(value))
#' Summarise field and field/value frequencies of a long-format EXIF table.
#'
#' Prints the number of distinct photo IDs and a cross-tabulation with one
#' row per field record count and one column per number of distinct values.
#'
#' @param ED Data frame with (at least) the columns `ID`, `field`, `value`.
#' @return An unnamed list of two tibbles:
#'   `[[1]]` one row per field (`field`, `n.of.distinct.values`, `field.freq`);
#'   `[[2]]` one row per field/value pair (`field`, `value`, `comb.frq`),
#'   as returned by the grouped `summarise()`.
sts <- function(ED) {
  np <- ED %>% distinct(ID) %>% nrow()
  fields <- ED %>%
    group_by(field) %>%
    summarise(field.freq = n())
  field.value.combinations <- ED %>%
    group_by(field, value) %>%
    summarise(comb.frq = n())
  fields <- field.value.combinations %>%
    group_by(field) %>%
    summarise(n.of.distinct.values = n()) %>%
    # Explicit key: do not rely on whichever column names happen to be shared.
    full_join(fields, by = "field")
  print(paste("Total number of photo:", np))
  # Transposed so that rows are record counts and columns distinct-value counts.
  print(t(table(fields$n.of.distinct.values, fields$field.freq)))
  list(fields, field.value.combinations)
}
# First pass over the trimmed EXIF table; `l` holds the two summary tibbles
# returned by sts() (per-field and per-field/value frequencies).
l <- sts(ED)
## Joining, by = "field"
## [1] "Total number of photo: 11505"
##
## 1 2 3 5 6 7 10 14 38 41 68 86 111 163 210 548
## 5970 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 11505 127 4 5 4 0 1 1 5 1 1 1 1 1 1 1 1
## 23010 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0
##
## 1483 1891 9867 11497 11505
## 5970 0 0 0 0 0
## 11505 1 2 1 1 5
## 23010 0 0 0 0 0
# Unpack the two summaries returned by sts().
fields <- l[[1]]
field.value.combinations <- l[[2]]
cat("Anomalous frequencies")
## Anomalous frequencies
# Fields whose record count differs from the number of photos.
af.fields <- filter(fields, field.freq != np)
af.fields
## # A tibble: 3 x 3
## field n.of.distinct.values field.freq
## <chr> <int> <int>
## 1 Display Aperture 1 5970
## 2 Focal Length 6 23010
## 3 Lens 2 23010
# Show every observed value of those fields.
left_join(af.fields, field.value.combinations)
## Joining, by = "field"
## # A tibble: 9 x 5
## field n.of.distinct.va~ field.freq value comb.frq
## <chr> <int> <int> <chr> <int>
## 1 Display A~ 1 5970 2.7 5970
## 2 Focal Len~ 6 23010 4.3 mm 11370
## 3 Focal Len~ 6 23010 4.3 mm (35 mm equivale~ 11370
## 4 Focal Len~ 6 23010 4.6 mm 102
## 5 Focal Len~ 6 23010 4.6 mm (35 mm equivale~ 102
## 6 Focal Len~ 6 23010 6.4 mm 33
## 7 Focal Len~ 6 23010 6.4 mm (35 mm equivale~ 33
## 8 Lens 2 23010 4.3 - 150.5 mm 11505
## 9 Lens 2 23010 4.3 - 150.5 mm (35 mm ~ 11505
# Fields recorded twice per photo (field.freq == 2 * np) get "(2)" appended to
# the name of one of the two occurrences. Step 1: list their values together
# with the value's string length.
x <- left_join(
  filter(af.fields, field.freq == 2*np),
  field.value.combinations
) %>%
  mutate(vl = str_length(value)) %>%
  arrange(field, comb.frq, vl)
## Joining, by = "field"
# Step 2: within each (field, comb.frq) pair keep only the longest value;
# that is the occurrence that will be renamed.
x <- summarise(group_by(x, field, comb.frq), vl = max(vl)) %>%
  left_join(x) %>%
  select(field, value)
## Joining, by = c("field", "comb.frq", "vl")
# Append "(2)" to the field name of the records selected in `x`.
# The match is made on the (field, value) PAIR: matching on `value` alone
# would also rename any unrelated field that happens to carry the same value.
# "\037" (ASCII unit separator) joins the two parts so that the composite key
# cannot be produced by a different field/value split.
ED <- ED %>%
  mutate(field = ifelse(
    paste(field, value, sep = "\037") %in%
      paste(x$field, x$value, sep = "\037"),
    paste0(field, "(2)"),
    field
  ))
# Recompute the summaries on the renamed fields.
l <- sts(ED)
## Joining, by = "field"
## [1] "Total number of photo: 11505"
##
## 1 2 3 5 7 10 14 38 41 68 86 111 163 210 548 1483
## 5970 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 11505 129 4 7 4 1 1 5 1 1 1 1 1 1 1 1 1
##
## 1891 9867 11497 11505
## 5970 0 0 0 0
## 11505 2 1 1 5
# Unpack the summaries recomputed after the "(2)" renaming.
fields <- l[[1]]
field.value.combinations <- l[[2]]
cat("Anomalous frequencies AFTER separation of duplicate field names")
## Anomalous frequencies AFTER separation of duplicate field names
# Fields whose record count still differs from the number of photos.
af.fields <- filter(fields, field.freq != np)
af.fields
## # A tibble: 1 x 3
## field n.of.distinct.values field.freq
## <chr> <int> <int>
## 1 Display Aperture 1 5970
# Show every observed value of the remaining anomalous fields.
left_join(af.fields, field.value.combinations)
## Joining, by = "field"
## # A tibble: 1 x 5
## field n.of.distinct.values field.freq value comb.frq
## <chr> <int> <int> <chr> <int>
## 1 Display Aperture 1 5970 2.7 5970
# Print the details of one EXIF field: first its summary row (transposed),
# then every value observed for it with its frequency.
# Reads the global tibbles `fields` and `field.value.combinations`.
#   xfield: name of the field to report.
# Returns (invisibly, via print) the matching rows of
# `field.value.combinations`.
fvl <- function(xfield) {
  field_summary <- filter(fields, field == xfield)
  print(t(field_summary))
  field_values <- filter(field.value.combinations, field == xfield)
  print(field_values)
}
# Print the details of every anomalous field. fvl() is called for its printed
# output; `z` collects the per-field tibbles it returns.
z <- map(af.fields$field, fvl)
## [,1]
## field "Display Aperture"
## n.of.distinct.values "1"
## field.freq "5970"
## # A tibble: 1 x 3
## # Groups: field [1]
## field value comb.frq
## <chr> <chr> <int>
## 1 Display Aperture 2.7 5970
# Fields present once per photo that take the same single value everywhere,
# together with that value and its frequency.
constants <- left_join(
  select(
    filter(fields, n.of.distinct.values == 1 & field.freq == np),
    field
  ),
  field.value.combinations
)
## Joining, by = "field"
# Print the names of the constant fields as one " - " separated string.
print(paste(unlist(select(constants, field)), collapse = " - "))
## [1] "AE Setting - AEB Bracket Value - AF Area Heights - AF Area Mode - AF Area Widths - AF Area X Positions - AF Area Y Positions - AF Image Height - AF Image Width - AF Point - AF Points In Focus - Aspect Ratio - Auto Exposure Bracketing - Base ISO - Bits Per Sample - Bulb Duration - Camera ISO - Camera Model Name - Camera Type - Canon Exposure Mode - Canon Firmware Version - Canon Flash Mode - Canon Image Height - Canon Image Size - Canon Image Type - Canon Image Width - Canon Model ID - Circle Of Confusion - Color Components - Color Space - Components Configuration - Compressed Bits Per Pixel - Compression - Continuous Drive - Contrast - Control Mode - Cropped Image Height - Cropped Image Left - Cropped Image Top - Cropped Image Width - Custom Rendered - Date Stamp Mode - Digital Zoom - Digital Zoom Ratio - Drive Mode - Easy Mode - Encoding Process - Exif Byte Order - Exif Image Height - Exif Image Width - Exif Version - ExifTool Version Number - Exposure Compensation - Exposure Mode - File Source - File Type - File Type Extension - Firmware Revision - Flash - Flash Activity - Flash Bits - Flash Exposure Compensation - Flash Guide Number - Flash Output - Flashpix Version - Focal Plane Resolution Unit - Focal Plane X Resolution - Focal Plane Y Resolution - Focal Units - Focus Continuous - Focus Distance Lower - Focus Mode - Focus Range - Image Description - Image Height - Image Size - Image Stabilization - Image Width - Intelligent Contrast - Interoperability Index - Interoperability Version - Lens - Lens ID - Lens Type - Lens(2) - Macro Mode - Make - Manual Flash Output - Max Focal Length - Megapixels - Metering Mode - MIME Type - Min Aperture - Min Focal Length - My Color Mode - ND Filter - Num AF Points - Owner Name - Primary AF Point - Quality - Rating - Record Mode - Related Image Height - Related Image Width - Resolution Unit - Saturation - Scale Factor To 35 mm Equivalent - Scene Capture Type - Self Timer - Self Timer 2 - Sensing Method - Sensitivity 
Type - Sharpness - Shooting Mode - Shot Number In Continuous Burst - Slow Shutter - Spot Metering Mode - Thumbnail Image Valid Area - Thumbnail Offset - User Comment - Valid AF Points - VRD Offset - White Balance - X Resolution - Y Cb Cr Positioning - Y Cb Cr Sub Sampling - Y Resolution - Zoom Source Width - Zoom Target Width"
# Save the constant fields for reference, then remove their records from ED.
write.csv2(constants, "fields_with_same_value_for_all_photos.csv2")
ED <- anti_join(ED, constants)
## Joining, by = c("field", "value")
# Recompute the summaries on the remaining (non-constant) attributes.
l <- sts(ED)
## Joining, by = "field"
## [1] "Total number of photo: 11505"
##
## 1 2 3 5 7 10 14 38 41 68 86 111 163 210 548 1483 1891 9867 11497
## 5970 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 11505 0 4 7 4 1 1 5 1 1 1 1 1 1 1 1 1 2 1 1
##
## 11505
## 5970 0
## 11505 5
# Unpack the summaries of the non-constant attributes.
fields <- l[[1]]
field.value.combinations <- l[[2]]
# Reshape ED so that each distinct `field` becomes a column filled from
# `value`, then attach those columns to the photo table.
PDext <- left_join(PD, spread(ED, field, value))
## Joining, by = "ID"
library(ggpubr)
library(lubridate)
# Cross-check: count photos by directory date and by the date part (first 10
# characters) of the EXIF `Create Date`.
PDext %>%
  mutate(CD = str_sub(`Create Date`, start = 1, end = 10)) %>%
  group_by(date_dir, CD) %>%
  tally()
## # A tibble: 7 x 3
## # Groups: date_dir [?]
## date_dir CD n
## <chr> <chr> <int>
## 1 20180615 2018:06:15 1188
## 2 20180619 2018:06:19 2196
## 3 20180622 2018:06:22 2466
## 4 20180626 2018:06:26 2208
## 5 20180627 2018:06:27 1809
## 6 20180702 2018:07:02 1221
## 7 20180703 2018:07:03 417
# For each creation date: time of the first and of the last photo, and the
# elapsed time between them in hours.
PDext %>%
  mutate(CD = str_sub(`Create Date`, 1, 10),
         CT = as_datetime(`Create Date`)) %>%
  group_by(CD) %>%
  summarise(minT = min(CT), maxT = max(CT)) %>%
  mutate(shooting_time = difftime(maxT, minT, units = "hours"))
## # A tibble: 7 x 4
## CD minT maxT shooting_time
## <chr> <dttm> <dttm> <time>
## 1 2018:06:15 2018-06-15 08:52:11 2018-06-15 11:53:34 3.0230556 hours
## 2 2018:06:19 2018-06-19 08:40:03 2018-06-19 15:16:06 6.6008333 hours
## 3 2018:06:22 2018-06-22 08:48:33 2018-06-22 15:21:00 6.5408333 hours
## 4 2018:06:26 2018-06-26 08:40:48 2018-06-26 15:06:14 6.4238889 hours
## 5 2018:06:27 2018-06-27 08:45:02 2018-06-27 15:06:40 6.3605556 hours
## 6 2018:07:02 2018-07-02 09:40:53 2018-07-02 14:45:48 5.0819444 hours
## 7 2018:07:03 2018-07-03 09:12:42 2018-07-03 10:01:03 0.8058333 hours
# Recode the EXIF Orientation into two labels whose alphabetical order fixes
# the level order: levels starting with "Horizontal" become "A-landscape",
# levels starting with "Rotate" become "B-portrait".
# startsWith() is used instead of pmatch(): pmatch() returns NA when several
# levels start with the same prefix, which would silently skip the relabelling.
# Here every matching level is relabelled (and thereby merged).
PDext <- within(PDext, {
  Orientation <- factor(Orientation)
  levels(Orientation)[startsWith(levels(Orientation), "Rotate")] <- "B-portrait"
  levels(Orientation)[startsWith(levels(Orientation), "Horizontal")] <- "A-landscape"
})
# Stacked bar chart of the number of photos per `fc_dir`, split by
# orientation (factor levels reversed before plotting).
ggbarplot(
  data = PDext %>%
    group_by(fc_dir, Orientation) %>%
    tally() %>%
    mutate(Orientation = fct_rev(Orientation)),
  x = "fc_dir",
  y = "n",
  fill = "Orientation",
  x.text.angle = -90,
  xlab = "sample tree",
  ylab = "n.of.photo",
  main = "Distribution by sample tree (FC) or sample trees group"
)
# Flag directories whose name is longer than 4 characters.
# NOTE(review): presumably these are the "sample trees group" directories
# mentioned in the plot title -- confirm.
PDext <- mutate(PDext, fc_group = str_length(fc_dir) > 4)
# Per (fc_dir, Orientation) among the non-flagged directories: number of
# photos, first/last shot time, elapsed seconds (te) and mean seconds between
# consecutive photos (tebyp).
# `!fc_group` replaces `fc_group == F`: `F` is an ordinary variable that can
# be reassigned, and negation expresses the intent directly.
# Note: the `np` column created here shadows the global `np` inside the pipe;
# a group with a single photo gives tebyp = 0 / 0.
x <- PDext %>%
  filter(!fc_group) %>%
  mutate(CT = as_datetime(`Create Date`)) %>%
  group_by(fc_dir, Orientation) %>%
  summarise(np = n(), minT = min(CT), maxT = max(CT)) %>%
  mutate(te = difftime(maxT, minT, units = "sec"),
         tebyp = te / (np - 1))
# Mean number of photos per tree for each orientation, plus the horizontal
# extent (x .. xend) of the marker segment drawn for it in plot p1.
# row_number() replaces the hard-coded 1:2, which fails unless there are
# exactly two orientation groups; with two groups the result is identical.
xmg <- x %>%
  group_by(Orientation) %>%
  summarise(ym = mean(np)) %>%
  mutate(x = row_number() - .3, xend = row_number() + .3)
# Panel A: violin + dot plot of the number of photos per tree, by orientation,
# with a red segment and a text label marking each orientation's mean
# (positions taken from `xmg`). Axes are flipped and the legend is hidden.
# NOTE(review): `draw_quantiles` is given a logical, not a vector of
# quantiles -- confirm this is what was intended.
p1 <- ggplot(x, aes(Orientation, np, colour = Orientation)) +
  geom_violin(trim = TRUE, scale = "count", draw_quantiles = TRUE) +
  scale_y_continuous(breaks = seq(from = 10, to = 150, by = 10)) +
  geom_dotplot(
    binaxis = "y", stackdir = "center", stackratio = 1, dotsize = .3
  ) +
  annotate(
    "segment",
    x = xmg$x, xend = xmg$xend, y = xmg$ym, yend = xmg$ym,
    colour = "red", size = 1.5
  ) +
  annotate(
    "text",
    x = xmg$x - .1, y = xmg$ym,
    label = paste("mean =", formatC(xmg$ym, digits = 0, format = "f"))
  ) +
  # geom_segment(aes(x = 50, xend = x, y = -.3, yend = -y)) +
  ylab("number of photos per tree") +
  coord_flip() +
  theme(legend.position = "none")
# Panel B: density of the mean time between photos of the same tree (seconds),
# by orientation.
p2 <- ggplot(
  x,
  aes(as.numeric(tebyp), colour = Orientation, fill = Orientation)
) +
  geom_density(alpha = 0.5) +
  expand_limits(x = c(2, 8)) +
  xlab("time between photos of same tree [sec]")
# Panel C: density of the total time spent per tree (te converted from
# seconds to minutes), by orientation.
p3 <- ggplot(
  x,
  aes(as.numeric(te)/60, colour = Orientation, fill = Orientation)
) +
  geom_density(alpha = 0.5) +
  expand_limits(x = c(0, 11)) +
  xlab("time spent per tree [mins]")
# Two-row figure: panel A (p1) on top, panels B and C (p2, p3) side by side
# below, sharing one legend at the bottom.
ggarrange(
  p1, # first row: violin/dot plot
  ggarrange(
    p2, p3,
    ncol = 2,
    labels = c("B", "C"),
    common.legend = TRUE,
    legend = "bottom"
  ), # second row: the two density plots
  labels = "A", # label of the first-row panel
  nrow = 2
)
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# Render the R Markdown report. The file name must be delimited by plain ASCII
# double quotes; typographic quotes are a syntax error in R.
rmarkdown::render("PPC_EXIFdataAnalysis.Rmd")