library(here)
library(stringr)
library(reticulate)
# Recursively list every file (with full path) under the Kenya working-data
# directory.
files_moved <- list.files(
  "M:/biome_health_project_files/country_files/kenya/working_data/",
  recursive = TRUE,
  full.names = TRUE
)
# Save the list of files as an R object, then read it back in.
saveRDS(files_moved, "list_files_RDS_working.rds")
exif_out <- readRDS("list_files_RDS_working.rds")
# Keep only paths that end in ".JPG". The dot is escaped and the pattern is
# anchored to the end of the path: an unescaped ".JPG" treats "." as "any
# character" and matches anywhere in the path (e.g. a folder called "xJPG").
exif_out <- exif_out[grepl("\\.JPG$", exif_out)]
# Split every path into its "/"-separated components.
file_split <- strsplit(exif_out, "/")
# Return the final element of `x` (for a split file path: the filename).
get_last <- function(x) {
  n_parts <- length(x)
  x[[n_parts]]
}
# Last path component (the filename) of every image: as a list, then flattened.
image_nos <- lapply(X = file_split, FUN = get_last)
img_nos <- unlist(image_nos)
# One row per image: the full path alongside the bare filename.
exif_out <- data.frame(exif_out, img_nos)
names(exif_out) <- c("filepath", "image_no")
# Numeric image number: drop every non-digit character from the filename and
# convert what is left to a number.
digits_only <- gsub("[^0-9]", "", exif_out$image_no)
exif_out$image_num <- as.numeric(digits_only)
# Return the second-to-last element of `x`; when that element contains "BTCF",
# return the third-to-last element instead.
get_second_last <- function(x) {
  n_parts <- length(x)
  parent_dir <- x[[n_parts - 1]]
  if (!grepl("BTCF", parent_dir)) {
    return(parent_dir)
  }
  # The direct parent is a "BTCF" folder, so step up one more level.
  x[[n_parts - 2]]
}
# Folder component chosen by get_second_last() for every image, flattened to a
# vector and stored on the data frame.
site_cam <- unlist(lapply(file_split, get_second_last))
exif_out$site_cam <- site_cam
# Last character of each site_cam string.
ab <- str_sub(exif_out$site_cam, start = -1, end = -1)
# TRUE for each element of `x` in which no character outside A-Z / a-z is found.
letters_only <- function(x) {
  has_non_letter <- grepl("[^A-Za-z]", x)
  !has_non_letter
}
# TRUE for each element of `x` in which no non-digit character is found.
numbers_only <- function(x) {
  has_non_digit <- grepl("\\D", x)
  !has_non_digit
}
# Entries with no non-digit character are set to "a".
ab <- replace(ab, numbers_only(ab), "a")
# Fold upper-case "A" / "B" to lower case.
ab <- replace(ab, ab == "A", "a")
ab <- replace(ab, ab == "B", "b")
table(ab)
## ab
## a b
## 1949181 408276
exif_out$ab <- ab
# Any path containing "november" is labelled november; everything else october.
exif_out$month <- ifelse(
  grepl("november", exif_out$filepath),
  "november",
  "october"
)
# Start the new image number from the original one.
exif_out$new_img_num <- exif_out$image_num
Creating unique file numbers: there were duplicate image numbers between the October and November files and between the a and b folders. This is remedied by adding 20000 to the numbers of November files and 10000 to the numbers of b files. The numbers are then padded with leading zeros so that they are 6 digits long.
October a = 000001 - 009999; October b = 010001 - 019999; November a = 020001 - 029999; November b = 030001 - 039999.
# Offset the image number: november rows become image_num + 20000, then "b"
# rows get a further 10000. The result is left-padded with "0" to a width of 6.
exif_out$new_img_num <- str_pad(
  ifelse(
    exif_out$month == "november",
    exif_out$image_num + 20000,
    exif_out$new_img_num
  ) + ifelse(exif_out$ab == "b", 10000, 0),
  6,
  pad = "0"
)
# Split each site_cam string on "_".
site_split <- strsplit(exif_out$site_cam, split = "_")
# Return the first element of `x`.
get_first <- function(x) {
  x[[1]]
}
# First "_"-separated piece of every site_cam string: kept as a list (site_id),
# as a flat vector (site_ids), and as a column of exif_out.
site_id <- lapply(site_split, `[[`, 1)
site_ids <- unlist(site_id)
exif_out$site_id <- site_ids
The following function takes a split filepath and removes its last section (the part with the filename), returning the remaining directory path.
# Split every (character-coerced) filepath into its "/"-separated components.
file_split <- strsplit(x = as.character(exif_out$filepath), split = "/")
# Drop the final element of `x` and join the remaining elements with "/".
remove_last <- function(x) {
  dir_parts <- x[-length(x)]
  paste0(dir_parts, collapse = "/")
}
# Directory part of every image path (the split path with its last element
# removed and re-joined with "/").
image_nos <- lapply(file_split, remove_last)
img_nos <- unlist(image_nos)
Year <- 2018
# In-place new name: <directory>/<Year>_<site_id>_<new_img_num>.JPG
exif_out$filepath_image_rename <- paste0(
  img_nos, "/", Year, "_", exif_out$site_id, "_", exif_out$new_img_num, ".JPG"
)
# Record the old -> new mapping before any file is touched.
write.csv(exif_out, "rename_image_file.csv", row.names = FALSE)
# Two source files mapped to the same target would overwrite one another, so
# refuse to rename anything until every target name is unique.
if (anyDuplicated(exif_out$filepath_image_rename) > 0) {
  stop(
    sum(duplicated(exif_out$filepath_image_rename)),
    " duplicated target name(s) in filepath_image_rename; ",
    "inspect rename_image_file.csv before renaming",
    call. = FALSE
  )
}
# file.rename() returns one logical per file; keep it (instead of printing it)
# and report any failures.
rename_ok <- file.rename(
  as.character(exif_out$filepath),
  exif_out$filepath_image_rename
)
if (!all(rename_ok)) {
  warning(sum(!rename_ok), " file(s) could not be renamed", call. = FALSE)
}
# Read the mapping back. new_img_num and site_id are forced to character:
# read.csv() would otherwise convert the zero-padded "000001" to the number 1,
# and the padding would be missing from the file names built below.
exif_out <- read.csv(
  "rename_image_file.csv",
  stringsAsFactors = FALSE,
  colClasses = c(new_img_num = "character", site_id = "character")
)
# Site code = site_id with all digits removed.
exif_out$site <- gsub("[[:digit:]]+", "", exif_out$site_id)
base_path <- "M:/biome_health_project_files/country_files/kenya/working_data"
data_type <- "CT"
# Final location:
# <base>/<site>/<site_id>/<data_type>/<Year>/<Year>_<site_id>_<new_img_num>.JPG
# site_id is taken from the data frame column (not from the free-standing
# `site_id` list) so each row is always paired with its own site.
exif_out$new_file_structure <- paste0(
  base_path, "/", exif_out$site, "/", exif_out$site_id, "/", data_type, "/",
  Year, "/", Year, "_", exif_out$site_id, "_", exif_out$new_img_num, ".JPG"
)
exif_out$new_dir_structure <- paste(
  base_path, exif_out$site, exif_out$site_id, data_type, Year,
  sep = "/"
)
# Create directory `x`, creating any missing parent directories as well.
dir_creator <- function(x) {
  dir.create(path = x, recursive = TRUE)
}
# Create each distinct destination directory once. invisible() stops the list
# of dir.create() return values from being printed.
invisible(lapply(unique(exif_out$new_dir_structure), dir_creator))
# Keep a record of the filepath mapping, both as RDS and as CSV. The CSV is
# written with write.csv()'s default row names.
saveRDS(exif_out, "original_filepaths_working_filepaths.RDS")
write.csv(exif_out, "original_filepaths_working_filepaths.csv")
# Move every renamed image to its final location. as.character() guards
# against the path columns having been read back as factors. The per-file
# result of file.rename() is kept (instead of printed) so failures are reported.
move_ok <- file.rename(
  as.character(exif_out$filepath_image_rename),
  as.character(exif_out$new_file_structure)
)
if (!all(move_ok)) {
  warning(
    sum(!move_ok), " file(s) could not be moved to new_file_structure",
    call. = FALSE
  )
}
import PIL.Image
import PIL.ExifTags
import pickle
import os
import sys
import pandas as pd
# Read the filepath mapping and take the column of final image locations.
df = pd.read_csv("original_filepaths_working_filepaths.csv")
allfiles = df.new_file_structure

# From here on, everything print()ed goes to this text file.
# NOTE(review): sys.stdout stays redirected after this block, as before.
sys.stdout = open(os.path.join('D:/Fiona/Biome_Health_Project/exif_output/exif_out_test.txt'), "w")

# Pick out which exif data you're interested in
keys = ['Make', 'Model', 'DateTime', 'DateTimeDigitized', 'LightSource', 'Flash']

# One output line per image: the filepath followed by the selected EXIF values.
for image in allfiles:
    try:
        img = PIL.Image.open(image)
    except OSError as e:
        print('Bad file ' + image)
        # Skip to the next file; without this the code below would run on the
        # previous image (or on an undefined name for the first file).
        continue

    # Close the file handle as soon as the EXIF block has been read.
    with img:
        # _getexif() returns None when the image has no EXIF data.
        raw_exif = img._getexif() or {}

    # Translate numeric EXIF tag ids to their names, dropping unknown ids.
    exif = {
        PIL.ExifTags.TAGS[k]: v
        for k, v in raw_exif.items()
        if k in PIL.ExifTags.TAGS
    }

    # Missing keys are written as the string 'None'.
    keys_out = [str(exif.get(key)) for key in keys]
    filepath = str(image)
    print(filepath + ', ' + ', '.join(keys_out))

# Make sure everything written so far reaches the file.
sys.stdout.flush()