Ziel dieses Scraping-Projekts ist es, interessante Webpages mit Veranstaltungen in Berlin zusammenzufassen und auf einer Karte zu visualisieren. Dir fehlt eine tolle Seite? Schreib's in die Kommentare. :)

library(ggplot2);library(anytime);library(tidygeocoder);library(stringr);library(rvest);library(xml2);library(tidyverse);library(here);library(sf);library(leaflet)  ;library(leaflet.extras);library(htmltools);library(gt);library(knitr)

Stressfaktor

# Scrape the Stressfaktor calendar for today and the two following days.
#
# Result: `events_stressfaktor`, a data frame with one row per calendar entry
# and the character columns Datum, Name, Event_Type, Location, Start, Adresse.
# A field that cannot be found in an entry's HTML is stored as NA.
stressfaktor_rows <- list()

for (date_n in 0:2) {
  date <- Sys.Date() + date_n
  base_url <- str_c(
    "https://stressfaktor.squat.net/termine/alle?f%5B0%5D=event_date%3A",
    date,
    "&f%5B1%5D=kategorie%3A10&f%5B2%5D=kategorie%3A166"
  )
  html <- read_html(base_url)
  # One character string of raw HTML per calendar entry.
  html <- as.character(html_elements(html, ".views-row"))

  # Iterating over the entries themselves (instead of 1:length(html)) makes a
  # day without any entries a no-op rather than an out-of-bounds error.
  day_rows <- lapply(html, function(entry) {
    # str_extract() returns the first match, or NA when nothing matches.
    event_name <- str_extract(entry, "(?<=hreflang=\"de\">).*(?=</a>)")

    # Keep only the text in front of the first nested tag.
    event_type <- str_split(
      str_extract(entry, '(?<="field-content\">).*(?=</span></span>)'),
      "<"
    )[[1]][1]

    event_location <- str_extract(
      entry,
      '(?<="organization">).*(?=</span><br><span class=\"address-line1)'
    )
    event_time <- str_extract(entry, "(?<=Z\">).*(?=</time>)")

    event_address <- str_extract(
      entry,
      '(?<="address-line1\">).*(?=</span><br><span class=\"postal-code\")'
    )
    event_zip_code <- str_extract(
      entry,
      '(?<="postal-code\">).*(?=</span> <span class=\"locality\")'
    )

    data.frame(
      Datum = as.character(date),
      Name = event_name,
      Event_Type = event_type,
      Location = event_location,
      Start = event_time,
      # NA when either the street or the postal code is missing.
      Adresse = str_c(event_address, ", ", event_zip_code),
      stringsAsFactors = FALSE
    )
  })

  stressfaktor_rows <- c(stressfaktor_rows, day_rows)
}

# Bind all rows once; the empty template guarantees the six columns exist
# even when no entry was found on any of the three days.
events_stressfaktor <- do.call(
  rbind,
  c(
    list(data.frame(
      Datum = character(),
      Name = character(),
      Event_Type = character(),
      Location = character(),
      Start = character(),
      Adresse = character(),
      stringsAsFactors = FALSE
    )),
    stressfaktor_rows
  )
)

Heinrich Böll

# Scrape the Heinrich Böll Stiftung calendar front page (filtered by the query
# parameters in the URL below).
#
# Result: `events_boell`, a data frame with one row per list entry and the
# character columns Datum, Name, Event_Type, Location, Start, Adresse.
# Location and Adresse are constants; missing fields are stored as NA.
date <- Sys.Date()

base_url <- "https://calendar.boell.de/de/calendar/frontpage?f%5B0%5D=ort_slide_in%3A2445&f%5B1%5D=veranstaltungs_format%3A857&f%5B2%5D=veranstaltungs_format%3A859&f%5B3%5D=veranstaltungs_format%3A875&f%5B4%5D=veranstaltungs_format%3A877&f%5B5%5D=veranstaltungs_format%3A895&f%5B6%5D=veranstaltungs_format%3A4387&f%5B7%5D=veranstaltungs_format%3A4499"
html <- read_html(base_url)
# One character string of raw HTML per event in the list.
t <- as.character(html_elements(html, ".eventlist-white-wrapper"))

# Iterating over the entries themselves (instead of 1:length(t)) makes an
# empty event list a no-op rather than an out-of-bounds error.
boell_rows <- lapply(t, function(entry) {
  # The text in front of the city span is split on "," below: element 2 is
  # used as the date and element 3 as the time span.
  event_time <- str_extract(entry, ".*(?=\n                                                    </span>\n                                                                                    <span class=\"field--city\">)")
  event_time <- str_remove_all(event_time, " ")
  event_time <- str_split(event_time, ",")[[1]]
  event_duration <- event_time[3]
  # Current year combined with the month and day of the parsed date.
  # NOTE(review): an event dated in the following calendar year would get the
  # current year here - confirm whether the listing can reach that far ahead.
  event_date <- str_c(
    format(date, "%Y"),
    format(anydate(event_time[2]), "-%m-%d")
  )

  event_title <- str_extract(entry, '(?<="event--title\">).*(?=</h1>)')
  # str_squish() drops the surrounding indentation but keeps the spaces
  # between words, so multi-word subtitles stay readable.
  event_sub_name <- str_squish(
    str_extract(entry, '(?<="field--subtitle\">\n).*')
  )
  # The subtitle is optional: fall back to the bare title instead of losing
  # the whole name (which would leave the row one field short).
  event_name <- if (is.na(event_sub_name)) {
    event_title
  } else {
    str_c(event_title, " ", event_sub_name)
  }

  event_type <- str_squish(
    str_extract(entry, "(?<=field--event_type\">\n).*")
  )

  data.frame(
    Datum = event_date,
    Name = event_name,
    Event_Type = event_type,
    Location = "Heinrich Böll Stiftung",
    Start = event_duration,
    Adresse = "Schumannstraße 8, 10117",
    stringsAsFactors = FALSE
  )
})

# Bind all rows once; the empty template guarantees the six columns exist
# even when the page lists no events.
events_boell <- do.call(
  rbind,
  c(
    list(data.frame(
      Datum = character(),
      Name = character(),
      Event_Type = character(),
      Location = character(),
      Start = character(),
      Adresse = character(),
      stringsAsFactors = FALSE
    )),
    boell_rows
  )
)

GENERATE GEOMETRY and REMOVE NA

# Combine both sources, geocode the addresses and build point geometries.
events <- rbind(events_stressfaktor, events_boell)

# Address to latitude / longitude via the OSM geocoder.
events <- events %>%
  geocode(Adresse, method = "osm", lat = latitude, long = longitude)

# Drop events whose address could not be geocoded. A logical filter is used
# on purpose: negative indexing with an empty index vector (no failed
# addresses) would select zero rows and silently discard every event.
events <- events[!is.na(events$latitude) & !is.na(events$longitude), ]

events <- st_as_sf(events, coords = c("longitude", "latitude"), crs = "EPSG:4326")

Today

# Today: keep the events dated on the current day, then show them as red
# circle markers on a map and as a table.
date <- Sys.Date()
events_today <- subset(events, as.Date(Datum) == date)

# Each marker popup lists name, location, type and start time of the event.
addCircleMarkers(
  addTiles(leaflet()),
  data = events_today,
  color = "red",
  radius = 3,
  popup = ~paste(
    "<b>Event</b>: ", Name,
    "</br>",
    "<b>Location</b>: ", Location,
    "</br>",
    "<b>Eventtype</b>: ", Event_Type,
    "</br>",
    "<b>Beginn:</b>", Start
  )
)

gt(subset(events_today, select = c(Name, Location, Event_Type, Start, Adresse)))
Name Location Event_Type Start Adresse geometry
k19 Konzert K19 Café Musik/Konzert 19:00 Kreutzigerstr. 19, 10247 c(13.4600856, 52.5131608)
Philosophischer Lesekreis: Adorno - Minima Moralia Bulbul Cafe Diskussion/Vortrag, Treffen 19:00 Lucy Lameck Str. 32, 12049 c(13.4225619, 52.4838008)
Polizeireform und gewaltsames Verschwindenlassen in Kolumbien Zielona Góra Diskussion/Vortrag 19:00 Grünbergerstr. 73, 10245 c(13.4595298, 52.5114491)
SOLI KONZERT GEGEN ABSCHIEBUNG // SOLI CONCERTS AGAINST DEPORTATION Linie 206 (Umsonst)Laden/Markt, Kneipe/Café, Essen, Musik/Konzert, Party, Musik/Konzert, Kneipe/Café 19:00 Linienstr. 206, 10119 c(13.4028556, 52.5288992)
FLINTA* Rebel Rats Dienstagsküfa + FILM: Deckname Jenny (OmenU) B-Lage Kneipe/Café, Diskussion/Vortrag, Essen 19:30 Mareschstr. 1, 12055 c(13.451159, 52.4736946)
Mahall-Ohlmeier-Rupp-Fischerlehner (Impro / Jazz) Jugendwiderstandsmuseum music/concert, musique, Musik/Konzert 19:30 Rigaer Str. 9-10, 10243 c(13.4561865, 52.5177979)
Konzert im Koma F KØPI Musik/Konzert 21:00 Köpenicker Straße 137, 10179 c(13.4260264398012, 52.50770215)

Tomorrow

# Tomorrow: keep the events dated one day after the current day, then show
# them as red circle markers on a map and as a table.
date <- Sys.Date() + 1
events_tomorrow <- subset(events, as.Date(Datum) == date)

# Each marker popup lists name, location, type and start time of the event.
addCircleMarkers(
  addTiles(leaflet()),
  data = events_tomorrow,
  color = "red",
  radius = 3,
  popup = ~paste(
    "<b>Event</b>: ", Name,
    "</br>",
    "<b>Location</b>: ", Location,
    "</br>",
    "<b>Eventtype</b>: ", Event_Type,
    "</br>",
    "<b>Beginn:</b>", Start
  )
)

gt(subset(events_tomorrow, select = c(Name, Location, Event_Type, Start, Adresse)))
Name Location Event_Type Start Adresse geometry
Soli-Siebdruck + Live Musik im Regenbogencafé! Regenbogenfabrik Musik/Konzert, Arbeitsplatz/Selbermachen, Kneipe/Café, Essen 13:00 Lausitzer Str. 22a, 10999 c(13.4269273, 52.4954719)
Squat de la Musique: musikalische Kundgebung Rigaer 84-78 NA Aktion/Protest/Camp, Musik/Konzert 16:00 Rigaerstr, 10247 c(13.4630095, 52.5166942)
Squat de la Musique: musikalische Kundgebung "Villa Felix" Schreina47 Aktion/Protest/Camp, Musik/Konzert 16:00 Schreinerstr. 47, 10245 c(13.4673168, 52.5171407)
KünstlerInnen aus dem Kiez Wagenburg Lohmühle Musik/Konzert 18:00 Lohmühlenstr. 17, 12435 c(13.4400026, 52.48962)
Fete de la Musique in der Tristeza Tristeza Kneipe/Café, Musik/Konzert, Party 18:00 Pannierstr. 5, 12047 c(13.430953, 52.4865618)
FLINTA* Rebel Rats Dienstagsküfa + Talk on the rise of fascism in North and South Europe B-Lage Kneipe/Café, Diskussion/Vortrag, Essen 19:00 Mareschstr. 1, 12055 c(13.451159, 52.4736946)
Konzert Schokoladen Musik/Konzert 19:00 Ackerstrasse 169, 10115 c(13.3972037, 52.5297465)

Day after tomorrow

# Day after tomorrow: keep the events dated two days after the current day,
# then show them as red circle markers on a map and as a table.
date <- Sys.Date() + 2
events_day_after <- subset(events, as.Date(Datum) == date)

# Each marker popup lists name, location, type and start time of the event.
addCircleMarkers(
  addTiles(leaflet()),
  data = events_day_after,
  color = "red",
  radius = 3,
  popup = ~paste(
    "<b>Event</b>: ", Name,
    "</br>",
    "<b>Location</b>: ", Location,
    "</br>",
    "<b>Eventtype</b>: ", Event_Type,
    "</br>",
    "<b>Beginn:</b>", Start
  )
)

gt(subset(events_day_after, select = c(Name, Location, Event_Type, Start, Adresse)))
Name Location Event_Type Start Adresse geometry
Konzert Schokoladen Musik/Konzert 19:00 Ackerstrasse 169, 10115 c(13.3972037, 52.5297465)
Konzert Schokoladen Musik/Konzert 19:00 Ackerstrasse 169, 10115 c(13.3972037, 52.5297465)
Konzert mit Irina Gonzàlez Baiz Musik/Konzert 20:00 Schönhauser Allee 26 A, 10435 c(13.4126884, 52.5359511)
Lateinamerika &amp; Deutschland: Partner für Umweltgerechtigkeit? AuftaktveranstaltungADLAF-Tagung2023 Heinrich Böll Stiftung Podiumsdiskussion 18.30Uhr–20.00Uhr Schumannstraße 8, 10117 c(13.3829363, 52.5238655)