Ziel dieses Scraping-Projekts ist es, interessante Webseiten mit
Veranstaltungen in Berlin zusammenzufassen und auf einer Karte zu
visualisieren. Dir fehlt eine tolle Seite? Schreib's in die Kommentare.
:)
library(ggplot2);library(anytime);library(tidygeocoder);library(stringr);library(rvest);library(xml2);library(tidyverse);library(here);library(sf);library(leaflet) ;library(leaflet.extras);library(htmltools);library(gt);library(knitr)
Stressfaktor
# Stressfaktor: scrape the event listing for today and the following two days.
# Produces `events_stressfaktor`, a data frame with one row per listed event
# and the character columns Datum, Name, Event_Type, Location, Start, Adresse.

# Scrape the listing of a single day and return it as a data frame. A day
# without any entries yields a zero-row data frame instead of an error.
scrape_stressfaktor_day <- function(day) {
  base_url <- str_c(
    "https://stressfaktor.squat.net/termine/alle?f%5B0%5D=event_date%3A",
    as.character(day),
    "&f%5B1%5D=kategorie%3A10&f%5B2%5D=kategorie%3A166"
  )
  # One HTML string per ".views-row" node, i.e. one string per entry.
  entries <- as.character(html_elements(read_html(base_url), ".views-row"))

  # str_extract() yields the first match per entry and NA where the field is
  # absent; not every entry carries every piece of information.
  event_name <- str_extract(entries, "(?<=hreflang=\"de\">).*(?=</a>)")
  event_type <- str_extract(
    entries,
    '(?<="field-content\">).*(?=</span></span>)'
  )
  # Keep only the text in front of the first "<" of the matched fragment.
  event_type <- str_extract(event_type, "^[^<]*")
  event_location <- str_extract(
    entries,
    '(?<="organization">).*(?=</span><br><span class=\"address-line1)'
  )
  event_time <- str_extract(entries, "(?<=Z\">).*(?=</time>)")
  event_address <- str_extract(
    entries,
    '(?<="address-line1\">).*(?=</span><br><span class=\"postal-code\")'
  )
  event_zip_code <- str_extract(
    entries,
    '(?<="postal-code\">).*(?=</span> <span class=\"locality\")'
  )

  data.frame(
    Datum = rep(as.character(day), length(entries)),
    Name = event_name,
    Event_Type = event_type,
    Location = event_location,
    Start = event_time,
    # str_c() propagates NA: the address is NA if street or zip code is missing.
    Adresse = str_c(event_address, ", ", event_zip_code),
    stringsAsFactors = FALSE
  )
}

# Offsets 0, 1 and 2 correspond to today, tomorrow and the day after.
events_stressfaktor <- do.call(
  rbind,
  lapply(0:2, function(offset) scrape_stressfaktor_day(Sys.Date() + offset))
)
Heinrich Böll
# Heinrich Böll Stiftung: scrape the filtered calendar front page.
# Produces `events_boell`, a data frame with one row per listed event and the
# character columns Datum, Name, Event_Type, Location, Start, Adresse.
date <- Sys.Date()
base_url <- "https://calendar.boell.de/de/calendar/frontpage?f%5B0%5D=ort_slide_in%3A2445&f%5B1%5D=veranstaltungs_format%3A857&f%5B2%5D=veranstaltungs_format%3A859&f%5B3%5D=veranstaltungs_format%3A875&f%5B4%5D=veranstaltungs_format%3A877&f%5B5%5D=veranstaltungs_format%3A895&f%5B6%5D=veranstaltungs_format%3A4387&f%5B7%5D=veranstaltungs_format%3A4499"
html <- read_html(base_url)
# One HTML string per ".eventlist-white-wrapper" node. Named `boell_entries`
# rather than `t` so that base::t() is not shadowed.
boell_entries <- as.character(html_elements(html, ".eventlist-white-wrapper"))

# Turn the HTML of one entry into a one-row data frame. Every field is reduced
# to its first match (NA if absent) so that each row always has six values.
parse_boell_entry <- function(entry, today) {
  # Line in front of the city field; after removing the spaces it is split at
  # the commas and the 2nd part is used as date, the 3rd part as time span.
  event_time <- str_extract(
    entry,
    ".*(?=\n </span>\n <span class=\"field--city\">)"
  )
  event_time <- str_remove_all(event_time, " ")
  event_time <- str_split(event_time, ",")[[1]]
  # Current year combined with the "-mm-dd" part of the parsed date.
  # NOTE(review): events after a turn of the year get the wrong year - confirm
  # whether the page ever lists them.
  event_date <- str_c(
    substr(today, 1, 4),
    substr(anydate(event_time[2]), 5, 10)
  )

  event_title <- str_extract(entry, '(?<="event--title\">).*(?=</h1>)')
  # str_squish() drops the indentation but keeps the spaces between words.
  event_sub_name <- str_squish(
    str_extract(entry, '(?<="field--subtitle\">\n).*')
  )
  # Entries without a subtitle keep their plain title.
  if (is.na(event_sub_name) || event_sub_name == "") {
    event_name <- event_title
  } else {
    event_name <- str_c(event_title, " ", event_sub_name)
  }

  event_type <- str_squish(
    str_extract(entry, "(?<=field--event_type\">\n).*")
  )

  data.frame(
    Datum = event_date,
    Name = event_name,
    Event_Type = event_type,
    Location = "Heinrich Böll Stiftung",
    Start = event_time[3],
    Adresse = "Schumannstraße 8, 10117",
    stringsAsFactors = FALSE
  )
}

# Zero-row template: keeps the column layout when the page lists no events.
events_boell_empty <- data.frame(
  Datum = character(0),
  Name = character(0),
  Event_Type = character(0),
  Location = character(0),
  Start = character(0),
  Adresse = character(0),
  stringsAsFactors = FALSE
)
events_boell <- do.call(
  rbind,
  c(
    list(events_boell_empty),
    lapply(boell_entries, parse_boell_entry, today = date)
  )
)
GENERATE GEOMETRY and REMOVE NA
# Combine both sources, geocode the addresses and convert to an sf object.
events <- rbind(events_stressfaktor, events_boell)

# Address to latitude / longitude via the OSM geocoder.
events <- events %>%
  geocode(Adresse, method = "osm", lat = latitude, long = longitude)

# Drop the rows without coordinates. A logical mask is used on purpose:
# negative indexing with an empty index vector (no NA at all) would select
# zero rows and thereby discard every event.
has_coordinates <- !is.na(events$latitude) & !is.na(events$longitude)
events <- events[has_coordinates, ]

events <- st_as_sf(
  events,
  coords = c("longitude", "latitude"),
  crs = "EPSG:4326"
)
Today
# Events dated today: marker map followed by a summary table.
date <- Sys.Date()
events_today <- subset(events, as.Date(Datum) == date)

# Base map with one red circle marker per event; the popup lists the event
# name, location, type and start.
addCircleMarkers(
  addTiles(leaflet()),
  data = events_today,
  color = "red",
  radius = 3,
  popup = ~paste(
    "<b>Event</b>: ", Name, "</br>",
    "<b>Location</b>: ", Location, "</br>",
    "<b>Eventtype</b>: ", Event_Type, "</br>",
    "<b>Beginn:</b>", Start
  )
)

# Table of the selected columns.
gt(subset(events_today, select = c(Name, Location, Event_Type, Start, Adresse)))
| Name |
Location |
Event_Type |
Start |
Adresse |
geometry |
| k19 Konzert |
K19 Café |
Musik/Konzert |
19:00 |
Kreutzigerstr. 19, 10247 |
c(13.4600856, 52.5131608) |
| Philosophischer Lesekreis: Adorno - Minima Moralia |
Bulbul Cafe |
Diskussion/Vortrag, Treffen |
19:00 |
Lucy Lameck Str. 32, 12049 |
c(13.4225619, 52.4838008) |
| Polizeireform und gewaltsames Verschwindenlassen in Kolumbien |
Zielona Góra |
Diskussion/Vortrag |
19:00 |
Grünbergerstr. 73, 10245 |
c(13.4595298, 52.5114491) |
| SOLI KONZERT GEGEN ABSCHIEBUNG // SOLI CONCERTS AGAINST DEPORTATION |
Linie 206 |
(Umsonst)Laden/Markt, Kneipe/Café, Essen, Musik/Konzert, Party, Musik/Konzert, Kneipe/Café |
19:00 |
Linienstr. 206, 10119 |
c(13.4028556, 52.5288992) |
| FLINTA* Rebel Rats Dienstagsküfa + FILM: Deckname Jenny (OmenU) |
B-Lage |
Kneipe/Café, Diskussion/Vortrag, Essen |
19:30 |
Mareschstr. 1, 12055 |
c(13.451159, 52.4736946) |
| Mahall-Ohlmeier-Rupp-Fischerlehner (Impro / Jazz) |
Jugendwiderstandsmuseum |
music/concert, musique, Musik/Konzert |
19:30 |
Rigaer Str. 9-10, 10243 |
c(13.4561865, 52.5177979) |
| Konzert im Koma F |
KØPI |
Musik/Konzert |
21:00 |
Köpenicker Straße 137, 10179 |
c(13.4260264398012, 52.50770215) |
tomorrow
# Events dated tomorrow: marker map followed by a summary table.
date <- Sys.Date() + 1
events_tomorrow <- subset(events, as.Date(Datum) == date)

# Base map with one red circle marker per event; the popup lists the event
# name, location, type and start.
addCircleMarkers(
  addTiles(leaflet()),
  data = events_tomorrow,
  color = "red",
  radius = 3,
  popup = ~paste(
    "<b>Event</b>: ", Name, "</br>",
    "<b>Location</b>: ", Location, "</br>",
    "<b>Eventtype</b>: ", Event_Type, "</br>",
    "<b>Beginn:</b>", Start
  )
)

# Table of the selected columns.
gt(subset(events_tomorrow, select = c(Name, Location, Event_Type, Start, Adresse)))
| Name |
Location |
Event_Type |
Start |
Adresse |
geometry |
| Soli-Siebdruck + Live Musik im Regenbogencafé! |
Regenbogenfabrik |
Musik/Konzert, Arbeitsplatz/Selbermachen, Kneipe/Café, Essen |
13:00 |
Lausitzer Str. 22a, 10999 |
c(13.4269273, 52.4954719) |
| Squat de la Musique: musikalische Kundgebung Rigaer 84-78 |
NA |
Aktion/Protest/Camp, Musik/Konzert |
16:00 |
Rigaerstr, 10247 |
c(13.4630095, 52.5166942) |
| Squat de la Musique: musikalische Kundgebung "Villa Felix" |
Schreina47 |
Aktion/Protest/Camp, Musik/Konzert |
16:00 |
Schreinerstr. 47, 10245 |
c(13.4673168, 52.5171407) |
| KünstlerInnen aus dem Kiez |
Wagenburg Lohmühle |
Musik/Konzert |
18:00 |
Lohmühlenstr. 17, 12435 |
c(13.4400026, 52.48962) |
| Fete de la Musique in der Tristeza |
Tristeza |
Kneipe/Café, Musik/Konzert, Party |
18:00 |
Pannierstr. 5, 12047 |
c(13.430953, 52.4865618) |
| FLINTA* Rebel Rats Dienstagsküfa + Talk on the rise of fascism in North and South Europe |
B-Lage |
Kneipe/Café, Diskussion/Vortrag, Essen |
19:00 |
Mareschstr. 1, 12055 |
c(13.451159, 52.4736946) |
| Konzert |
Schokoladen |
Musik/Konzert |
19:00 |
Ackerstrasse 169, 10115 |
c(13.3972037, 52.5297465) |
Day after tomorrow
# Events dated two days from now: marker map followed by a summary table.
date <- Sys.Date() + 2
events_day_after <- subset(events, as.Date(Datum) == date)

# Base map with one red circle marker per event; the popup lists the event
# name, location, type and start.
addCircleMarkers(
  addTiles(leaflet()),
  data = events_day_after,
  color = "red",
  radius = 3,
  popup = ~paste(
    "<b>Event</b>: ", Name, "</br>",
    "<b>Location</b>: ", Location, "</br>",
    "<b>Eventtype</b>: ", Event_Type, "</br>",
    "<b>Beginn:</b>", Start
  )
)

# Table of the selected columns.
gt(subset(events_day_after, select = c(Name, Location, Event_Type, Start, Adresse)))
| Name |
Location |
Event_Type |
Start |
Adresse |
geometry |
| Konzert |
Schokoladen |
Musik/Konzert |
19:00 |
Ackerstrasse 169, 10115 |
c(13.3972037, 52.5297465) |
| Konzert |
Schokoladen |
Musik/Konzert |
19:00 |
Ackerstrasse 169, 10115 |
c(13.3972037, 52.5297465) |
| Konzert mit Irina Gonzàlez |
Baiz |
Musik/Konzert |
20:00 |
Schönhauser Allee 26 A, 10435 |
c(13.4126884, 52.5359511) |
| Lateinamerika & Deutschland: Partner für Umweltgerechtigkeit? AuftaktveranstaltungADLAF-Tagung2023 |
Heinrich Böll Stiftung |
Podiumsdiskussion |
18.30Uhr–20.00Uhr |
Schumannstraße 8, 10117 |
c(13.3829363, 52.5238655) |