Library
# Load required libraries
library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.4.3
Step 1: Get article URLs from Kompas Money
get_finansial_articles <- function(page_num = 1, max_articles = 100) {
if (page_num == 1) {
url <- "https://indeks.kompas.com/?source=navbar&site=money"
} else {
url <- paste0("https://indeks.kompas.com/?source=navbar&site=money&page=", page_num)
}
cat("Accessing URL:", url, "\n")
tryCatch({
page <- read_html(url)
# Get all <a> tags and drop those without an href, keeping links and hrefs aligned
links <- html_nodes(page, "a")
hrefs <- html_attr(links, "href")
has_href <- !is.na(hrefs)
links <- links[has_href]
hrefs <- hrefs[has_href]
# Keep only Kompas Money article links
is_article <- grepl("^https://money\\.kompas\\.com/read", hrefs)
article_links <- hrefs[is_article]
article_texts <- html_text(links[is_article], trim = TRUE)
# Drop duplicate URLs while keeping titles aligned with their links
keep <- !duplicated(article_links)
article_links <- article_links[keep]
article_texts <- article_texts[keep]
# Fallback title when the link text is empty
article_titles <- ifelse(article_texts == "" | is.na(article_texts),
paste("Article", seq_along(article_links)),
article_texts)
# Limit the number of articles
n <- min(length(article_links), max_articles)
df <- data.frame(
url = article_links[seq_len(n)],
title = article_titles[seq_len(n)],
stringsAsFactors = FALSE
)
cat("Successfully extracted", nrow(df), "article URLs\n")
return(df)
}, error = function(e) {
cat("Error:", e$message, "\n")
return(data.frame(url = character(0), title = character(0), stringsAsFactors = FALSE))
})
}
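For reference, a minimal usage sketch of the function above (not evaluated here; it needs network access and the argument values are illustrative):
# Fetch up to 5 article URLs from the first Kompas Money index page
sample_articles <- get_finansial_articles(page_num = 1, max_articles = 5)
head(sample_articles)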
library(knitr)
komparasi <- data.frame(
Component = c(
"HTML selector",
"Title extraction",
"Page structure",
"URL pattern",
"Title fallback",
"URL normalisation"
),
Kompas = c(
'`html_nodes("a")` then filter the URLs',
'Text taken directly from `<a>`',
'More consistent',
'`money.kompas.com/read`',
'`paste("Article", i)` when empty',
'Not needed (already absolute)'
),
Detik = c(
'`html_nodes("article")`, check `<a>` & title',
'Check `<a>`, then `<h2>`, `.title`, etc.',
'Needs fallback selectors (`.media__title`, etc.)',
'`finance.detik.com/...`',
'Same, but uses an `html_text()` fallback from other elements',
'Needed (sometimes relative, e.g. `/read/...`)'
)
)
kable(komparasi, format = "html", escape = FALSE, caption = "Comparison of Article Extraction: Kompas vs Detik")
Comparison of Article Extraction: Kompas vs Detik

| Component | Kompas | Detik |
|---|---|---|
| HTML selector | html_nodes("a") then filter the URLs | html_nodes("article"), check <a> & title |
| Title extraction | Text taken directly from <a> | Check <a>, then <h2>, .title, etc. |
| Page structure | More consistent | Needs fallback selectors (.media__title, etc.) |
| URL pattern | money.kompas.com/read | finance.detik.com/... |
| Title fallback | paste("Article", i) when empty | Same, but uses an html_text() fallback from other elements |
| URL normalisation | Not needed (already absolute) | Needed (sometimes relative, e.g. /read/...) |
The main modification to the Kompas function follows from its more consistent page structure: article links already use a fixed domain and path pattern (money.kompas.com/read), so the complex selectors needed in the Detik version are unnecessary.
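As an illustration of that fixed pattern, the snippet below filters a small set of example hrefs (the first is a real article URL from the crawl log further below; the other two are non-article links used only for illustration):
# Only URLs matching the money.kompas.com/read pattern survive the filter
hrefs <- c(
  "https://money.kompas.com/read/2025/04/15/101819826/harga-emas-terbaru-hari-ini-15-april-2025-di-pegadaian",
  "https://indeks.kompas.com/?source=navbar&site=money&page=2",
  "https://www.kompas.com/"
)
hrefs[grepl("^https://money\\.kompas\\.com/read", hrefs)]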
Step 2: Function to extract article content from Kompas Money
extract_article_content <- function(url) {
cat("Crawling article:", url, "\n")
tryCatch({
page <- read_html(url)
# Helper: try multiple selectors
try_selectors <- function(selectors, default = NA, type = "text", attr = NULL) {
for (selector in selectors) {
element <- page %>% html_node(selector)
if (!is.null(element) && !inherits(element, "xml_missing")) {
if (type == "text") {
content <- element %>% html_text(trim = TRUE)
if (!is.na(content) && content != "") return(content)
} else if (type == "attr" && !is.null(attr)) {
content <- element %>% html_attr(attr)
if (!is.na(content) && content != "") return(content)
} else if (type == "nodes") {
nodes <- page %>% html_nodes(selector)
if (length(nodes) > 0) {
content <- nodes %>% html_text(trim = TRUE) %>% paste(collapse = "\n\n")
if (!is.na(content) && content != "") return(content)
}
}
}
}
return(default)
}
# ==== Extract each component ====
# Title
title <- try_selectors(c("h1.read__title", "h1", ".article__title"), "No Title Found")
# Author
author <- try_selectors(c("div.credit-title-name"), "Unknown Author")
# Date
date_raw <- try_selectors(c(".read__time", ".article__date", ".date"), NA)
date_clean <- str_extract(date_raw, "\\d{1,2}/\\d{1,2}/\\d{4}")
parsed_date <- tryCatch({
as.Date(date_clean, format = "%d/%m/%Y")
}, error = function(e) Sys.Date())
if (is.na(parsed_date)) {
date_match <- str_extract(date_raw, "\\d{4}-\\d{2}-\\d{2}")
if (!is.na(date_match)) {
parsed_date <- as.Date(date_match)
} else {
parsed_date <- Sys.Date()
}
}
# Content
content <- try_selectors(c(".read__content p", ".read__content", "article p", ".article__body"), "No content found", "nodes")
if (content == "No content found") {
content <- try_selectors(c(".read__content", ".article-content", "article"), "No content found")
}
# Image
image_url <- try_selectors(c(".photo img", ".photo__wrap img", "figure img", "img"), NA, "attr", "src")
# Category (from breadcrumb or meta tag)
category <- try_selectors(c(".breadcrumb__item a", ".breadcrumb a", "meta[property='article:section']"), "Money")
# Tags (usually not available, but check anyway)
tags <- try_selectors(c(".tag a", ".tags a", ".read__tag a"), "", "nodes")
# Return as data frame row
return(data.frame(
url = url,
title = title,
author = author,
date = parsed_date,
publish_date_raw = date_raw,
content = content,
image_url = image_url,
category = category,
tags = tags,
crawl_time = Sys.time(),
stringsAsFactors = FALSE
))
}, error = function(e) {
cat("Error extracting content from", url, ":", e$message, "\n")
return(data.frame(
url = url,
title = "Error extracting content",
author = NA,
date = as.Date(Sys.Date()),
publish_date_raw = NA,
content = paste("Error:", e$message),
image_url = NA,
category = NA,
tags = NA,
crawl_time = Sys.time(),
stringsAsFactors = FALSE
))
})
}
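A single-article usage sketch (not evaluated here; the URL is one of those crawled in the log further below):
one_article <- extract_article_content("https://money.kompas.com/read/2025/04/15/095745726/pembuatan-npwp-online-berapa-lama")
str(one_article)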
knitr::kable(
data.frame(
Component = c(
"Page structure", "Title", "Author", "Publication date",
"Article content", "Image", "Category", "Tags", "Date parsing", "Error handling"
),
Kompas = c(
"Consistent; article links follow the fixed domain pattern `money.kompas.com/read`",
"`h1.read__title`, `h1`, `.article__title`",
"`div.credit-title-name`",
"`.read__time`, `.article__date`, or `.date` → parsed as `%d/%m/%Y`",
"`.read__content p`, `article p`, `.article__body`, fallback to `.read__content`",
"`.photo img`, `figure img`, `img`",
"`.breadcrumb__item a`, `meta[property='article:section']`",
"`.read__tag a`, `.tags a`",
"If the format fails, fall back to `Sys.Date()` or a regex for a date pattern",
"On failure, immediately return a default `data.frame` carrying the error message"
),
Detik = c(
"More complex; needs many fallbacks and selector combinations",
"Many selector variants: `h1.detail__title`, `.article-title h1`, `h1`, etc.",
"`span.author`, `.byline`, `.detail__author`, etc.",
"More variation in date formats (up to 5 parsing attempts)",
"`.detail__body-text p`, `article-content p`, fallback to the full content area",
"`.detail__media-image img`, `figure img`, `article img`",
"`.breadcrumb__item`, `.nav__item--active`, `breadcrumb a`",
"`.detail__body-tag a`, `.tags a`",
"Tries several formats (`%d %b %Y`, `%d/%m/%Y`) with a more complex regex fallback",
"Same, but date parsing is stricter and more specific"
),
check.names = FALSE
),
caption = "Comparison of the `extract_article_content` Function Structure for Kompas and Detik",
format = "html",
escape = FALSE
)
Comparison of the extract_article_content Function Structure for Kompas and Detik

| Component | Kompas | Detik |
|---|---|---|
| Page structure | Consistent; article links follow the fixed domain pattern money.kompas.com/read | More complex; needs many fallbacks and selector combinations |
| Title | h1.read__title, h1, .article__title | Many selector variants: h1.detail__title, .article-title h1, h1, etc. |
| Author | div.credit-title-name | span.author, .byline, .detail__author, etc. |
| Publication date | .read__time, .article__date, or .date → parsed as %d/%m/%Y | More variation in date formats (up to 5 parsing attempts) |
| Article content | .read__content p, article p, .article__body, fallback to .read__content | .detail__body-text p, article-content p, fallback to the full content area |
| Image | .photo img, figure img, img | .detail__media-image img, figure img, article img |
| Category | .breadcrumb__item a, meta[property='article:section'] | .breadcrumb__item, .nav__item--active, breadcrumb a |
| Tags | .read__tag a, .tags a | .detail__body-tag a, .tags a |
| Date parsing | If the format fails, fall back to Sys.Date() or a regex for a date pattern | Tries several formats (%d %b %Y, %d/%m/%Y) with a more complex regex fallback |
| Error handling | On failure, immediately return a default data.frame carrying the error message | Same, but date parsing is stricter and more specific |
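To make the date-parsing fallback concrete, here is a small sketch on an assumed raw date string (the exact Kompas markup may differ):
# Assumed example of what .read__time might contain
date_raw <- "Kompas.com - 15/04/2025, 10:18 WIB"
date_clean <- stringr::str_extract(date_raw, "\\d{1,2}/\\d{1,2}/\\d{4}")
parsed_date <- as.Date(date_clean, format = "%d/%m/%Y")
if (is.na(parsed_date)) parsed_date <- Sys.Date()  # final fallback, as in the function above
parsed_date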
Step 3: Function to analyze financial news articles
analyze_financial_news <- function(articles_df) {
# Ensure we have articles to analyze
if (nrow(articles_df) == 0) {
cat("No articles to analyze\n")
return(NULL)
}
# Create analysis result structure
analysis_results <- list()
# 1. Basic statistics
analysis_results$basic_stats <- list(
total_articles = nrow(articles_df),
date_range = range(articles_df$date, na.rm = TRUE),
unique_authors = length(unique(articles_df$author[!is.na(articles_df$author)])),
avg_content_length = mean(nchar(articles_df$content), na.rm = TRUE)
)
# 2. Time series analysis
if ("date" %in% names(articles_df) && any(!is.na(articles_df$date))) {
# Count articles by date
articles_by_date <- articles_df %>%
group_by(date) %>%
summarise(count = n()) %>%
arrange(date)
analysis_results$time_series <- articles_by_date
}
# 3. Author analysis
if ("author" %in% names(articles_df) && any(!is.na(articles_df$author))) {
# Count articles by author
articles_by_author <- articles_df %>%
filter(!is.na(author) & author != "Unknown Author") %>%
group_by(author) %>%
summarise(
count = n(),
avg_length = mean(nchar(content), na.rm = TRUE)
) %>%
arrange(desc(count))
analysis_results$author_stats <- head(articles_by_author, 10)
}
# 4. Text analysis - requires tidytext
if ("content" %in% names(articles_df) && any(!is.na(articles_df$content))) {
# Define Indonesian stopwords
id_stopwords <- c("yang", "dan", "di", "dengan", "untuk", "pada", "ini", "dari", "dalam", "akan",
"tidak", "juga", "ke", "atau", "tersebut", "bisa", "oleh", "ada", "itu", "jika",
"telah", "sudah", "seperti", "karena", "hanya", "bahwa", "saat", "untuk", "itu")
# Tokenize content and remove stopwords
word_counts <- articles_df %>%
unnest_tokens(word, content) %>%
filter(
!word %in% id_stopwords,
nchar(word) > 2,
!grepl("^\\d+$", word) # Remove numbers
) %>%
count(word, sort = TRUE)
analysis_results$top_words <- head(word_counts, 20)
# Find common bigrams
bigrams <- articles_df %>%
unnest_tokens(bigram, content, token = "ngrams", n = 2) %>%
filter(!is.na(bigram)) %>%
count(bigram, sort = TRUE)
analysis_results$top_bigrams <- head(bigrams, 20)
# Sentiment analysis (if you have a sentiment lexicon for Indonesian)
# This is a placeholder - you would need an actual sentiment lexicon
# analysis_results$sentiment <- sentiment_analysis_results
}
# 5. Tag/Category analysis
if ("tags" %in% names(articles_df) && any(!is.na(articles_df$tags))) {
# Split tags and count
all_tags <- unlist(strsplit(paste(articles_df$tags, collapse = ", "), ", "))
tag_counts <- table(all_tags)
tag_df <- data.frame(
tag = names(tag_counts),
count = as.numeric(tag_counts),
stringsAsFactors = FALSE
) %>%
arrange(desc(count))
analysis_results$tag_stats <- head(tag_df, 15)
}
# 6. Entity recognition (simplified)
# Look for monetary values
if ("content" %in% names(articles_df) && any(!is.na(articles_df$content))) {
# Pattern for Indonesian Rupiah amounts
rupiah_pattern <- "Rp\\s*\\d+[\\.,]?\\d*\\s*(?:ribu|juta|miliar|triliun)?"
all_content <- paste(articles_df$content, collapse = " ")
rupiah_values <- str_extract_all(all_content, rupiah_pattern)[[1]]
analysis_results$rupiah_mentions <- head(table(rupiah_values), 10)
# Look for company names (simplified approach)
company_keywords <- c("PT", "Tbk", "Persero", "Bank", "Perusahaan", "Grup")
company_pattern <- paste0("(?:[A-Z][a-z]+\\s+)+(?:", paste(company_keywords, collapse = "|"), ")")
company_mentions <- str_extract_all(all_content, company_pattern)[[1]]
analysis_results$company_mentions <- head(table(company_mentions), 10)
}
return(analysis_results)
}
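The main execution function below calls visualize_analysis(), which is not shown in this section. A minimal sketch of what such a helper could look like, assuming ggplot2 and the analysis structure produced above; the actual implementation may differ:
library(ggplot2)
# Hypothetical minimal version: build one plot per available analysis component
visualize_analysis <- function(analysis_results, articles_df) {
  plots <- list()
  if (!is.null(analysis_results$time_series)) {
    # Articles per day, from the time-series summary
    plots$articles_per_day <- ggplot(analysis_results$time_series, aes(x = date, y = count)) +
      geom_col() +
      labs(title = "Articles per day", x = "Date", y = "Articles")
  }
  if (!is.null(analysis_results$top_words)) {
    # Most frequent words after stopword removal
    plots$top_words <- ggplot(head(analysis_results$top_words, 10), aes(x = reorder(word, n), y = n)) +
      geom_col() +
      coord_flip() +
      labs(title = "Top 10 words", x = NULL, y = "Frequency")
  }
  plots
}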
Main execution function
crawl_and_analyze_financial_news <- function(max_pages = 3, max_articles_per_page = 20, delay = 2) {
# Step 1: Collect article links
cat("==== COLLECTING ARTICLE LINKS ====\n")
all_articles <- data.frame(url = character(), title = character(), stringsAsFactors = FALSE)
for (page in 1:max_pages) {
cat("\n----- Processing page", page, "-----\n")
page_articles <- get_finansial_articles(page, max_articles_per_page)
if (nrow(page_articles) > 0) {
# Remove duplicates based on URL
all_articles <- rbind(all_articles, page_articles)
all_articles <- all_articles[!duplicated(all_articles$url), ]
cat("Total unique articles collected so far:", nrow(all_articles), "\n")
# Add delay between requests
if (page < max_pages) {
cat("Waiting", delay, "seconds before next page...\n")
Sys.sleep(delay)
}
} else {
cat("No articles found on this page or reached the end\n")
break
}
}
if (nrow(all_articles) == 0) {
cat("No articles were found. Exiting.\n")
return(NULL)
}
# Step 2: Crawl each article
cat("\n==== CRAWLING INDIVIDUAL ARTICLES ====\n")
articles_content <- data.frame()
for (i in 1:nrow(all_articles)) {
cat("\nProcessing article", i, "of", nrow(all_articles), "\n")
article_data <- extract_article_content(all_articles$url[i])
if (!is.null(article_data) && nrow(article_data) > 0) {
articles_content <- rbind(articles_content, article_data)
}
# Add delay between requests
if (i < nrow(all_articles)) {
cat("Waiting", delay, "seconds before next article...\n")
Sys.sleep(delay)
}
}
if (nrow(articles_content) == 0) {
cat("Failed to extract content from any articles. Exiting.\n")
return(NULL)
}
# Save the raw data
timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
csv_filename <- paste0("detik_finansial_articles_", timestamp, ".csv")
write.csv(articles_content, csv_filename, row.names = FALSE)
cat("\nRaw article data saved to", csv_filename, "\n")
# Step 3: Analyze the articles
cat("\n==== ANALYZING ARTICLES ====\n")
analysis_results <- analyze_financial_news(articles_content)
# Step 4: Visualize the analysis
cat("\n==== CREATING VISUALIZATIONS ====\n")
visualizations <- visualize_analysis(analysis_results, articles_content)
# Print text-based analysis results
cat("\n==== ANALYSIS RESULTS ====\n")
cat("\nBasic Statistics:\n")
cat("Total articles:", analysis_results$basic_stats$total_articles, "\n")
cat("Date range:", format(analysis_results$basic_stats$date_range[1]), "to",
format(analysis_results$basic_stats$date_range[2]), "\n")
cat("Unique authors:", analysis_results$basic_stats$unique_authors, "\n")
cat("Average content length:", round(analysis_results$basic_stats$avg_content_length), "characters\n")
cat("\nTop 10 Words:\n")
print(head(analysis_results$top_words, 10))
cat("\nTop 5 Authors:\n")
print(head(analysis_results$author_stats, 5))
cat("\nTop 5 Tags:\n")
print(head(analysis_results$tag_stats, 5))
# Save plots if available
if (length(visualizations) > 0) {
# Requires the gridExtra package for arranging multiple plots
if (require(gridExtra)) {
pdf_filename <- paste0("kompas_finansial_analysis_", timestamp, ".pdf")
pdf(pdf_filename, width = 10, height = 8)
# Arrange and print plots
plot_list <- list()
for (name in names(visualizations)) {
plot_list[[name]] <- visualizations[[name]]
}
# Print each plot on its own page
for (plot in plot_list) {
print(plot)
}
dev.off()
cat("\nVisualizations saved to", pdf_filename, "\n")
} else {
cat("\nCould not save visualizations. The gridExtra package is required.\n")
}
}
# Return the complete results
return(list(
articles = articles_content,
analysis = analysis_results,
visualizations = visualizations
))
}
Execute the full workflow
cat("\n====== STARTING FINANCIAL NEWS CRAWLING AND ANALYSIS ======\n")
##
## ====== STARTING FINANCIAL NEWS CRAWLING AND ANALYSIS ======
result <- crawl_and_analyze_financial_news(max_pages = 2, max_articles_per_page = 10, delay = 3)
## ==== COLLECTING ARTICLE LINKS ====
##
## ----- Processing page 1 -----
## Accessing URL: https://indeks.kompas.com/?source=navbar&site=money
## Successfully extracted 10 article URLs
## Total unique articles collected so far: 10
## Waiting 3 seconds before next page...
##
## ----- Processing page 2 -----
## Accessing URL: https://indeks.kompas.com/?source=navbar&site=money&page=2
## Successfully extracted 10 article URLs
## Total unique articles collected so far: 20
##
## ==== CRAWLING INDIVIDUAL ARTICLES ====
##
## Processing article 1 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/110000926/lp3es--pembebasan-kuota-impor-bisa-jadi-alternatif-asal-dibarengi-tarif-
## Waiting 3 seconds before next article...
##
## Processing article 2 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/104439826/viral-video-parkir-rp-60000-di-pasar-tanah-abang-ylki-meresahkan-pembeli-bisa
## Waiting 3 seconds before next article...
##
## Processing article 3 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/103521926/danantara-dan-badan-investasi-qatar-kelola-dana-bersama-rp-672-triliun
## Waiting 3 seconds before next article...
##
## Processing article 4 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/103232126/saham-fore-ara-lagi-usai-resmi-melantai-di-bursa-efek
## Waiting 3 seconds before next article...
##
## Processing article 5 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/102411326/kata-ibu-rumah-tangga-soal-tutupnya-tupperware-ada-yang-lebih-murah
## Waiting 3 seconds before next article...
##
## Processing article 6 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/101819826/harga-emas-terbaru-hari-ini-15-april-2025-di-pegadaian
## Waiting 3 seconds before next article...
##
## Processing article 7 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/101412726/mdla-resmi-melantai-di-bursa-himpun-dana-rp-685-miliar-lewat-ipo
## Waiting 3 seconds before next article...
##
## Processing article 8 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/101409826/simak-daftar-kurs-rupiah-di-5-bank-besar-indonesia
## Waiting 3 seconds before next article...
##
## Processing article 9 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/100000326/plts-terapung-tembesi-di-batam-dapat-suntikan-dana-siap-masuk-tahap-konstruksi
## Waiting 3 seconds before next article...
##
## Processing article 10 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/095745726/pembuatan-npwp-online-berapa-lama
## Waiting 3 seconds before next article...
##
## Processing article 11 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/084758926/indonesia-jajaki-fta-dengan-rusia-dan-eurasia-target-selesai-semester-i-2025
## Waiting 3 seconds before next article...
##
## Processing article 12 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/083541926/antam-catat-penjualan-emas-tertinggi-sepanjang-sejarah-pada-2024
## Waiting 3 seconds before next article...
##
## Processing article 13 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/082213926/indonesia-siap-negosiasi-dengan-trump-bawa-strategi-apa-saja
## Waiting 3 seconds before next article...
##
## Processing article 14 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/081632926/harga-emas-dunia-melemah-setelah-cetak-rekor-tertinggi
## Waiting 3 seconds before next article...
##
## Processing article 15 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/081147326/ihsg-berpeluang-lanjutkan-kenaikan-simak-analisis-dan-saham-pilihan
## Waiting 3 seconds before next article...
##
## Processing article 16 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/075942826/prabowo-beli-banyak-fosfat-dari-yordania-untuk-bahan-baku-pupuk-indonesia
## Waiting 3 seconds before next article...
##
## Processing article 17 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/075602326/menaker-eks-pegawai-sritex-segera-kembali-bekerja-tunggu-proses-kurator-dan
## Waiting 3 seconds before next article...
##
## Processing article 18 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/075126026/ipo-fore-cetak-ara-di-tengah-pasar-saham-yang-bergejolak
## Waiting 3 seconds before next article...
##
## Processing article 19 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/074736326/impor-garam-industri-dihentikan-pengusaha-soroti-kualitas-dan-cuaca
## Waiting 3 seconds before next article...
##
## Processing article 20 of 20
## Crawling article: https://money.kompas.com/read/2025/04/15/074107126/ramai-di-medsos-maxim-pastikan-tak-ada-pungutan-biaya-untuk-driver
##
## Raw article data saved to detik_finansial_articles_20250415_111117.csv
##
## ==== ANALYZING ARTICLES ====
##
## ==== CREATING VISUALIZATIONS ====
##
## ==== ANALYSIS RESULTS ====
##
## Basic Statistics:
## Total articles: 20
## Date range: 2025-04-15 to 2025-04-15
## Unique authors: 11
## Average content length: 2280 characters
##
## Top 10 Words:
## word n
## 1 indonesia 60
## 2 baca 42
## 3 harga 39
## 4 saham 34
## 5 emas 30
## 6 fore 30
## 7 jakarta 30
## 8 impor 28
## 9 pasar 27
## 10 produk 27
##
## Top 5 Authors:
## # A tibble: 5 × 3
## author count avg_length
## <chr> <int> <dbl>
## 1 "Agustinus Rangga Respati,\n Teuku Mu… 5 2532
## 2 "Dian Erika Nugraheny,\n Teuku Muhamm… 4 2434.
## 3 "Elsa Catriana,\n Teuku Muhammad Vald… 2 1814
## 4 "Nirmala Maulana Achmad,\n Aprillia I… 2 2552
## 5 "Agustinus Rangga Respati,\n Aprillia… 1 1718
##
## Top 5 Tags:
## tag
## 1 cara buat npwp online\n\npembuatan NPWP online berapa lama\n\nberapa lama buat NPWP
## 2 emas\n\nAntam\n\nemas Antam
## 3 Fore Coffee\n\nFore Kopi IPO\n\nFore IPO\n\nFore Kopi
## 4 fore\n\nSaham Fore ARA\n\nfore ara\n\nsaham kopi fore ARA
## 5 harga emas UBS\n\nharga emas batangan\n\nharga emas\n\nharga emas galeri 24\n\nharga emas hari ini\n\nharga emas antam
## count
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## Loading required package: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
##
## Visualizations saved to kompas_finansial_analysis_20250415_111117.pdf
cat("\n====== FINISHED FINANCIAL NEWS CRAWLING AND ANALYSIS ======\n")
##
## ====== FINISHED FINANCIAL NEWS CRAWLING AND ANALYSIS ======