# Echo the source of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(tidyverse)
# Directory containing the CSV data files; edit this single path to relocate
# the data.
data_dir <- "C:/Users/harin/Downloads/Crypto_dataset"
# When knitting, a setwd() call only lasts until the end of the current chunk,
# so also register the directory as knitr's root directory for later chunks.
knitr::opts_knit$set(root.dir = data_dir)
# Still switch directly so the current chunk and interactive runs see the data.
setwd(data_dir)
getwd()
All CSV files in the working directory are loaded and combined into a single dataset; a `coin` column records which file each row came from.
# Data integration ----
# `pattern` is a regular expression, not a shell glob: "*.csv" would also
# match names such as "data.csv.bak", so anchor on a literal ".csv" suffix.
files <- list.files(pattern = "\\.csv$")
if (length(files) == 0) {
  stop("No CSV files found in ", getwd(), call. = FALSE)
}

# Name each data frame after its file (extension removed) so that
# `.id = "coin"` stores that label instead of a positional index.
crypto_data <- lapply(files, read.csv)
names(crypto_data) <- tools::file_path_sans_ext(basename(files))
crypto_data <- bind_rows(crypto_data, .id = "coin")

# Data cleaning ----
# Drop exact duplicate rows, then fill missing values in every numeric column
# with that column's mean (computed over all files combined).
crypto_data <- crypto_data %>%
  distinct() %>%
  mutate(
    across(where(is.numeric), ~ replace_na(.x, mean(.x, na.rm = TRUE)))
  )

# Data selection ----
# Keep only the date, price, volume, and coin columns; parse Date as a Date.
selected_data <- crypto_data %>%
  select(Date, Open, High, Low, Close, Volume, coin) %>%
  mutate(Date = as.Date(Date))

# Data transformation ----
# log1p(x) computes log(1 + x) and stays accurate for very small volumes.
# close_scaled is a min-max rescaling of Close over the whole combined table.
selected_data <- selected_data %>%
  mutate(
    log_volume = log1p(Volume),
    close_scaled = (Close - min(Close)) / (max(Close) - min(Close))
  )

summary(selected_data)
head(selected_data)
The dataset was obtained from Kaggle:
https://www.kaggle.com/datasets/sudalairajkumar/cryptocurrencypricehistory
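The dataset was preprocessed in R in four steps: data integration (combining the individual CSV files into one table), data cleaning (removing duplicate rows and replacing missing numeric values with the column mean), data selection (keeping the date, price, volume, and coin columns), and data transformation (log-transforming the volume and min-max scaling the closing price).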