# Show each chunk's source code in the rendered output by default.
knitr::opts_chunk$set(echo = TRUE)

1. Load Required Libraries

library(dplyr)
library(tidyverse)

2. Set Working Directory

# Folder holding the downloaded CSV files (machine-specific path).
data_dir <- "C:/Users/harin/Downloads/Crypto_dataset"

# setwd() covers interactive runs and the current chunk. knitr restores the
# previous working directory when a chunk finishes, so also register the
# folder as knitr's root.dir so that later chunks are evaluated there when
# the document is knit.
setwd(data_dir)
knitr::opts_knit$set(root.dir = data_dir)

# Print the active working directory as a visible confirmation.
getwd()

3. Data Integration

All CSV files present in the directory are loaded and combined into a single dataset.

# List the CSV files in the working directory. `pattern` is a regular
# expression, not a shell glob, so match a literal ".csv" at the end of the
# file name instead of the unanchored "*.csv".
files <- list.files(pattern = "\\.csv$")

# Fail early with a clear message rather than erroring later on missing columns.
if (length(files) == 0L) {
  stop("No CSV files found in ", getwd(), call. = FALSE)
}

# Read every file, then name each data frame after its file (extension
# removed) so the `coin` id column identifies the source file instead of
# holding the file's position in `files`.
crypto_data <- lapply(files, read.csv)
names(crypto_data) <- tools::file_path_sans_ext(basename(files))

# Stack all files into one data frame; `coin` records which file a row came from.
crypto_data <- bind_rows(crypto_data, .id = "coin")

4. Data Cleaning

# Remove rows that are exact duplicates across all columns.
crypto_data <- distinct(crypto_data)

# Replace missing values in every numeric column with that column's mean.
# The mean is taken over all rows of the combined dataset (no grouping).
# across() + where() is the current replacement for the superseded mutate_if().
crypto_data <- crypto_data %>%
  mutate(across(
    where(is.numeric),
    ~ replace_na(.x, mean(.x, na.rm = TRUE))
  ))

5. Data Selection

# Keep only the date, the OHLC prices, the traded volume and the source id.
# all_of() errors if any listed column is missing, as bare names would.
selected_data <- select(
  crypto_data,
  all_of(c("Date", "Open", "High", "Low", "Close", "Volume", "coin"))
)

6. Data Transformation

# Parse the date column and derive the transformed features in one step.
selected_data <- selected_data %>%
  mutate(
    # Convert Date to the Date class.
    Date = as.Date(Date),
    # log1p(x) evaluates log(1 + x) accurately for small x and maps a volume
    # of zero to zero.
    log_volume = log1p(Volume),
    # Min-max scale Close using the minimum and maximum over all rows.
    # NOTE(review): the range spans every coin combined; confirm that
    # per-coin scaling is not what was intended.
    close_scaled = (Close - min(Close)) /
      (max(Close) - min(Close))
  )

7. Final Dataset Summary

# Per-column descriptive summary of the final dataset.
selected_data %>% summary()

# Preview the first six rows.
selected_data %>% head(n = 6L)

Dataset Source

The dataset was obtained from Kaggle:
https://www.kaggle.com/datasets/sudalairajkumar/cryptocurrencypricehistory

Conclusion

The dataset was preprocessed by performing data integration, data cleaning, data selection, and data transformation using R.