# Install any required packages that are not already available locally
packages <- c("jsonlite", "htmlTable", "XML", "arrow")
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Load the packages
library(jsonlite)
library(htmlTable)
library(XML)
library(arrow)
## 
## Attaching package: 'arrow'
## The following object is masked from 'package:utils':
## 
##     timestamp
# Build the source inventory as a data frame: one row per product variation
# (20 rows, 7 columns). Item-level attributes are listed once per item and
# repeated for each of its variations; variation-level attributes are spelled
# out row by row.
data <- local({
  # Number of variation rows for each of the 10 items, in row order
  rows_per_item <- c(2, 2, 2, 2, 3, 1, 2, 2, 1, 3)

  data.frame(
    Category = rep(
      c("Electronics", "Home Appliances", "Clothing", "Books",
        "Sports Equipment"),
      each = 4
    ),
    Item_Name = rep(
      c("Smartphone", "Laptop", "Refrigerator", "Washing Machine",
        "T-Shirt", "Jeans", "Fiction Novel", "Non-Fiction Guide",
        "Basketball", "Tennis Racket"),
      times = rows_per_item
    ),
    Item_ID = rep(
      c(101, 102, 201, 202, 301, 302, 401, 402, 501, 502),
      times = rows_per_item
    ),
    Brand = rep(
      c("TechBrand", "CompuBrand", "HomeCool", "CleanTech", "FashionCo",
        "DenimWorks", "-", "-", "SportsGear", "RacketPro"),
      times = rows_per_item
    ),
    Price = rep(
      c(699.99, 1099.99, 899.99, 499.99, 19.99,
        49.99, 14.99, 24.99, 29.99, 89.99),
      times = rows_per_item
    ),
    # NOTE(review): the last two rows share the same Variation_ID ("502-B")
    # and the same details -- confirm whether this duplicate is intended.
    Variation_ID = c(
      "101-A", "101-B", "102-A", "102-B",
      "201-A", "201-B", "202-A", "202-B",
      "301-A", "301-B", "301-C", "302-A",
      "401-A", "401-B", "402-A", "402-B",
      "501-A", "502-A", "502-B", "502-B"
    ),
    Variation_Details = c(
      "Color: Black, Storage: 64GB",
      "Color: White, Storage: 128GB",
      "Color: Silver, Storage: 256GB",
      "Color: Space Gray, Storage: 512GB",
      "Color: Stainless Steel, Capacity: 20 cu ft",
      "Color: White, Capacity: 18 cu ft",
      "Type: Front Load, Capacity: 4.5 cu ft",
      "Type: Top Load, Capacity: 5.0 cu ft",
      "Color: Blue, Size: S",
      "Color: Red, Size: M",
      "Color: Green, Size: L",
      "Color: Dark Blue, Size: 32",
      "Format: Hardcover, Language: English",
      "Format: Paperback, Language: Spanish",
      "Format: eBook, Language: English",
      "Format: Paperback, Language: French",
      "Size: Size 7, Color: Orange",
      "Material: Graphite, Color: Black",
      "Material: Aluminum, Color: Silver",
      "Material: Aluminum, Color: Silver"
    )
  )
})

Convert to JSON and Import Back

# 1. JSON round trip: serialise the data frame as pretty-printed JSON,
#    save it to disk, then parse the file back into R
json_data <- toJSON(data, pretty = TRUE)
cat(json_data, file = "CUNYMart_data.json", sep = "\n")
imported_json_data <- fromJSON("CUNYMart_data.json")

# Show the first rows of the data read back from the JSON file
print("JSON Data Imported:")
## [1] "JSON Data Imported:"
print(head(imported_json_data))
##          Category    Item_Name Item_ID      Brand   Price Variation_ID
## 1     Electronics   Smartphone     101  TechBrand  699.99        101-A
## 2     Electronics   Smartphone     101  TechBrand  699.99        101-B
## 3     Electronics       Laptop     102 CompuBrand 1099.99        102-A
## 4     Electronics       Laptop     102 CompuBrand 1099.99        102-B
## 5 Home Appliances Refrigerator     201   HomeCool  899.99        201-A
## 6 Home Appliances Refrigerator     201   HomeCool  899.99        201-B
##                            Variation_Details
## 1                Color: Black, Storage: 64GB
## 2               Color: White, Storage: 128GB
## 3              Color: Silver, Storage: 256GB
## 4          Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6           Color: White, Capacity: 18 cu ft

Pros and Cons of JSON:

Pros: Easy to read and write, widely used in web APIs, and supports hierarchical data structures. Cons: Less efficient for very large datasets, and has no built-in schema validation.

Convert to HTML and Import Back

# 2. HTML round trip: render the data frame as an HTML table and save it
html_data <- htmlTable(data)
cat(html_data, file = "CUNYMart_data.html", sep = "\n")
# Read the file back line by line; this yields the raw markup as a character
# vector rather than a data frame.
# NOTE(review): XML::readHTMLTable() could likely parse the table back into a
# data frame if a true round trip is wanted -- TODO confirm.
html_raw <- readLines("CUNYMart_data.html")

# Show the first lines of the raw HTML that was read back
print("HTML Data Imported (raw text):")
## [1] "HTML Data Imported (raw text):"
print(head(html_raw))
## [1] "<table class='gmisc_table' style='border-collapse: collapse; margin-top: 1em; margin-bottom: 1em;' >"                       
## [2] "<thead>"                                                                                                                    
## [3] "<tr><th style='border-bottom: 1px solid grey; border-top: 2px solid grey;'></th>"                                           
## [4] "<th style='font-weight: 900; border-bottom: 1px solid grey; border-top: 2px solid grey; text-align: center;'>Category</th>" 
## [5] "<th style='font-weight: 900; border-bottom: 1px solid grey; border-top: 2px solid grey; text-align: center;'>Item_Name</th>"
## [6] "<th style='font-weight: 900; border-bottom: 1px solid grey; border-top: 2px solid grey; text-align: center;'>Item_ID</th>"

Convert to XML and Import Back

# 3. Convert to XML and Import Back
# Build an <Inventory> root holding one <Item> element per data frame row;
# every column becomes a child element named after the column.
xml_data <- newXMLNode("Inventory")
# seq_len() yields an empty sequence for a zero-row data frame, whereas
# 1:nrow(data) would iterate over c(1, 0) and emit bogus <Item> nodes.
for (i in seq_len(nrow(data))) {
  item_node <- newXMLNode("Item", parent = xml_data)
  for (col in names(data)) {
    newXMLNode(col, data[i, col], parent = item_node)
  }
}
saveXML(xml_data, file = "CUNYMart_data.xml")
## [1] "CUNYMart_data.xml"
# Parse the saved file back into a data frame (one row per <Item>).
# NOTE(review): values presumably come back as text, so Item_ID and Price may
# need converting to numeric before use -- TODO confirm.
imported_xml_data <- xmlToDataFrame("CUNYMart_data.xml")

# Print XML Imported Data
print("XML Data Imported:")
## [1] "XML Data Imported:"
print(head(imported_xml_data))
##          Category    Item_Name Item_ID      Brand   Price Variation_ID
## 1     Electronics   Smartphone     101  TechBrand  699.99        101-A
## 2     Electronics   Smartphone     101  TechBrand  699.99        101-B
## 3     Electronics       Laptop     102 CompuBrand 1099.99        102-A
## 4     Electronics       Laptop     102 CompuBrand 1099.99        102-B
## 5 Home Appliances Refrigerator     201   HomeCool  899.99        201-A
## 6 Home Appliances Refrigerator     201   HomeCool  899.99        201-B
##                            Variation_Details
## 1                Color: Black, Storage: 64GB
## 2               Color: White, Storage: 128GB
## 3              Color: Silver, Storage: 256GB
## 4          Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6           Color: White, Capacity: 18 cu ft

Pros and Cons of XML:

Pros: Supports complex hierarchical structures and provides schema validation (DTD/XSD). Cons: Verbose, producing large files, and slower to process than JSON.

Convert to Parquet and Import Back

# 4. Parquet round trip: write the data frame to a Parquet file,
#    then read that file back into R
write_parquet(data, sink = "CUNYMart_data.parquet")
imported_parquet_data <- read_parquet(file = "CUNYMart_data.parquet")

# Show the first rows of the data read back from the Parquet file
print("Parquet Data Imported:")
## [1] "Parquet Data Imported:"
print(head(imported_parquet_data))
##          Category    Item_Name Item_ID      Brand   Price Variation_ID
## 1     Electronics   Smartphone     101  TechBrand  699.99        101-A
## 2     Electronics   Smartphone     101  TechBrand  699.99        101-B
## 3     Electronics       Laptop     102 CompuBrand 1099.99        102-A
## 4     Electronics       Laptop     102 CompuBrand 1099.99        102-B
## 5 Home Appliances Refrigerator     201   HomeCool  899.99        201-A
## 6 Home Appliances Refrigerator     201   HomeCool  899.99        201-B
##                            Variation_Details
## 1                Color: Black, Storage: 64GB
## 2               Color: White, Storage: 128GB
## 3              Color: Silver, Storage: 256GB
## 4          Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6           Color: White, Capacity: 18 cu ft

Pros and Cons of Parquet:

Pros: Efficient for large datasets, optimized for analytical queries, and compact in storage. Cons: Not human-readable (it is a binary format), and less commonly used than JSON/XML for web data exchange.