Libraries

library(rjson)
library(RCurl)
library(rvest)
library(selectr)
library(httr)
library(XML)
library(stringr)
library(arrow)
## 
## Attaching package: 'arrow'
## The following object is masked from 'package:utils':
## 
##     timestamp
library(ROAuth)
library(gt)
library(knitr)

JSON Format

# Inventory records for the JSON round trip: a flat, unnamed list holding one
# named list per product variation, every record carrying the same seven fields.
cunymart_data_json <- local({
  # Expand one product into its variation records. The product-level fields are
  # repeated on every record; Variation_ID is "<Item_ID>-<letter>", lettered
  # A, B, C, ... in the order the variation details are supplied.
  expand_product <- function(category, name, id, brand, price, details) {
    lapply(seq_along(details), function(k) {
      list(
        Category = category,
        Item_Name = name,
        Item_ID = id,
        Brand = brand,
        Price = price,
        Variation_ID = paste0(id, "-", LETTERS[k]),
        Variation_Details = details[k]
      )
    })
  }

  # c() on lists concatenates them, so the result is a single flat list of
  # records rather than a list of per-product groups.
  c(
    expand_product(
      "Electronics", "Smartphone", 101, "TechBrand", 699.99,
      c("Color: Black, Storage: 64GB", "Color: White, Storage: 128GB")
    ),
    expand_product(
      "Electronics", "Laptop", 102, "CompuBrand", 1099.99,
      c("Color: Silver, Storage: 256GB", "Color: Space Gray, Storage: 512GB")
    ),
    expand_product(
      "Home Appliances", "Refrigerator", 201, "HomeCool", 899.99,
      c("Color: Stainless Steel, Capacity: 20 cu ft",
        "Color: White, Capacity: 18 cu ft")
    ),
    expand_product(
      "Home Appliances", "Washing Machine", 202, "CleanTech", 499.99,
      c("Type: Front Load, Capacity: 4.5 cu ft",
        "Type: Top Load, Capacity: 5.0 cu ft")
    ),
    expand_product(
      "Clothing", "T-Shirt", 301, "FashionCo", 19.99,
      c("Color: Blue, Size: S", "Color: Red, Size: M", "Color: Green, Size: L")
    ),
    expand_product(
      "Clothing", "Jeans", 302, "DenimWorks", 49.99,
      c("Color: Dark Blue, Size: 32", "Color: Light Blue, Size: 34")
    ),
    expand_product(
      "Books", "Fiction Novel", 401, "-", 14.99,
      c("Format: Hardcover, Language: English",
        "Format: Paperback, Language: Spanish")
    ),
    expand_product(
      "Books", "Non-Fiction Guide", 402, "-", 24.99,
      c("Format: eBook, Language: English",
        "Format: Paperback, Language: French")
    ),
    expand_product(
      "Sports Equipment", "Basketball", 501, "SportsGear", 29.99,
      "Size: Size 7, Color: Orange"
    ),
    expand_product(
      "Sports Equipment", "Tennis Racket", 502, "RacketPro", 89.99,
      c("Material: Graphite, Color: Black", "Material: Aluminum, Color: Silver")
    )
  )
})

# Export JSON: serialise the record list to a JSON string and write that
# string, followed by a newline, to disk
json_format <- toJSON(cunymart_data_json)
writeLines(json_format, con = "cunymart_data.json")

# Import JSON: parse the file back into a list of records and display it
imported_json <- fromJSON(file = "cunymart_data.json")
print(imported_json)
## [[1]]
## [[1]]$Category
## [1] "Electronics"
## 
## [[1]]$Item_Name
## [1] "Smartphone"
## 
## [[1]]$Item_ID
## [1] 101
## 
## [[1]]$Brand
## [1] "TechBrand"
## 
## [[1]]$Price
## [1] 699.99
## 
## [[1]]$Variation_ID
## [1] "101-A"
## 
## [[1]]$Variation_Details
## [1] "Color: Black, Storage: 64GB"
## 
## 
## [[2]]
## [[2]]$Category
## [1] "Electronics"
## 
## [[2]]$Item_Name
## [1] "Smartphone"
## 
## [[2]]$Item_ID
## [1] 101
## 
## [[2]]$Brand
## [1] "TechBrand"
## 
## [[2]]$Price
## [1] 699.99
## 
## [[2]]$Variation_ID
## [1] "101-B"
## 
## [[2]]$Variation_Details
## [1] "Color: White, Storage: 128GB"
## 
## 
## [[3]]
## [[3]]$Category
## [1] "Electronics"
## 
## [[3]]$Item_Name
## [1] "Laptop"
## 
## [[3]]$Item_ID
## [1] 102
## 
## [[3]]$Brand
## [1] "CompuBrand"
## 
## [[3]]$Price
## [1] 1099.99
## 
## [[3]]$Variation_ID
## [1] "102-A"
## 
## [[3]]$Variation_Details
## [1] "Color: Silver, Storage: 256GB"
## 
## 
## [[4]]
## [[4]]$Category
## [1] "Electronics"
## 
## [[4]]$Item_Name
## [1] "Laptop"
## 
## [[4]]$Item_ID
## [1] 102
## 
## [[4]]$Brand
## [1] "CompuBrand"
## 
## [[4]]$Price
## [1] 1099.99
## 
## [[4]]$Variation_ID
## [1] "102-B"
## 
## [[4]]$Variation_Details
## [1] "Color: Space Gray, Storage: 512GB"
## 
## 
## [[5]]
## [[5]]$Category
## [1] "Home Appliances"
## 
## [[5]]$Item_Name
## [1] "Refrigerator"
## 
## [[5]]$Item_ID
## [1] 201
## 
## [[5]]$Brand
## [1] "HomeCool"
## 
## [[5]]$Price
## [1] 899.99
## 
## [[5]]$Variation_ID
## [1] "201-A"
## 
## [[5]]$Variation_Details
## [1] "Color: Stainless Steel, Capacity: 20 cu ft"
## 
## 
## [[6]]
## [[6]]$Category
## [1] "Home Appliances"
## 
## [[6]]$Item_Name
## [1] "Refrigerator"
## 
## [[6]]$Item_ID
## [1] 201
## 
## [[6]]$Brand
## [1] "HomeCool"
## 
## [[6]]$Price
## [1] 899.99
## 
## [[6]]$Variation_ID
## [1] "201-B"
## 
## [[6]]$Variation_Details
## [1] "Color: White, Capacity: 18 cu ft"
## 
## 
## [[7]]
## [[7]]$Category
## [1] "Home Appliances"
## 
## [[7]]$Item_Name
## [1] "Washing Machine"
## 
## [[7]]$Item_ID
## [1] 202
## 
## [[7]]$Brand
## [1] "CleanTech"
## 
## [[7]]$Price
## [1] 499.99
## 
## [[7]]$Variation_ID
## [1] "202-A"
## 
## [[7]]$Variation_Details
## [1] "Type: Front Load, Capacity: 4.5 cu ft"
## 
## 
## [[8]]
## [[8]]$Category
## [1] "Home Appliances"
## 
## [[8]]$Item_Name
## [1] "Washing Machine"
## 
## [[8]]$Item_ID
## [1] 202
## 
## [[8]]$Brand
## [1] "CleanTech"
## 
## [[8]]$Price
## [1] 499.99
## 
## [[8]]$Variation_ID
## [1] "202-B"
## 
## [[8]]$Variation_Details
## [1] "Type: Top Load, Capacity: 5.0 cu ft"
## 
## 
## [[9]]
## [[9]]$Category
## [1] "Clothing"
## 
## [[9]]$Item_Name
## [1] "T-Shirt"
## 
## [[9]]$Item_ID
## [1] 301
## 
## [[9]]$Brand
## [1] "FashionCo"
## 
## [[9]]$Price
## [1] 19.99
## 
## [[9]]$Variation_ID
## [1] "301-A"
## 
## [[9]]$Variation_Details
## [1] "Color: Blue, Size: S"
## 
## 
## [[10]]
## [[10]]$Category
## [1] "Clothing"
## 
## [[10]]$Item_Name
## [1] "T-Shirt"
## 
## [[10]]$Item_ID
## [1] 301
## 
## [[10]]$Brand
## [1] "FashionCo"
## 
## [[10]]$Price
## [1] 19.99
## 
## [[10]]$Variation_ID
## [1] "301-B"
## 
## [[10]]$Variation_Details
## [1] "Color: Red, Size: M"
## 
## 
## [[11]]
## [[11]]$Category
## [1] "Clothing"
## 
## [[11]]$Item_Name
## [1] "T-Shirt"
## 
## [[11]]$Item_ID
## [1] 301
## 
## [[11]]$Brand
## [1] "FashionCo"
## 
## [[11]]$Price
## [1] 19.99
## 
## [[11]]$Variation_ID
## [1] "301-C"
## 
## [[11]]$Variation_Details
## [1] "Color: Green, Size: L"
## 
## 
## [[12]]
## [[12]]$Category
## [1] "Clothing"
## 
## [[12]]$Item_Name
## [1] "Jeans"
## 
## [[12]]$Item_ID
## [1] 302
## 
## [[12]]$Brand
## [1] "DenimWorks"
## 
## [[12]]$Price
## [1] 49.99
## 
## [[12]]$Variation_ID
## [1] "302-A"
## 
## [[12]]$Variation_Details
## [1] "Color: Dark Blue, Size: 32"
## 
## 
## [[13]]
## [[13]]$Category
## [1] "Clothing"
## 
## [[13]]$Item_Name
## [1] "Jeans"
## 
## [[13]]$Item_ID
## [1] 302
## 
## [[13]]$Brand
## [1] "DenimWorks"
## 
## [[13]]$Price
## [1] 49.99
## 
## [[13]]$Variation_ID
## [1] "302-B"
## 
## [[13]]$Variation_Details
## [1] "Color: Light Blue, Size: 34"
## 
## 
## [[14]]
## [[14]]$Category
## [1] "Books"
## 
## [[14]]$Item_Name
## [1] "Fiction Novel"
## 
## [[14]]$Item_ID
## [1] 401
## 
## [[14]]$Brand
## [1] "-"
## 
## [[14]]$Price
## [1] 14.99
## 
## [[14]]$Variation_ID
## [1] "401-A"
## 
## [[14]]$Variation_Details
## [1] "Format: Hardcover, Language: English"
## 
## 
## [[15]]
## [[15]]$Category
## [1] "Books"
## 
## [[15]]$Item_Name
## [1] "Fiction Novel"
## 
## [[15]]$Item_ID
## [1] 401
## 
## [[15]]$Brand
## [1] "-"
## 
## [[15]]$Price
## [1] 14.99
## 
## [[15]]$Variation_ID
## [1] "401-B"
## 
## [[15]]$Variation_Details
## [1] "Format: Paperback, Language: Spanish"
## 
## 
## [[16]]
## [[16]]$Category
## [1] "Books"
## 
## [[16]]$Item_Name
## [1] "Non-Fiction Guide"
## 
## [[16]]$Item_ID
## [1] 402
## 
## [[16]]$Brand
## [1] "-"
## 
## [[16]]$Price
## [1] 24.99
## 
## [[16]]$Variation_ID
## [1] "402-A"
## 
## [[16]]$Variation_Details
## [1] "Format: eBook, Language: English"
## 
## 
## [[17]]
## [[17]]$Category
## [1] "Books"
## 
## [[17]]$Item_Name
## [1] "Non-Fiction Guide"
## 
## [[17]]$Item_ID
## [1] 402
## 
## [[17]]$Brand
## [1] "-"
## 
## [[17]]$Price
## [1] 24.99
## 
## [[17]]$Variation_ID
## [1] "402-B"
## 
## [[17]]$Variation_Details
## [1] "Format: Paperback, Language: French"
## 
## 
## [[18]]
## [[18]]$Category
## [1] "Sports Equipment"
## 
## [[18]]$Item_Name
## [1] "Basketball"
## 
## [[18]]$Item_ID
## [1] 501
## 
## [[18]]$Brand
## [1] "SportsGear"
## 
## [[18]]$Price
## [1] 29.99
## 
## [[18]]$Variation_ID
## [1] "501-A"
## 
## [[18]]$Variation_Details
## [1] "Size: Size 7, Color: Orange"
## 
## 
## [[19]]
## [[19]]$Category
## [1] "Sports Equipment"
## 
## [[19]]$Item_Name
## [1] "Tennis Racket"
## 
## [[19]]$Item_ID
## [1] 502
## 
## [[19]]$Brand
## [1] "RacketPro"
## 
## [[19]]$Price
## [1] 89.99
## 
## [[19]]$Variation_ID
## [1] "502-A"
## 
## [[19]]$Variation_Details
## [1] "Material: Graphite, Color: Black"
## 
## 
## [[20]]
## [[20]]$Category
## [1] "Sports Equipment"
## 
## [[20]]$Item_Name
## [1] "Tennis Racket"
## 
## [[20]]$Item_ID
## [1] 502
## 
## [[20]]$Brand
## [1] "RacketPro"
## 
## [[20]]$Price
## [1] 89.99
## 
## [[20]]$Variation_ID
## [1] "502-B"
## 
## [[20]]$Variation_Details
## [1] "Material: Aluminum, Color: Silver"

HTML Format

# Inventory table for the HTML round trip: one row per product variation.
cunymart_data_html <- local({
  # Number of variations of each product, in listing order. Every per-product
  # column below is repeated by these counts to fill the 20 rows.
  variations <- c(2, 2, 2, 2, 3, 2, 2, 2, 1, 2)
  item_ids <- rep(
    c(101, 102, 201, 202, 301, 302, 401, 402, 501, 502),
    times = variations
  )

  data.frame(
    Category = rep(
      c("Electronics", "Home Appliances", "Clothing", "Books",
        "Sports Equipment"),
      times = c(4, 4, 5, 4, 3)
    ),
    Item_Name = rep(
      c("Smartphone", "Laptop", "Refrigerator", "Washing Machine", "T-Shirt",
        "Jeans", "Fiction Novel", "Non-Fiction Guide", "Basketball",
        "Tennis Racket"),
      times = variations
    ),
    Item_ID = item_ids,
    Brand = rep(
      c("TechBrand", "CompuBrand", "HomeCool", "CleanTech", "FashionCo",
        "DenimWorks", "-", "-", "SportsGear", "RacketPro"),
      times = variations
    ),
    Price = rep(
      c(699.99, 1099.99, 899.99, 499.99, 19.99, 49.99, 14.99, 24.99, 29.99,
        89.99),
      times = variations
    ),
    # "<Item_ID>-<letter>", lettered A, B, C, ... within each product
    Variation_ID = paste0(item_ids, "-", LETTERS[sequence(variations)]),
    Variation_Details = c(
      "Color: Black, Storage: 64GB",
      "Color: White, Storage: 128GB",
      "Color: Silver, Storage: 256GB",
      "Color: Space Gray, Storage: 512GB",
      "Color: Stainless Steel, Capacity: 20 cu ft",
      "Color: White, Capacity: 18 cu ft",
      "Type: Front Load, Capacity: 4.5 cu ft",
      "Type: Top Load, Capacity: 5.0 cu ft",
      "Color: Blue, Size: S",
      "Color: Red, Size: M",
      "Color: Green, Size: L",
      "Color: Dark Blue, Size: 32",
      "Color: Light Blue, Size: 34",
      "Format: Hardcover, Language: English",
      "Format: Paperback, Language: Spanish",
      "Format: eBook, Language: English",
      "Format: Paperback, Language: French",
      "Size: Size 7, Color: Orange",
      "Material: Graphite, Color: Black",
      "Material: Aluminum, Color: Silver"
    )
  )
})

# Export HTML: render the data frame as HTML table markup and save it to disk
writeLines(kable(cunymart_data_html, format = "html"), "cunymart_data.html")

# Import HTML: parse the saved file, extract its table(s), and display them
html_content <- read_html("cunymart_data.html")
extracted_table <- html_table(html_content)
print(extracted_table)
## [[1]]
## # A tibble: 20 × 7
##    Category        Item_Name Item_ID Brand  Price Variation_ID Variation_Details
##    <chr>           <chr>       <int> <chr>  <dbl> <chr>        <chr>            
##  1 Electronics     Smartpho…     101 Tech…  700.  101-A        Color: Black, St…
##  2 Electronics     Smartpho…     101 Tech…  700.  101-B        Color: White, St…
##  3 Electronics     Laptop        102 Comp… 1100.  102-A        Color: Silver, S…
##  4 Electronics     Laptop        102 Comp… 1100.  102-B        Color: Space Gra…
##  5 Home Appliances Refriger…     201 Home…  900.  201-A        Color: Stainless…
##  6 Home Appliances Refriger…     201 Home…  900.  201-B        Color: White, Ca…
##  7 Home Appliances Washing …     202 Clea…  500.  202-A        Type: Front Load…
##  8 Home Appliances Washing …     202 Clea…  500.  202-B        Type: Top Load, …
##  9 Clothing        T-Shirt       301 Fash…   20.0 301-A        Color: Blue, Siz…
## 10 Clothing        T-Shirt       301 Fash…   20.0 301-B        Color: Red, Size…
## 11 Clothing        T-Shirt       301 Fash…   20.0 301-C        Color: Green, Si…
## 12 Clothing        Jeans         302 Deni…   50.0 302-A        Color: Dark Blue…
## 13 Clothing        Jeans         302 Deni…   50.0 302-B        Color: Light Blu…
## 14 Books           Fiction …     401 -       15.0 401-A        Format: Hardcove…
## 15 Books           Fiction …     401 -       15.0 401-B        Format: Paperbac…
## 16 Books           Non-Fict…     402 -       25.0 402-A        Format: eBook, L…
## 17 Books           Non-Fict…     402 -       25.0 402-B        Format: Paperbac…
## 18 Sports Equipme… Basketba…     501 Spor…   30.0 501-A        Size: Size 7, Co…
## 19 Sports Equipme… Tennis R…     502 Rack…   90.0 502-A        Material: Graphi…
## 20 Sports Equipme… Tennis R…     502 Rack…   90.0 502-B        Material: Alumin…

XML Format

# Start a new XML document and attach an <Inventory> node to it; the item
# nodes created further down are added with this node as their parent.
xml_doc <- newXMLDoc()
root_node <- newXMLNode(name = "Inventory", doc = xml_doc)

# Inventory records for the XML round trip: a flat, unnamed list holding one
# named list per product variation, every record carrying the same seven fields.
items <- local({
  # Number of variations of each product, in listing order.
  variations <- c(2, 2, 2, 2, 3, 2, 2, 2, 1, 2)
  # Stretch a per-product vector so it has one entry per variation.
  per_variation <- function(values) rep(values, times = variations)

  category <- rep(
    c("Electronics", "Home Appliances", "Clothing", "Books", "Sports Equipment"),
    times = c(4, 4, 5, 4, 3)
  )
  item_name <- per_variation(c(
    "Smartphone", "Laptop", "Refrigerator", "Washing Machine", "T-Shirt",
    "Jeans", "Fiction Novel", "Non-Fiction Guide", "Basketball", "Tennis Racket"
  ))
  item_id <- per_variation(
    c(101, 102, 201, 202, 301, 302, 401, 402, 501, 502)
  )
  brand <- per_variation(c(
    "TechBrand", "CompuBrand", "HomeCool", "CleanTech", "FashionCo",
    "DenimWorks", "-", "-", "SportsGear", "RacketPro"
  ))
  price <- per_variation(
    c(699.99, 1099.99, 899.99, 499.99, 19.99, 49.99, 14.99, 24.99, 29.99, 89.99)
  )
  # "<Item_ID>-<letter>", lettered A, B, C, ... within each product
  variation_id <- paste0(item_id, "-", LETTERS[sequence(variations)])
  variation_details <- c(
    "Color: Black, Storage: 64GB",
    "Color: White, Storage: 128GB",
    "Color: Silver, Storage: 256GB",
    "Color: Space Gray, Storage: 512GB",
    "Color: Stainless Steel, Capacity: 20 cu ft",
    "Color: White, Capacity: 18 cu ft",
    "Type: Front Load, Capacity: 4.5 cu ft",
    "Type: Top Load, Capacity: 5.0 cu ft",
    "Color: Blue, Size: S",
    "Color: Red, Size: M",
    "Color: Green, Size: L",
    "Color: Dark Blue, Size: 32",
    "Color: Light Blue, Size: 34",
    "Format: Hardcover, Language: English",
    "Format: Paperback, Language: Spanish",
    "Format: eBook, Language: English",
    "Format: Paperback, Language: French",
    "Size: Size 7, Color: Orange",
    "Material: Graphite, Color: Black",
    "Material: Aluminum, Color: Silver"
  )

  # Assemble one record per row position from the parallel column vectors.
  lapply(seq_along(variation_id), function(row) {
    list(
      Category = category[row],
      Item_Name = item_name[row],
      Item_ID = item_id[row],
      Brand = brand[row],
      Price = price[row],
      Variation_ID = variation_id[row],
      Variation_Details = variation_details[row]
    )
  })
})

# Add one <Item> element under the root for each record, in list order. Every
# field of the record becomes a child element whose tag is the field name and
# whose content is the field value. invisible() discards lapply()'s result so
# nothing is printed.
invisible(lapply(items, function(record) {
  record_node <- newXMLNode("Item", parent = root_node)
  for (field in names(record)) {
    newXMLNode(field, record[[field]], parent = record_node)
  }
}))

# Export XML: write the document to disk (the returned file name is printed)
saveXML(doc = xml_doc, file = "cunymart_data.xml")
## [1] "cunymart_data.xml"
# Import XML: parse the saved file and display it converted to a nested list
imported_xml <- xmlParse(file = "cunymart_data.xml")
print(xmlToList(node = imported_xml))
## $Item
## $Item$Category
## [1] "Electronics"
## 
## $Item$Item_Name
## [1] "Smartphone"
## 
## $Item$Item_ID
## [1] "101"
## 
## $Item$Brand
## [1] "TechBrand"
## 
## $Item$Price
## [1] "699.99"
## 
## $Item$Variation_ID
## [1] "101-A"
## 
## $Item$Variation_Details
## [1] "Color: Black, Storage: 64GB"
## 
## 
## $Item
## $Item$Category
## [1] "Electronics"
## 
## $Item$Item_Name
## [1] "Smartphone"
## 
## $Item$Item_ID
## [1] "101"
## 
## $Item$Brand
## [1] "TechBrand"
## 
## $Item$Price
## [1] "699.99"
## 
## $Item$Variation_ID
## [1] "101-B"
## 
## $Item$Variation_Details
## [1] "Color: White, Storage: 128GB"
## 
## 
## $Item
## $Item$Category
## [1] "Electronics"
## 
## $Item$Item_Name
## [1] "Laptop"
## 
## $Item$Item_ID
## [1] "102"
## 
## $Item$Brand
## [1] "CompuBrand"
## 
## $Item$Price
## [1] "1099.99"
## 
## $Item$Variation_ID
## [1] "102-A"
## 
## $Item$Variation_Details
## [1] "Color: Silver, Storage: 256GB"
## 
## 
## $Item
## $Item$Category
## [1] "Electronics"
## 
## $Item$Item_Name
## [1] "Laptop"
## 
## $Item$Item_ID
## [1] "102"
## 
## $Item$Brand
## [1] "CompuBrand"
## 
## $Item$Price
## [1] "1099.99"
## 
## $Item$Variation_ID
## [1] "102-B"
## 
## $Item$Variation_Details
## [1] "Color: Space Gray, Storage: 512GB"
## 
## 
## $Item
## $Item$Category
## [1] "Home Appliances"
## 
## $Item$Item_Name
## [1] "Refrigerator"
## 
## $Item$Item_ID
## [1] "201"
## 
## $Item$Brand
## [1] "HomeCool"
## 
## $Item$Price
## [1] "899.99"
## 
## $Item$Variation_ID
## [1] "201-A"
## 
## $Item$Variation_Details
## [1] "Color: Stainless Steel, Capacity: 20 cu ft"
## 
## 
## $Item
## $Item$Category
## [1] "Home Appliances"
## 
## $Item$Item_Name
## [1] "Refrigerator"
## 
## $Item$Item_ID
## [1] "201"
## 
## $Item$Brand
## [1] "HomeCool"
## 
## $Item$Price
## [1] "899.99"
## 
## $Item$Variation_ID
## [1] "201-B"
## 
## $Item$Variation_Details
## [1] "Color: White, Capacity: 18 cu ft"
## 
## 
## $Item
## $Item$Category
## [1] "Home Appliances"
## 
## $Item$Item_Name
## [1] "Washing Machine"
## 
## $Item$Item_ID
## [1] "202"
## 
## $Item$Brand
## [1] "CleanTech"
## 
## $Item$Price
## [1] "499.99"
## 
## $Item$Variation_ID
## [1] "202-A"
## 
## $Item$Variation_Details
## [1] "Type: Front Load, Capacity: 4.5 cu ft"
## 
## 
## $Item
## $Item$Category
## [1] "Home Appliances"
## 
## $Item$Item_Name
## [1] "Washing Machine"
## 
## $Item$Item_ID
## [1] "202"
## 
## $Item$Brand
## [1] "CleanTech"
## 
## $Item$Price
## [1] "499.99"
## 
## $Item$Variation_ID
## [1] "202-B"
## 
## $Item$Variation_Details
## [1] "Type: Top Load, Capacity: 5.0 cu ft"
## 
## 
## $Item
## $Item$Category
## [1] "Clothing"
## 
## $Item$Item_Name
## [1] "T-Shirt"
## 
## $Item$Item_ID
## [1] "301"
## 
## $Item$Brand
## [1] "FashionCo"
## 
## $Item$Price
## [1] "19.99"
## 
## $Item$Variation_ID
## [1] "301-A"
## 
## $Item$Variation_Details
## [1] "Color: Blue, Size: S"
## 
## 
## $Item
## $Item$Category
## [1] "Clothing"
## 
## $Item$Item_Name
## [1] "T-Shirt"
## 
## $Item$Item_ID
## [1] "301"
## 
## $Item$Brand
## [1] "FashionCo"
## 
## $Item$Price
## [1] "19.99"
## 
## $Item$Variation_ID
## [1] "301-B"
## 
## $Item$Variation_Details
## [1] "Color: Red, Size: M"
## 
## 
## $Item
## $Item$Category
## [1] "Clothing"
## 
## $Item$Item_Name
## [1] "T-Shirt"
## 
## $Item$Item_ID
## [1] "301"
## 
## $Item$Brand
## [1] "FashionCo"
## 
## $Item$Price
## [1] "19.99"
## 
## $Item$Variation_ID
## [1] "301-C"
## 
## $Item$Variation_Details
## [1] "Color: Green, Size: L"
## 
## 
## $Item
## $Item$Category
## [1] "Clothing"
## 
## $Item$Item_Name
## [1] "Jeans"
## 
## $Item$Item_ID
## [1] "302"
## 
## $Item$Brand
## [1] "DenimWorks"
## 
## $Item$Price
## [1] "49.99"
## 
## $Item$Variation_ID
## [1] "302-A"
## 
## $Item$Variation_Details
## [1] "Color: Dark Blue, Size: 32"
## 
## 
## $Item
## $Item$Category
## [1] "Clothing"
## 
## $Item$Item_Name
## [1] "Jeans"
## 
## $Item$Item_ID
## [1] "302"
## 
## $Item$Brand
## [1] "DenimWorks"
## 
## $Item$Price
## [1] "49.99"
## 
## $Item$Variation_ID
## [1] "302-B"
## 
## $Item$Variation_Details
## [1] "Color: Light Blue, Size: 34"
## 
## 
## $Item
## $Item$Category
## [1] "Books"
## 
## $Item$Item_Name
## [1] "Fiction Novel"
## 
## $Item$Item_ID
## [1] "401"
## 
## $Item$Brand
## [1] "-"
## 
## $Item$Price
## [1] "14.99"
## 
## $Item$Variation_ID
## [1] "401-A"
## 
## $Item$Variation_Details
## [1] "Format: Hardcover, Language: English"
## 
## 
## $Item
## $Item$Category
## [1] "Books"
## 
## $Item$Item_Name
## [1] "Fiction Novel"
## 
## $Item$Item_ID
## [1] "401"
## 
## $Item$Brand
## [1] "-"
## 
## $Item$Price
## [1] "14.99"
## 
## $Item$Variation_ID
## [1] "401-B"
## 
## $Item$Variation_Details
## [1] "Format: Paperback, Language: Spanish"
## 
## 
## $Item
## $Item$Category
## [1] "Books"
## 
## $Item$Item_Name
## [1] "Non-Fiction Guide"
## 
## $Item$Item_ID
## [1] "402"
## 
## $Item$Brand
## [1] "-"
## 
## $Item$Price
## [1] "24.99"
## 
## $Item$Variation_ID
## [1] "402-A"
## 
## $Item$Variation_Details
## [1] "Format: eBook, Language: English"
## 
## 
## $Item
## $Item$Category
## [1] "Books"
## 
## $Item$Item_Name
## [1] "Non-Fiction Guide"
## 
## $Item$Item_ID
## [1] "402"
## 
## $Item$Brand
## [1] "-"
## 
## $Item$Price
## [1] "24.99"
## 
## $Item$Variation_ID
## [1] "402-B"
## 
## $Item$Variation_Details
## [1] "Format: Paperback, Language: French"
## 
## 
## $Item
## $Item$Category
## [1] "Sports Equipment"
## 
## $Item$Item_Name
## [1] "Basketball"
## 
## $Item$Item_ID
## [1] "501"
## 
## $Item$Brand
## [1] "SportsGear"
## 
## $Item$Price
## [1] "29.99"
## 
## $Item$Variation_ID
## [1] "501-A"
## 
## $Item$Variation_Details
## [1] "Size: Size 7, Color: Orange"
## 
## 
## $Item
## $Item$Category
## [1] "Sports Equipment"
## 
## $Item$Item_Name
## [1] "Tennis Racket"
## 
## $Item$Item_ID
## [1] "502"
## 
## $Item$Brand
## [1] "RacketPro"
## 
## $Item$Price
## [1] "89.99"
## 
## $Item$Variation_ID
## [1] "502-A"
## 
## $Item$Variation_Details
## [1] "Material: Graphite, Color: Black"
## 
## 
## $Item
## $Item$Category
## [1] "Sports Equipment"
## 
## $Item$Item_Name
## [1] "Tennis Racket"
## 
## $Item$Item_ID
## [1] "502"
## 
## $Item$Brand
## [1] "RacketPro"
## 
## $Item$Price
## [1] "89.99"
## 
## $Item$Variation_ID
## [1] "502-B"
## 
## $Item$Variation_Details
## [1] "Material: Aluminum, Color: Silver"

Parquet Format

# Create CUNYMart data for the Parquet round trip: one row per product
# variation, with the same values as the JSON, HTML and XML copies of the
# inventory defined earlier in this file.
cunymart_data_parquet <- data.frame(
  Category = c("Electronics", "Electronics", "Electronics", "Electronics", "Home Appliances", "Home Appliances", 
               "Home Appliances", "Home Appliances", "Clothing", "Clothing", "Clothing", "Clothing", "Clothing",
               "Books", "Books", "Books", "Books", "Sports Equipment", "Sports Equipment", "Sports Equipment"),
  Item_Name = c("Smartphone", "Smartphone", "Laptop", "Laptop", "Refrigerator", "Refrigerator", "Washing Machine",
                "Washing Machine", "T-Shirt", "T-Shirt", "T-Shirt", "Jeans", "Jeans", "Fiction Novel", "Fiction Novel",
                "Non-Fiction Guide", "Non-Fiction Guide", "Basketball", "Tennis Racket", "Tennis Racket"),
  Item_ID = c(101, 101, 102, 102, 201, 201, 202, 202, 301, 301, 301, 302, 302, 401, 401, 402, 402, 501, 502, 502),
  # "-" marks the book rows, which carry no brand name. The basketball brand is
  # "SportsGear", spelled as in the other copies of this data.
  Brand = c("TechBrand", "TechBrand", "CompuBrand", "CompuBrand", "HomeCool", "HomeCool", "CleanTech", "CleanTech",
            "FashionCo", "FashionCo", "FashionCo", "DenimWorks", "DenimWorks", "-", "-", "-", "-", "SportsGear", "RacketPro", "RacketPro"),
  Price = c(699.99, 699.99, 1099.99, 1099.99, 899.99, 899.99, 499.99, 499.99, 19.99, 19.99, 19.99, 49.99,
            49.99, 14.99, 14.99, 24.99, 24.99, 29.99, 89.99, 89.99),
  Variation_ID = c("101-A", "101-B", "102-A", "102-B", "201-A", "201-B", "202-A", "202-B", "301-A", "301-B", "301-C",
                   "302-A", "302-B", "401-A", "401-B", "402-A", "402-B", "501-A", "502-A", "502-B"),
  # Attribute labels are capitalised consistently ("Language:", not "language:").
  Variation_Details = c("Color: Black, Storage: 64GB", "Color: White, Storage: 128GB", "Color: Silver, Storage: 256GB",
                        "Color: Space Gray, Storage: 512GB", "Color: Stainless Steel, Capacity: 20 cu ft",
                        "Color: White, Capacity: 18 cu ft", "Type: Front Load, Capacity: 4.5 cu ft", 
                        "Type: Top Load, Capacity: 5.0 cu ft", "Color: Blue, Size: S", "Color: Red, Size: M",
                        "Color: Green, Size: L", "Color: Dark Blue, Size: 32", "Color: Light Blue, Size: 34",
                        "Format: Hardcover, Language: English", "Format: Paperback, Language: Spanish",
                        "Format: eBook, Language: English", "Format: Paperback, Language: French", 
                        "Size: Size 7, Color: Orange", "Material: Graphite, Color: Black", "Material: Aluminum, Color: Silver")
)

# Export to Parquet
write_parquet(x = cunymart_data_parquet, sink = "cunymart_data.parquet")

# Import from Parquet and preview the first six rows
imported_parquet <- read_parquet(file = "cunymart_data.parquet")
head(imported_parquet, n = 6L)
## # A tibble: 6 × 7
##   Category        Item_Name   Item_ID Brand Price Variation_ID Variation_Details
##   <chr>           <chr>         <dbl> <chr> <dbl> <chr>        <chr>            
## 1 Electronics     Smartphone      101 Tech…  700. 101-A        Color: Black, St…
## 2 Electronics     Smartphone      101 Tech…  700. 101-B        Color: White, St…
## 3 Electronics     Laptop          102 Comp… 1100. 102-A        Color: Silver, S…
## 4 Electronics     Laptop          102 Comp… 1100. 102-B        Color: Space Gra…
## 5 Home Appliances Refrigerat…     201 Home…  900. 201-A        Color: Stainless…
## 6 Home Appliances Refrigerat…     201 Home…  900. 201-B        Color: White, Ca…

Summary Table

# Comparison of the four file formats exercised above: one row per format,
# with its advantages and drawbacks as comma-separated phrases. These strings
# are shown verbatim in the rendered summary table.
pros_cons <- data.frame(
  Format = c("JSON", "HTML", "XML", "Parquet"),
  Pros = c("Simple to debug, Easy to read & write, Used in APIs",
           "Great for displaying in web browsers, Interactive visualizations",
           "Hierarchical data support, self-descriptive format",
           "Highly compressed, Fast for big data and analytics"),
  Cons = c("No Metadata, Harder to work with large files",
           "Not a suitable data exchange format, Parsing is harder",
           "Verbose and large file sizes, Harder to read",
           "Harder to read, Complex to debug")
)

# Turn the comparison data frame into a gt table and give it a title
proscons_table <- tab_header(
  gt(pros_cons),
  title = "Pros and Cons of Different Data Formats"
)

# Display the finished table
proscons_table
Pros and Cons of Different Data Formats
Format Pros Cons
JSON Simple to debug, Easy to read & write, Used in APIs No Metadata, Harder to work with large files
HTML Great for displaying in web browsers, Interative visualizations Not a suitable data exchange format, Parsing is harder
XML Hierarchial data support, self-descriptive format Verbose and large file sizes, Harder to read
Parquet Highly compressed, Fast for big data and analytics Harder to read, Complex to debug