# Create data frame ----
# Inventory table with one row per product variation (20 rows, 7 columns).
# Every column is a character vector, including Item_ID and Price.
df <- local({
  # Number of variation rows for each of the ten items, in row order.
  # Item-level columns are written once per item and expanded with rep().
  rows_per_item <- c(2, 2, 2, 2, 3, 2, 2, 2, 1, 2)

  data.frame(
    Category = rep(
      c("Electronics", "Home Appliances", "Clothing", "Books",
        "Sports Equipment"),
      times = c(4, 4, 5, 4, 3)
    ),
    Item_Name = rep(
      c("Smartphone", "Laptop", "Refrigerator", "Washing Machine", "T-Shirt",
        "Jeans", "Fiction Novel", "Non-Fiction Guide", "Basketball",
        "Tennis Racket"),
      times = rows_per_item
    ),
    Item_ID = rep(
      c("101", "102", "201", "202", "301", "302", "401", "402", "501", "502"),
      times = rows_per_item
    ),
    # Books carry no brand; "-" is the placeholder used for them.
    Brand = rep(
      c("TechBrand", "CompuBrand", "HomeCool", "CleanTech", "FashionCo",
        "DenimWorks", "-", "-", "SportsGear", "RacketPro"),
      times = rows_per_item
    ),
    Price = rep(
      c("699.99", "1099.99", "899.99", "499.99", "19.99", "49.99", "14.99",
        "24.99", "29.99", "89.99"),
      times = rows_per_item
    ),
    # Variation-level columns are unique per row, so they are listed in full.
    Variation_ID = c(
      "101-A", "101-B", "102-A", "102-B",
      "201-A", "201-B", "202-A", "202-B",
      "301-A", "301-B", "301-C", "302-A", "302-B",
      "401-A", "401-B", "402-A", "402-B",
      "501-A", "502-A", "502-B"
    ),
    Variation_Details = c(
      "Color: Black, Storage: 64GB",
      "Color: White, Storage: 128GB",
      "Color: Silver, Storage: 256GB",
      "Color: Space Gray, Storage: 512GB",
      "Color: Stainless Steel, Capacity: 20 cu ft",
      "Color: White, Capacity: 18 cu ft",
      "Type: Front Load, Capacity: 4.5 cu ft",
      "Type: Top Load, Capacity: 5.0 cu ft",
      "Color: Blue, Size: S",
      "Color: Red, Size: M",
      "Color: Green, Size: L",
      "Color: Dark Blue, Size: 32",
      "Color: Light Blue, Size: 34",
      "Format: Hardcover, Language: English",
      "Format: Paperback, Language: Spanish",
      "Format: eBook, Language: English",
      "Format: Paperback, Language: French",
      "Size: Size 7, Color: Orange",
      "Material: Graphite, Color: Black",
      "Material: Aluminum, Color: Silver"
    )
  )
})
head(df)
##          Category    Item_Name Item_ID      Brand   Price Variation_ID
## 1     Electronics   Smartphone     101  TechBrand  699.99        101-A
## 2     Electronics   Smartphone     101  TechBrand  699.99        101-B
## 3     Electronics       Laptop     102 CompuBrand 1099.99        102-A
## 4     Electronics       Laptop     102 CompuBrand 1099.99        102-B
## 5 Home Appliances Refrigerator     201   HomeCool  899.99        201-A
## 6 Home Appliances Refrigerator     201   HomeCool  899.99        201-B
##                            Variation_Details
## 1                Color: Black, Storage: 64GB
## 2               Color: White, Storage: 128GB
## 3              Color: Silver, Storage: 256GB
## 4          Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6           Color: White, Capacity: 18 cu ft
# JSON ----
# Serialise the inventory data frame as pretty-printed JSON, one object per
# row. `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the `pretty` argument.
json <- toJSON(x = df, dataframe = "rows", pretty = TRUE)
head(json)
## [
##   {
##     "Category": "Electronics",
##     "Item_Name": "Smartphone",
##     "Item_ID": "101",
##     "Brand": "TechBrand",
##     "Price": "699.99",
##     "Variation_ID": "101-A",
##     "Variation_Details": "Color: Black, Storage: 64GB"
##   },
##   {
##     "Category": "Electronics",
##     "Item_Name": "Smartphone",
##     "Item_ID": "101",
##     "Brand": "TechBrand",
##     "Price": "699.99",
##     "Variation_ID": "101-B",
##     "Variation_Details": "Color: White, Storage: 128GB"
##   },
##   {
##     "Category": "Electronics",
##     "Item_Name": "Laptop",
##     "Item_ID": "102",
##     "Brand": "CompuBrand",
##     "Price": "1099.99",
##     "Variation_ID": "102-A",
##     "Variation_Details": "Color: Silver, Storage: 256GB"
##   },
##   {
##     "Category": "Electronics",
##     "Item_Name": "Laptop",
##     "Item_ID": "102",
##     "Brand": "CompuBrand",
##     "Price": "1099.99",
##     "Variation_ID": "102-B",
##     "Variation_Details": "Color: Space Gray, Storage: 512GB"
##   },
##   {
##     "Category": "Home Appliances",
##     "Item_Name": "Refrigerator",
##     "Item_ID": "201",
##     "Brand": "HomeCool",
##     "Price": "899.99",
##     "Variation_ID": "201-A",
##     "Variation_Details": "Color: Stainless Steel, Capacity: 20 cu ft"
##   },
##   {
##     "Category": "Home Appliances",
##     "Item_Name": "Refrigerator",
##     "Item_ID": "201",
##     "Brand": "HomeCool",
##     "Price": "899.99",
##     "Variation_ID": "201-B",
##     "Variation_Details": "Color: White, Capacity: 18 cu ft"
##   },
##   {
##     "Category": "Home Appliances",
##     "Item_Name": "Washing Machine",
##     "Item_ID": "202",
##     "Brand": "CleanTech",
##     "Price": "499.99",
##     "Variation_ID": "202-A",
##     "Variation_Details": "Type: Front Load, Capacity: 4.5 cu ft"
##   },
##   {
##     "Category": "Home Appliances",
##     "Item_Name": "Washing Machine",
##     "Item_ID": "202",
##     "Brand": "CleanTech",
##     "Price": "499.99",
##     "Variation_ID": "202-B",
##     "Variation_Details": "Type: Top Load, Capacity: 5.0 cu ft"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "T-Shirt",
##     "Item_ID": "301",
##     "Brand": "FashionCo",
##     "Price": "19.99",
##     "Variation_ID": "301-A",
##     "Variation_Details": "Color: Blue, Size: S"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "T-Shirt",
##     "Item_ID": "301",
##     "Brand": "FashionCo",
##     "Price": "19.99",
##     "Variation_ID": "301-B",
##     "Variation_Details": "Color: Red, Size: M"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "T-Shirt",
##     "Item_ID": "301",
##     "Brand": "FashionCo",
##     "Price": "19.99",
##     "Variation_ID": "301-C",
##     "Variation_Details": "Color: Green, Size: L"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "Jeans",
##     "Item_ID": "302",
##     "Brand": "DenimWorks",
##     "Price": "49.99",
##     "Variation_ID": "302-A",
##     "Variation_Details": "Color: Dark Blue, Size: 32"
##   },
##   {
##     "Category": "Clothing",
##     "Item_Name": "Jeans",
##     "Item_ID": "302",
##     "Brand": "DenimWorks",
##     "Price": "49.99",
##     "Variation_ID": "302-B",
##     "Variation_Details": "Color: Light Blue, Size: 34"
##   },
##   {
##     "Category": "Books",
##     "Item_Name": "Fiction Novel",
##     "Item_ID": "401",
##     "Brand": "-",
##     "Price": "14.99",
##     "Variation_ID": "401-A",
##     "Variation_Details": "Format: Hardcover, Language: English"
##   },
##   {
##     "Category": "Books",
##     "Item_Name": "Fiction Novel",
##     "Item_ID": "401",
##     "Brand": "-",
##     "Price": "14.99",
##     "Variation_ID": "401-B",
##     "Variation_Details": "Format: Paperback, Language: Spanish"
##   },
##   {
##     "Category": "Books",
##     "Item_Name": "Non-Fiction Guide",
##     "Item_ID": "402",
##     "Brand": "-",
##     "Price": "24.99",
##     "Variation_ID": "402-A",
##     "Variation_Details": "Format: eBook, Language: English"
##   },
##   {
##     "Category": "Books",
##     "Item_Name": "Non-Fiction Guide",
##     "Item_ID": "402",
##     "Brand": "-",
##     "Price": "24.99",
##     "Variation_ID": "402-B",
##     "Variation_Details": "Format: Paperback, Language: French"
##   },
##   {
##     "Category": "Sports Equipment",
##     "Item_Name": "Basketball",
##     "Item_ID": "501",
##     "Brand": "SportsGear",
##     "Price": "29.99",
##     "Variation_ID": "501-A",
##     "Variation_Details": "Size: Size 7, Color: Orange"
##   },
##   {
##     "Category": "Sports Equipment",
##     "Item_Name": "Tennis Racket",
##     "Item_ID": "502",
##     "Brand": "RacketPro",
##     "Price": "89.99",
##     "Variation_ID": "502-A",
##     "Variation_Details": "Material: Graphite, Color: Black"
##   },
##   {
##     "Category": "Sports Equipment",
##     "Item_Name": "Tennis Racket",
##     "Item_ID": "502",
##     "Brand": "RacketPro",
##     "Price": "89.99",
##     "Variation_ID": "502-B",
##     "Variation_Details": "Material: Aluminum, Color: Silver"
##   }
## ]

Pros:

  1. JSON is easy to read and write due to its simple syntax.
  2. JSON is less verbose than XML, making it more efficient in terms of data size.
  3. JSON is supported by most modern programming languages.

Cons:

  1. JSON supports only a limited set of data types: strings, numbers, objects, arrays, booleans, and null.
  2. JSON does not support comments.
  3. JSON does not have a built-in schema definition.
# HTML ----
# Render the inventory data frame as an HTML table.
html <- df
tableHTML(html)
Category Item_Name Item_ID Brand Price Variation_ID Variation_Details
1 Electronics Smartphone 101 TechBrand 699.99 101-A Color: Black, Storage: 64GB
2 Electronics Smartphone 101 TechBrand 699.99 101-B Color: White, Storage: 128GB
3 Electronics Laptop 102 CompuBrand 1099.99 102-A Color: Silver, Storage: 256GB
4 Electronics Laptop 102 CompuBrand 1099.99 102-B Color: Space Gray, Storage: 512GB
5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A Color: Stainless Steel, Capacity: 20 cu ft
6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B Color: White, Capacity: 18 cu ft
7 Home Appliances Washing Machine 202 CleanTech 499.99 202-A Type: Front Load, Capacity: 4.5 cu ft
8 Home Appliances Washing Machine 202 CleanTech 499.99 202-B Type: Top Load, Capacity: 5.0 cu ft
9 Clothing T-Shirt 301 FashionCo 19.99 301-A Color: Blue, Size: S
10 Clothing T-Shirt 301 FashionCo 19.99 301-B Color: Red, Size: M
11 Clothing T-Shirt 301 FashionCo 19.99 301-C Color: Green, Size: L
12 Clothing Jeans 302 DenimWorks 49.99 302-A Color: Dark Blue, Size: 32
13 Clothing Jeans 302 DenimWorks 49.99 302-B Color: Light Blue, Size: 34
14 Books Fiction Novel 401 - 14.99 401-A Format: Hardcover, Language: English
15 Books Fiction Novel 401 - 14.99 401-B Format: Paperback, Language: Spanish
16 Books Non-Fiction Guide 402 - 24.99 402-A Format: eBook, Language: English
17 Books Non-Fiction Guide 402 - 24.99 402-B Format: Paperback, Language: French
18 Sports Equipment Basketball 501 SportsGear 29.99 501-A Size: Size 7, Color: Orange
19 Sports Equipment Tennis Racket 502 RacketPro 89.99 502-A Material: Graphite, Color: Black
20 Sports Equipment Tennis Racket 502 RacketPro 89.99 502-B Material: Aluminum, Color: Silver

Pros:

  1. HTML is easy to learn and use, making it accessible for beginners.
  2. HTML allows for integration with CSS and JavaScript for enhanced styling and functionality.
  3. HTML is supported by all web browsers, ensuring compatibility.

Cons:

  1. HTML is limited to structuring content; it does not define how the content looks (styling requires CSS).
  2. HTML markup can become cluttered with excessive use of tags, making maintenance difficult.
  3. HTML is not suitable for complex applications without additional technologies (e.g., JavaScript).
# XML ----
# Convert the inventory data frame to an XML document. Each row becomes a
# <record> element under <root>, with one child tag per column.
xml <- df
df_to_xml(
  xml,
  fields         = "tags",
  record.tag     = "record",
  field.names    = NULL,
  only.fields    = NULL,
  exclude.fields = NULL,
  root.node      = "root",
  xml.file       = NULL,
  non.exist      = NULL,
  encoding       = "UTF-8",
  no.return      = FALSE
)
## {xml_document}
## <root encoding="UTF-8">
##  [1] <record>\n  <Category>Electronics</Category>\n  <Item_Name>Smartphone</I ...
##  [2] <record>\n  <Category>Electronics</Category>\n  <Item_Name>Smartphone</I ...
##  [3] <record>\n  <Category>Electronics</Category>\n  <Item_Name>Laptop</Item_ ...
##  [4] <record>\n  <Category>Electronics</Category>\n  <Item_Name>Laptop</Item_ ...
##  [5] <record>\n  <Category>Home Appliances</Category>\n  <Item_Name>Refrigera ...
##  [6] <record>\n  <Category>Home Appliances</Category>\n  <Item_Name>Refrigera ...
##  [7] <record>\n  <Category>Home Appliances</Category>\n  <Item_Name>Washing M ...
##  [8] <record>\n  <Category>Home Appliances</Category>\n  <Item_Name>Washing M ...
##  [9] <record>\n  <Category>Clothing</Category>\n  <Item_Name>T-Shirt</Item_Na ...
## [10] <record>\n  <Category>Clothing</Category>\n  <Item_Name>T-Shirt</Item_Na ...
## [11] <record>\n  <Category>Clothing</Category>\n  <Item_Name>T-Shirt</Item_Na ...
## [12] <record>\n  <Category>Clothing</Category>\n  <Item_Name>Jeans</Item_Name ...
## [13] <record>\n  <Category>Clothing</Category>\n  <Item_Name>Jeans</Item_Name ...
## [14] <record>\n  <Category>Books</Category>\n  <Item_Name>Fiction Novel</Item ...
## [15] <record>\n  <Category>Books</Category>\n  <Item_Name>Fiction Novel</Item ...
## [16] <record>\n  <Category>Books</Category>\n  <Item_Name>Non-Fiction Guide</ ...
## [17] <record>\n  <Category>Books</Category>\n  <Item_Name>Non-Fiction Guide</ ...
## [18] <record>\n  <Category>Sports Equipment</Category>\n  <Item_Name>Basketba ...
## [19] <record>\n  <Category>Sports Equipment</Category>\n  <Item_Name>Tennis R ...
## [20] <record>\n  <Category>Sports Equipment</Category>\n  <Item_Name>Tennis R ...

Pros:

  1. XML supports a wide range of data types and can represent complex data hierarchies.
  2. XML is self-descriptive, making it easier to understand the data structure.
  3. Well-formed documents can be validated against schemas (e.g., DTD, XSD) for consistency.

Cons:

  1. XML is more verbose than JSON, leading to larger file sizes.
  2. Parsing XML can be more complex and slower than parsing simpler formats.
  3. XML requires strict adherence to syntax rules, which can lead to errors if they are not followed.
# Parquet ----
# Round-trip the inventory data frame through a Parquet file: write it to
# disk, then read it back so the result is printed.
parquet <- df
# The path is relative, so the file is created in the current working
# directory and is left on disk afterwards.
write_parquet(parquet, "inventory.parquet")
# NOTE(review): the parquet package providing these functions is loaded
# outside this section; confirm which one before relying on the return class.
read_parquet("inventory.parquet")
##            Category         Item_Name Item_ID      Brand   Price Variation_ID
## 1       Electronics        Smartphone     101  TechBrand  699.99        101-A
## 2       Electronics        Smartphone     101  TechBrand  699.99        101-B
## 3       Electronics            Laptop     102 CompuBrand 1099.99        102-A
## 4       Electronics            Laptop     102 CompuBrand 1099.99        102-B
## 5   Home Appliances      Refrigerator     201   HomeCool  899.99        201-A
## 6   Home Appliances      Refrigerator     201   HomeCool  899.99        201-B
## 7   Home Appliances   Washing Machine     202  CleanTech  499.99        202-A
## 8   Home Appliances   Washing Machine     202  CleanTech  499.99        202-B
## 9          Clothing           T-Shirt     301  FashionCo   19.99        301-A
## 10         Clothing           T-Shirt     301  FashionCo   19.99        301-B
## 11         Clothing           T-Shirt     301  FashionCo   19.99        301-C
## 12         Clothing             Jeans     302 DenimWorks   49.99        302-A
## 13         Clothing             Jeans     302 DenimWorks   49.99        302-B
## 14            Books     Fiction Novel     401          -   14.99        401-A
## 15            Books     Fiction Novel     401          -   14.99        401-B
## 16            Books Non-Fiction Guide     402          -   24.99        402-A
## 17            Books Non-Fiction Guide     402          -   24.99        402-B
## 18 Sports Equipment        Basketball     501 SportsGear   29.99        501-A
## 19 Sports Equipment     Tennis Racket     502  RacketPro   89.99        502-A
## 20 Sports Equipment     Tennis Racket     502  RacketPro   89.99        502-B
##                             Variation_Details
## 1                 Color: Black, Storage: 64GB
## 2                Color: White, Storage: 128GB
## 3               Color: Silver, Storage: 256GB
## 4           Color: Space Gray, Storage: 512GB
## 5  Color: Stainless Steel, Capacity: 20 cu ft
## 6            Color: White, Capacity: 18 cu ft
## 7       Type: Front Load, Capacity: 4.5 cu ft
## 8         Type: Top Load, Capacity: 5.0 cu ft
## 9                        Color: Blue, Size: S
## 10                        Color: Red, Size: M
## 11                      Color: Green, Size: L
## 12                 Color: Dark Blue, Size: 32
## 13                Color: Light Blue, Size: 34
## 14       Format: Hardcover, Language: English
## 15       Format: Paperback, Language: Spanish
## 16           Format: eBook, Language: English
## 17        Format: Paperback, Language: French
## 18                Size: Size 7, Color: Orange
## 19           Material: Graphite, Color: Black
## 20          Material: Aluminum, Color: Silver

Pros:

  1. Columnar format reduces storage space by compressing similar data types together, leading to better compression ratios.
  2. Enables faster read times for analytical queries, as only the necessary columns are read from disk.
  3. Designed to work well with data processing frameworks, making it ideal for large datasets.

Cons:

  1. More complex than row-based formats, which may require additional learning for new users.
  2. Writing data can be slower compared to row-oriented formats, especially for small, frequent writes.
  3. Less suitable for use cases that require frequent updates or transactions, as it is optimized for read-heavy operations.