# Package setup: the typographic quotes in the original call are a parse error
# in R, and the package was never attached even though the rest of the script
# uses read_csv(), glimpse(), tibble(), ggplot() and %>%.
# Install only when the package is missing so re-running the script does not
# trigger a reinstall every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Restore the objects saved in Dataset_AI.RData into the current workspace.
load("Dataset_AI.RData")
# List the workspace contents to confirm which objects were restored.
ls()
## [1] "Dataset_AI"

1. Pregunta d’estudi

2. Descripció del conjunt de dades

A continuació es mostra com hem carregat el fitxer amb el nostre conjunt de dades:

# Read the CSV version of the dataset into a tibble; readr guesses the column
# types and reports the specification it chose.
dades <- readr::read_csv(file = "Dataset_AI.csv")
## Rows: 5000 Columns: 26
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (5): agent_id, agent_type, model_architecture, deployment_environment,...
## dbl  (17): task_complexity, autonomy_level, success_rate, accuracy_score, ef...
## lgl   (3): human_intervention_required, multimodal_capability, edge_compatib...
## dttm  (1): timestamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Dimensions del dataset:

# Compact column-wise overview: one line per variable with its type and the
# first few values.
dades %>% glimpse()
## Rows: 5,000
## Columns: 26
## $ agent_id                    <chr> "AG_01012", "AG_00758", "AG_00966", "AG_00…
## $ agent_type                  <chr> "Project Manager", "Marketing Assistant", …
## $ model_architecture          <chr> "PaLM-2", "Mixtral-8x7B", "Mixtral-8x7B", …
## $ deployment_environment      <chr> "Server", "Hybrid", "Server", "Hybrid", "E…
## $ task_category               <chr> "Text Processing", "Decision Making", "Com…
## $ task_complexity             <dbl> 5, 6, 2, 8, 3, 5, 4, 8, 8, 7, 5, 3, 4, 5, …
## $ autonomy_level              <dbl> 3, 5, 4, 6, 4, 7, 2, 8, 8, 6, 7, 5, 5, 6, …
## $ success_rate                <dbl> 0.4788, 0.4833, 0.8116, 0.3574, 0.5706, 0.…
## $ accuracy_score              <dbl> 0.6455, 0.5660, 0.8395, 0.4888, 0.7137, 0.…
## $ efficiency_score            <dbl> 0.6573, 0.5844, 0.7650, 0.4742, 0.7209, 0.…
## $ execution_time_seconds      <dbl> 22.42, 9.30, 10.37, 43.85, 23.02, 11.04, 1…
## $ response_latency_ms         <dbl> 383.35, 127.38, 2185.27, 1847.43, 254.30, …
## $ memory_usage_mb             <dbl> 308.9, 372.4, 183.3, 488.2, 278.4, 346.3, …
## $ cpu_usage_percent           <dbl> 53.1, 84.9, 45.9, 75.3, 15.1, 66.5, 62.6, …
## $ cost_per_task_cents         <dbl> 0.0106, 0.0068, 0.0053, 0.0195, 0.0105, 0.…
## $ human_intervention_required <lgl> TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE,…
## $ error_recovery_rate         <dbl> 0.4999, 0.5580, 0.9196, 0.3809, 0.6717, 0.…
## $ multimodal_capability       <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, F…
## $ edge_compatibility          <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FA…
## $ privacy_compliance_score    <dbl> 0.9390, 0.8281, 0.7450, 0.9653, 0.9042, 0.…
## $ bias_detection_score        <dbl> 0.8061, 0.7816, 0.8214, 0.8684, 0.8417, 0.…
## $ timestamp                   <dttm> 2024-12-24 04:16:15, 2024-12-24 04:16:15,…
## $ data_quality_score          <dbl> 0.9510, 0.7822, 0.7621, 0.8117, 0.7762, 0.…
## $ performance_index           <dbl> 0.58236, 0.53844, 0.80599, 0.43186, 0.6586…
## $ cost_efficiency_ratio       <dbl> 50.203448, 69.030769, 127.934921, 21.06634…
## $ autonomous_capability_score <dbl> 64.993, 89.060, 124.372, 86.663, 87.019, 1…
# Number of rows followed by number of columns.
c(nrow(dades), ncol(dades))
## [1] 5000   26
# Preview the first six observations.
head(dades, n = 6L)
## # A tibble: 6 × 26
##   agent_id agent_type    model_architecture deployment_environment task_category
##   <chr>    <chr>         <chr>              <chr>                  <chr>        
## 1 AG_01012 Project Mana… PaLM-2             Server                 Text Process…
## 2 AG_00758 Marketing As… Mixtral-8x7B       Hybrid                 Decision Mak…
## 3 AG_00966 QA Tester     Mixtral-8x7B       Server                 Communication
## 4 AG_00480 Code Assista… CodeT5+            Hybrid                 Creative Wri…
## 5 AG_01050 QA Tester     Falcon-180B        Edge                   Planning & S…
## 6 AG_00037 Email Manager Transformer-XL     Edge                   Communication
## # ℹ 21 more variables: task_complexity <dbl>, autonomy_level <dbl>,
## #   success_rate <dbl>, accuracy_score <dbl>, efficiency_score <dbl>,
## #   execution_time_seconds <dbl>, response_latency_ms <dbl>,
## #   memory_usage_mb <dbl>, cpu_usage_percent <dbl>, cost_per_task_cents <dbl>,
## #   human_intervention_required <lgl>, error_recovery_rate <dbl>,
## #   multimodal_capability <lgl>, edge_compatibility <lgl>,
## #   privacy_compliance_score <dbl>, bias_detection_score <dbl>, …

Diccionari de variables:

Variable Tipus Descripció Valors possibles / rang
agent_id categòrica Identificador de l’agent Codi string
agent_type categòrica Rol o tipus de l’agent Textual
model_architecture categòrica Arquitectura/model utilitzat per l’agent Textual
deployment_environment categòrica Entorn de desplegament Textual (p. ex. Server, Hybrid, Edge)
task_category categòrica Categoria de la tasca realitzada Textual
task_complexity numèrica Nivell de complexitat de la tasca Int 1-10
autonomy_level numèrica Nivell d’autonomia atorgat a l’agent Int 1-10
success_rate numèrica Percentatge d’èxit de la tasca Float 0.0-1.0
accuracy_score numèrica Mesura d’exactitud per a la tasca Float 0.0-1.0
efficiency_score numèrica Mesura d’eficiència Float 0.0-1.0
execution_time_seconds numèrica Temps d’execució en segons Float>0.0
response_latency_ms numèrica Latència de resposta en mil·lisegons Float>0.0
memory_usage_mb numèrica Consum de memòria durant la tasca en MB Float>0.0
cpu_usage_percent numèrica Percentatge d’ús de la CPU durant la tasca Float 0.0-100.0 (al conjunt de dades s’observen valors fins a 133.6)
cost_per_task_cents numèrica Cost de la tasca en cèntims Float>=0.0
human_intervention_required categòrica Indica si ha calgut intervenció humana durant la tasca Bool true / false
error_recovery_rate numèrica Percentatge d’èxit en recuperació Float 0.0-1.0
multimodal_capability categòrica Indica si l’agent disposa de multimodalitat Bool true / false
edge_compatibility categòrica Indica si l’agent pot executar-se en un dispositiu edge Bool true / false
privacy_compliance_score numèrica Compliment de la privacitat Float 0.0-1.0
bias_detection_score numèrica Mesura de detecció de bias Float 0.0-1.0
timestamp data-hora Marca temporal de l’execució Data i hora (dttm), del 2024-12-24 al 2025-06-22
data_quality_score numèrica Mesura de qualitat de dades per entrada Float 0.0-1.0
performance_index numèrica Índex compost de rendiment Float 0.0-1.0
cost_efficiency_ratio numèrica Relació cost / eficiència Float
autonomous_capability_score numèrica Puntuació composta d’habilitats autònomes Float 0.0-200.0 aproximadament

De moment, les que nosaltres farem servir principalment seran les següents:

Variable Tipus Descripció Valors possibles / rang
model_architecture categòrica Arquitectura/model utilitzat per l’agent Textual
task_complexity numèrica Nivell de complexitat de la tasca Int 1-10
autonomy_level numèrica Nivell d’autonomia atorgat a l’agent Int 1-10
data_quality_score numèrica Mesura de qualitat de dades per entrada Float 0.0-1.0
 # Table with each column's name and its R class.
 # sapply(dades, class) cannot simplify to a character vector because a
 # date-time column has a two-element class (POSIXct, POSIXt), so it returned a
 # named list and the table only printed "<chr [1]>". vapply() guarantees
 # exactly one string per column; multi-element classes are joined with "/".
 tibble(
   variable = names(dades),
   tipus = vapply(
     dades,
     function(columna) paste(class(columna), collapse = "/"),
     character(1),
     USE.NAMES = FALSE
   )
 )
## # A tibble: 26 × 2
##    variable               tipus       
##    <chr>                  <named list>
##  1 agent_id               <chr [1]>   
##  2 agent_type             <chr [1]>   
##  3 model_architecture     <chr [1]>   
##  4 deployment_environment <chr [1]>   
##  5 task_category          <chr [1]>   
##  6 task_complexity        <chr [1]>   
##  7 autonomy_level         <chr [1]>   
##  8 success_rate           <chr [1]>   
##  9 accuracy_score         <chr [1]>   
## 10 efficiency_score       <chr [1]>   
## # ℹ 16 more rows

3. Exploració inicial de les dades

# Per-column summary of the full dataset: quantiles and mean for numeric
# columns, TRUE/FALSE counts for logical ones, length/class for character ones.
dades %>% summary()
##    agent_id          agent_type        model_architecture
##  Length:5000        Length:5000        Length:5000       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##  deployment_environment task_category      task_complexity  autonomy_level  
##  Length:5000            Length:5000        Min.   : 2.000   Min.   : 1.000  
##  Class :character       Class :character   1st Qu.: 4.000   1st Qu.: 4.000  
##  Mode  :character       Mode  :character   Median : 6.000   Median : 6.000  
##                                            Mean   : 6.083   Mean   : 6.031  
##                                            3rd Qu.: 8.000   3rd Qu.: 8.000  
##                                            Max.   :10.000   Max.   :10.000  
##   success_rate    accuracy_score   efficiency_score execution_time_seconds
##  Min.   :0.3000   Min.   :0.4000   Min.   :0.3000   Min.   :  1.00        
##  1st Qu.:0.3390   1st Qu.:0.4655   1st Qu.:0.5020   1st Qu.: 26.66        
##  Median :0.4701   Median :0.5659   Median :0.5889   Median : 40.32        
##  Mean   :0.4907   Mean   :0.5734   Mean   :0.5889   Mean   : 46.22        
##  3rd Qu.:0.6133   3rd Qu.:0.6679   3rd Qu.:0.6747   3rd Qu.: 60.18        
##  Max.   :0.9765   Max.   :0.9596   Max.   :0.8855   Max.   :157.15        
##  response_latency_ms memory_usage_mb cpu_usage_percent cost_per_task_cents
##  Min.   : 100.0      Min.   :112.9   Min.   : 10.90    Min.   :0.00270    
##  1st Qu.: 338.5      1st Qu.:322.2   1st Qu.: 54.60    1st Qu.:0.01240    
##  Median : 637.7      Median :402.7   Median : 68.50    Median :0.01750    
##  Mean   : 899.6      Mean   :404.4   Mean   : 68.96    Mean   :0.01945    
##  3rd Qu.:1202.4      3rd Qu.:488.3   3rd Qu.: 83.50    3rd Qu.:0.02450    
##  Max.   :5478.1      Max.   :686.6   Max.   :133.60    Max.   :0.05850    
##  human_intervention_required error_recovery_rate multimodal_capability
##  Mode :logical               Min.   :0.3001      Mode :logical        
##  FALSE:607                   1st Qu.:0.4266      FALSE:4260           
##  TRUE :4393                  Median :0.5473      TRUE :740            
##                              Mean   :0.5659                           
##                              3rd Qu.:0.6900                           
##                              Max.   :0.9500                           
##  edge_compatibility privacy_compliance_score bias_detection_score
##  Mode :logical      Min.   :0.7000           Min.   :0.6002      
##  FALSE:2307         1st Qu.:0.7720           1st Qu.:0.6853      
##  TRUE :2693         Median :0.8422           Median :0.7738      
##                     Mean   :0.8419           Mean   :0.7744      
##                     3rd Qu.:0.9134           3rd Qu.:0.8643      
##                     Max.   :0.9799           Max.   :0.9500      
##    timestamp                   data_quality_score performance_index
##  Min.   :2024-12-24 04:16:15   Min.   :0.7500     Min.   :0.3300   
##  1st Qu.:2025-02-05 04:16:16   1st Qu.:0.8064     1st Qu.:0.4282   
##  Median :2025-03-23 04:16:16   Median :0.8625     Median :0.5361   
##  Mean   :2025-03-23 04:29:13   Mean   :0.8637     Mean   :0.5450   
##  3rd Qu.:2025-05-07 04:16:16   3rd Qu.:0.9222     3rd Qu.:0.6474   
##  Max.   :2025-06-22 04:16:16   Max.   :0.9799     Max.   :0.8875   
##  cost_efficiency_ratio autonomous_capability_score
##  Min.   :  5.952       Min.   : 56.28             
##  1st Qu.: 18.107       1st Qu.: 89.83             
##  Median : 28.916       Median :102.63             
##  Mean   : 34.321       Mean   :102.35             
##  3rd Qu.: 45.149       3rd Qu.:116.00             
##  Max.   :219.327       Max.   :147.69
# Same summary, restricted to the four variables the study focuses on.
dades %>%
  select(
    task_complexity,
    autonomy_level,
    model_architecture,
    data_quality_score
  ) %>%
  summary()
##  task_complexity  autonomy_level   model_architecture data_quality_score
##  Min.   : 2.000   Min.   : 1.000   Length:5000        Min.   :0.7500    
##  1st Qu.: 4.000   1st Qu.: 4.000   Class :character   1st Qu.:0.8064    
##  Median : 6.000   Median : 6.000   Mode  :character   Median :0.8625    
##  Mean   : 6.083   Mean   : 6.031                      Mean   :0.8637    
##  3rd Qu.: 8.000   3rd Qu.: 8.000                      3rd Qu.:0.9222    
##  Max.   :10.000   Max.   :10.000                      Max.   :0.9799
# Scatter plot of autonomy level against task complexity, coloured by model
# architecture.
# Both variables are integer scores, so thousands of rows share the same
# coordinates; with large opaque markers only the last point drawn in each
# cell was visible. A small seeded jitter plus transparency keeps every
# observation visible and the figure reproducible.
# The title previously read "Settler", a typo for "Scatter".
ggplot(
  dades,
  aes(x = task_complexity, y = autonomy_level, color = model_architecture)
) +
  geom_point(
    position = position_jitter(width = 0.3, height = 0.3, seed = 42),
    size = 1.5,
    alpha = 0.4
  ) +
  labs(
    title = "Scatter complexitat i autonomia segons model d'arquitectura",
    x = "Task Complexity",
    y = "Autonomy Level"
  )

# Scatter plot of autonomy level against task complexity, coloured by agent
# type.
# Both variables are integer scores, so thousands of rows share the same
# coordinates; with large opaque markers only the last point drawn in each
# cell was visible. A small seeded jitter plus transparency keeps every
# observation visible and the figure reproducible.
# The title previously read "Settler", a typo for "Scatter".
ggplot(
  dades,
  aes(x = task_complexity, y = autonomy_level, color = agent_type)
) +
  geom_point(
    position = position_jitter(width = 0.3, height = 0.3, seed = 42),
    size = 1.5,
    alpha = 0.4
  ) +
  labs(
    title = "Scatter complexitat i autonomia segons tipus d'agent",
    x = "Task Complexity",
    y = "Autonomy Level"
  )

Altres gràfiques exploratòries per complementar l’exploració (histograma i barplot):

# Distribution of autonomy_level.
# The variable only takes integer values, so 30 bins left empty gaps between
# the bars. One unit-wide bin per integer, with edges on the half-units,
# centres each bar on its value.
ggplot(dades, aes(x = autonomy_level)) +
  geom_histogram(binwidth = 1, boundary = 0.5) +
  labs(title = "Histograma autonomy_level", x = "autonomy_level", y = "count")

# Bar chart of the number of observations per model architecture, most
# frequent at the top after the flip.
# The original counted agent_type while the title and axis label said
# model_architecture; the counted variable now matches the labels.
# NOTE(review): if agent_type was the intended variable, change the labels
# instead.
dades %>%
  count(model_architecture, sort = TRUE) %>%
  head(10) %>%
  ggplot(aes(x = reorder(model_architecture, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(title = "Barplot model_architecture", x = "model_architecture", y = "count")

4. Anàlisi descriptiva

# Mean autonomy level and mean task complexity for each agent type.
summarise(
  group_by(dades, agent_type),
  mitjana_autonomia = mean(autonomy_level),
  mitjana_complexitat = mean(task_complexity)
)
## # A tibble: 16 × 3
##    agent_type           mitjana_autonomia mitjana_complexitat
##    <chr>                            <dbl>               <dbl>
##  1 Code Assistant                    6.10                6.15
##  2 Content Creator                   5.78                5.96
##  3 Customer Service                  6.23                6.30
##  4 Data Analyst                      5.93                5.97
##  5 Document Processor                6.25                6.23
##  6 Email Manager                     6.33                6.19
##  7 Financial Advisor                 6.13                6.09
##  8 HR Recruiter                      5.85                6.01
##  9 Marketing Assistant               6.08                6.11
## 10 Project Manager                   6.05                6.04
## 11 QA Tester                         6                   6.09
## 12 Research Assistant                5.99                6.10
## 13 Sales Assistant                   5.96                6.02
## 14 Social Media Manager              5.83                5.94
## 15 Task Planner                      5.89                6.02
## 16 Translation Agent                 6.06                6.11
# Mean autonomy level and mean task complexity for each model architecture.
summarise(
  group_by(dades, model_architecture),
  mitjana_autonomia = mean(autonomy_level),
  mitjana_complexitat = mean(task_complexity)
)
## # A tibble: 10 × 3
##    model_architecture mitjana_autonomia mitjana_complexitat
##    <chr>                          <dbl>               <dbl>
##  1 Claude-3.5                      6.06                6.04
##  2 CodeT5+                         6.12                6.17
##  3 Falcon-180B                     5.96                5.97
##  4 GPT-4o                          6.10                6.14
##  5 Gemini-Pro                      5.92                6.05
##  6 InstructGPT                     6.11                6.16
##  7 LLaMA-3                         5.94                6.06
##  8 Mixtral-8x7B                    6.02                6.07
##  9 PaLM-2                          6.07                6.15
## 10 Transformer-XL                  6.01                6.03
# Horizontal box plots of autonomy level, one box per agent type.
dades %>%
  ggplot(mapping = aes(x = agent_type, y = autonomy_level)) +
  geom_boxplot() +
  coord_flip()

# Horizontal box plots of autonomy level, one box per model architecture.
dades %>%
  ggplot(mapping = aes(x = model_architecture, y = autonomy_level)) +
  geom_boxplot() +
  coord_flip()

# Autonomy level against task complexity with a fitted straight line
# (linear model, no confidence band) to show the overall trend.
dades %>%
  ggplot(mapping = aes(x = task_complexity, y = autonomy_level)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Relació entre complexitat i autonomia") +
  xlab("Task Complexity") +
  ylab("Autonomy Level")
## `geom_smooth()` using formula = 'y ~ x'

5. Conclusió preliminar i pròximes passes