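Objective : Request text embeddings, inspect the structure of the response, then embed a vocabulary list and plot it in 2-D with t-SNE.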

# Request an embedding for a single short string.
response = client.embeddings.create(
    model="text-embedding-3-small",  # embedding model to use
    input="Yeh! Buddy",  # text to embed
)

# Convert the response object into a plain dictionary so it can be inspected.
resp_dict = response.model_dump()

# Top-level keys of the response (lines starting with `##` are recorded output).
resp_dict.keys()
## dict_keys(['data', 'model', 'object', 'usage'])
# Walk down the nesting and check the type at each level:
# the dumped response itself ...
type(resp_dict)
## <class 'dict'>
# ... the value stored under "data" ...
type(resp_dict["data"])
## <class 'list'>
# ... the "embedding" of the first item in "data" ...
type(resp_dict["data"][0]["embedding"])
## <class 'list'>
# ... and a single component of that embedding.
type(resp_dict["data"][0]["embedding"][0])
## <class 'float'>
# Look at the actual values stored under the top-level keys.
resp_dict.keys()
## dict_keys(['data', 'model', 'object', 'usage'])
# First four components of the first embedding vector.
resp_dict["data"][0]["embedding"][0:4]
## [-0.02148810774087906, 0.011688508093357086, -0.06987540423870087, 0.014997648075222969]
# Model name recorded in the response.
resp_dict["model"]
## 'text-embedding-3-small'
# Token counts reported for this request.
resp_dict["usage"]
## {'prompt_tokens': 4, 'total_tokens': 4}
import pandas as pd

# Load the vocabulary table from the CSV file.
df = pd.read_csv("ch_6_df.csv")
df.columns
## Index(['ch6_span_vec', 'ch6_eng_vec'], dtype='object')
# Pull the English column out as a plain Python list.
english_col = df["ch6_eng_vec"]
ch6_eng_lst = english_col.tolist()
ch6_eng_lst
## ['coat', 'to have just done something', 'department store', 'yellow', 'orange', 'last year', 'last night', 'the day before yesterday', 'yesterday', 'blue', 'cheap', 'white', 'blouse', 'purse; bag', 'boot', 'good', 'each', 'cash register', 'sock(s)', 'shirt', 't-shirt', 'expensive', 'wallet', 'shopping mall', 'jacket', 'belt', 'customer', 'color', 'to buy online', 'to drive', 'to know; to be acquainted with', 'tie', 'short (in length)', 'to cost', 'to give', 'suddenly', 'clerk', 'from', 'money', 'twice', '(cash)', 'elegant', 'skirt', '(sun)glasses', 'to spend (money)', 'gray', 'gloves', 'to match (with)', 'until', 'beautiful', 'raincoat', 'jeans', 'long', 'to wear; to take', 'crazy', 'brown', 'pantyhose; stockings', 'market', '(open-air) market', 'purple', 'black', 'new', 'to offer', 'other; another', 'to pay', 'pants', 'shorts', 'a pair (of shoes)', 'to seem', 'last; past', 'poor', '(fixed; set) price', 'to lend; to loan', 'sale', 'gift', 'to bargain', 'rich', 'red', 'clothes', 'underwear', 'pink', 'to know; to know how', 'sandals', 'last week', 'hat', 'sweater', 'credit card', 'debit card', 'store', 'to translate', 'suit', 'bathing suit', 'to wear; to use', 'salesperson', 'to sell', 'green', 'dress', 'once', 'already', 'sneakers']
# Embed the whole word list in a single request (the list is passed as one
# batch input) and convert the response straight into a dictionary.
resp = client.embeddings.create(
    input=ch6_eng_lst,
    model="text-embedding-3-small",
).model_dump()
# Token usage reported for the batch request.
resp["usage"]
## {'prompt_tokens': 221, 'total_tokens': 221}
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Create the t-SNE reducer that projects the embeddings down to 2-D.
# t-SNE is stochastic: without a fixed seed every run yields a different
# layout, so `random_state` is pinned to make the plot reproducible.
tsne = TSNE(n_components=2, perplexity=5, random_state=42)

# Stack the embedding vectors into one array, one row per item in resp["data"].
embeddings = np.array([item["embedding"] for item in resp["data"]])
# Echo the word list for reference.
ch6_eng_lst
## ['coat', 'to have just done something', 'department store', 'yellow', 'orange', 'last year', 'last night', 'the day before yesterday', 'yesterday', 'blue', 'cheap', 'white', 'blouse', 'purse; bag', 'boot', 'good', 'each', 'cash register', 'sock(s)', 'shirt', 't-shirt', 'expensive', 'wallet', 'shopping mall', 'jacket', 'belt', 'customer', 'color', 'to buy online', 'to drive', 'to know; to be acquainted with', 'tie', 'short (in length)', 'to cost', 'to give', 'suddenly', 'clerk', 'from', 'money', 'twice', '(cash)', 'elegant', 'skirt', '(sun)glasses', 'to spend (money)', 'gray', 'gloves', 'to match (with)', 'until', 'beautiful', 'raincoat', 'jeans', 'long', 'to wear; to take', 'crazy', 'brown', 'pantyhose; stockings', 'market', '(open-air) market', 'purple', 'black', 'new', 'to offer', 'other; another', 'to pay', 'pants', 'shorts', 'a pair (of shoes)', 'to seem', 'last; past', 'poor', '(fixed; set) price', 'to lend; to loan', 'sale', 'gift', 'to bargain', 'rich', 'red', 'clothes', 'underwear', 'pink', 'to know; to know how', 'sandals', 'last week', 'hat', 'sweater', 'credit card', 'debit card', 'store', 'to translate', 'suit', 'bathing suit', 'to wear; to use', 'salesperson', 'to sell', 'green', 'dress', 'once', 'already', 'sneakers']

# Project the embeddings onto two dimensions.
embeddings_2d = tsne.fit_transform(embeddings)

# Scatter plot: first component on the x-axis, second on the y-axis.
xs, ys = embeddings_2d[:, 0], embeddings_2d[:, 1]
plt.scatter(xs, ys)

# Label every point with the entry at the same position in the word list.
for idx, label in enumerate(ch6_eng_lst):
    x, y = embeddings_2d[idx]
    plt.annotate(label, (x, y))

plt.show()