google.api_core.exceptions.InvalidArgument: 400 Qu...

OfficeFusm · 02-23-2025 08:24 PM

Trying to use find_nearest function to use vector querying from Firestore.

Below is my main.py file

embeddings = embed_text([body])
print(f"Type of embeddings before find_nearest: {type(embeddings)}")
if isinstance(embeddings, list):
    if len(embeddings) > 0:
        print(f"Type of first element in embeddings: {type(embeddings[0])}")
    else:
        print("Embeddings list is empty")
    print(f"Length of embeddings before find_nearest: {len(embeddings)}")
    print(f"Value of embeddings before find_nearest: {embeddings}")
else:
    print(f"Embeddings is not a list. Value: {embeddings}")
print('embedding type')
print(type(embeddings))
print([type(x) for x in embeddings])

knn = firestore.Client().collection('products').find_nearest(
    vector_field="embedding_field",
    query_vector=embeddings,
    distance_measure=DistanceMeasure.EUCLIDEAN,
    limit=5,
    distance_threshold=1.0,
    distance_result_field="vector_distance",
)
# knn = db.collection('products').order_by_vector("embedding_field", embeddings).limit(5)

# answer = response.choices[0].message.content
print('Knn')
print(knn)
print(type(knn))

docs = knn.get()
print('Docs')
print(docs)
for doc in docs:
    print(f"{doc.id}, Distance: {doc.get('vector_distance')}")

and my embed_text.py is as follow

def embed_text(texts):
    """Embeds texts with a pre-trained, foundational model.

    Returns:
        A list of lists containing the embedding vectors for each input text
    """

    # A list of texts to be embedded.
    # texts = ["banana muffins? ", "banana bread? banana muffins?"]
    # The dimensionality of the output embeddings.
    # dimensionality = 256
    # The task type for embedding. Check the available tasks in the model's documentation.
    task = "RETRIEVAL_DOCUMENT"


    inputs = [TextEmbeddingInput(text, task) for text in texts]
    # kwargs = dict(output_dimensionality=dimensionality) if dimensionality else {}
    # embeddings = model.get_embeddings(inputs, **kwargs)
    embeddings = model.get_embeddings(inputs)

    print(embeddings)
    # Example response:
    # [[0.006135190837085247, -0.01462465338408947, 0.004978656303137541, ...], [0.1234434666, ...]],
    # return [embedding.values for embedding in embeddings]
    return np.array(embeddings[0].values).tolist()

I already have a vector field in my products collection in firestore, which is the "embedding_field", and i already created a vector index for it.

However, i still got this error:

google.api_core.exceptions.InvalidArgument: 400 Query Value must be of type vector.

I have no clue what is the issue.