Similar items
This page covers similar item queries for finding items closest to a given item. For query fundamentals, see Query Basics.
A similar item query returns the items which are closest to a given item. This is a retrieval operation that uses similarity search.
You can compute item similarity in two ways:
- Content similarity, which finds items with similar attributes (e.g., text descriptions, images). The intuition is: "Items that have similar attributes to this item."
- Collaborative similarity, which finds items that are frequently interacted with by the same users. The intuition is: "People who like this also like" - items that are co-interacted with by similar users.
Content similarity
Content similarity finds items that share similar attributes, such as text descriptions, categories, tags, or other metadata. This is useful when you want to find items that are objectively similar in their characteristics.
Intuition: "Items that have similar attributes to this item."
Prerequisites
- An engine with a content-based embedding (e.g., text embedding) configured
- An item to find similar items for
Query example
To find similar items based on content/attribute similarity, use the `similarity` retrieve type with attribute pooling:
- ShapedQL
- Python SDK
- TypeScript SDK
- JSON
SELECT *
FROM similarity(embedding_ref='text_embedding',
encoder='item_attribute_pooling',
input_item_id='$item_id',
limit=20)
from shaped import RankQueryBuilder, Similarity
# Find similar items based on content similarity
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve(
Similarity(
embedding_ref='text_embedding',
encoder={'type': 'item_attribute_pooling', 'input_item_id': '$item_id'},
limit=20
)
)
.build()
)
# Example usage with client
# response = client.rank(
# query=query,
# parameters={'item_id': 'item123'}
# )
import { RankQueryBuilder } from '@shaped-ai/api';
// Find similar items based on content similarity
const query = new RankQueryBuilder()
.from('item')
.retrieve(step =>
step.similarity({
embeddingRef: 'text_embedding',
encoder: { type: 'item_attribute_pooling', inputItemId: '$item_id' },
limit: 20
})
)
.build();
// Example usage with client
// const response = await client.rank({
// query,
// parameters: { item_id: 'item123' }
// });
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "similarity",
"embedding_ref": "text_embedding",
"query_encoder": {
"type": "item_attribute_pooling",
"input_item_id": "$parameters.item_id"
},
"limit": 20
}
]
},
"parameters": {
"item_id": "item123"
}
}
Content similarity with model scoring
Combine content similarity with a personalization model and text encoding similarity for more nuanced ranking:
- ShapedQL
- Python SDK
- TypeScript SDK
- JSON
SELECT *
FROM similarity(embedding_ref='text_embedding',
encoder='item_attribute_pooling',
input_item_id='$item_id', limit=50)
ORDER BY score(expression='0.7 * click_through_rate + 0.3 * cosine_similarity(text_encoding(item, embedding_ref=''text_embedding''), text_encoding(user, embedding_ref=''text_embedding''))', input_user_id='$user_id', input_interactions_item_ids='$interaction_item_ids')
LIMIT 20
from shaped import RankQueryBuilder, Similarity
# Content similarity with model scoring
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve(
Similarity(
embedding_ref='text_embedding',
encoder={'type': 'item_attribute_pooling', 'input_item_id': '$item_id'},
limit=50
)
)
.score(
value_model="0.7 * click_through_rate + 0.3 * cosine_similarity(text_encoding(item, embedding_ref='text_embedding'), text_encoding(user, embedding_ref='text_embedding'))",
input_user_id='$user_id',
input_interactions_item_ids='$interaction_item_ids'
)
.limit(20)
.build()
)
# Example usage with client
# response = client.rank(
# query=query,
# parameters={
# 'item_id': 'item123',
# 'user_id': 'user123',
# 'interaction_item_ids': ['item789', 'item012']
# }
# )
import { RankQueryBuilder } from '@shaped-ai/api';
// Content similarity with model scoring
const query = new RankQueryBuilder()
.from('item')
.retrieve(step =>
step.similarity({
embeddingRef: 'text_embedding',
encoder: { type: 'item_attribute_pooling', inputItemId: '$item_id' },
limit: 50
})
)
.score({
valueModel: "0.7 * click_through_rate + 0.3 * cosine_similarity(text_encoding(item, embedding_ref='text_embedding'), text_encoding(user, embedding_ref='text_embedding'))",
inputUserId: '$user_id',
inputInteractionsItemIds: '$interaction_item_ids'
})
.limit(20)
.build();
// Example usage with client
// const response = await client.rank({
// query,
// parameters: {
// item_id: 'item123',
// user_id: 'user123',
// interaction_item_ids: ['item789', 'item012']
// }
// });
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "similarity",
"embedding_ref": "text_embedding",
"query_encoder": {
"type": "item_attribute_pooling",
"input_item_id": "$parameters.item_id"
},
"limit": 50
}
],
"score": {
"value_model": "0.7 * click_through_rate + 0.3 * cosine_similarity(text_encoding(item, embedding_ref='text_embedding'), text_encoding(user, embedding_ref='text_embedding'))",
"input_user_id": "$parameters.user_id",
"input_interactions_item_ids": "$parameters.interaction_item_ids"
},
"limit": 20
},
"parameters": {
"item_id": "item123",
"user_id": "user123",
"interaction_item_ids": ["item789", "item012"]
}
}
Collaborative similarity
Collaborative similarity finds items that are frequently interacted with by the same users, based on interaction patterns rather than item attributes. This captures the "wisdom of the crowd" - if users who liked item A also liked item B, then A and B are similar.
Intuition: "People who like this also like" - items that are co-interacted with by similar users.
Prerequisites
- An engine with a trained collaborative embedding (e.g., ALS) configured
- An item to find similar items for
Query example
To find similar items based on collaborative filtering (interaction patterns), use the `similarity` retrieve type with a precomputed item embedding:
- ShapedQL
- Python SDK
- TypeScript SDK
- JSON
SELECT *
FROM similarity(embedding_ref='als_embedding',
encoder='precomputed_item',
input_item_id='$item_id',
limit=20)
from shaped import RankQueryBuilder, Similarity
# Find similar items based on collaborative filtering
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve(
Similarity(
embedding_ref='als_embedding',
encoder={'type': 'precomputed_item', 'input_item_id': '$item_id'},
limit=20
)
)
.build()
)
# Example usage with client
# response = client.rank(
# query=query,
# parameters={'item_id': 'item123'}
# )
import { RankQueryBuilder } from '@shaped-ai/api';
// Find similar items based on collaborative filtering
const query = new RankQueryBuilder()
.from('item')
.retrieve(step =>
step.similarity({
embeddingRef: 'als_embedding',
encoder: { type: 'precomputed_item', inputItemId: '$item_id' },
limit: 20
})
)
.build();
// Example usage with client
// const response = await client.rank({
// query,
// parameters: { item_id: 'item123' }
// });
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "similarity",
"embedding_ref": "als_embedding",
"query_encoder": {
"type": "precomputed_item",
"input_item_id": "$parameters.item_id"
},
"limit": 20
}
]
},
"parameters": {
"item_id": "item123"
}
}
Collaborative similarity with model scoring
Combine collaborative similarity with a personalization model to rank similar items by predicted user engagement:
- ShapedQL
- Python SDK
- TypeScript SDK
- JSON
SELECT *
FROM similarity(embedding_ref='als_embedding',
encoder='precomputed_item',
input_item_id='$item_id', limit=50)
ORDER BY score(expression='0.6 * click_through_rate + 0.4 * conversion_rate', input_user_id='$user_id', input_interactions_item_ids='$interaction_item_ids')
LIMIT 20
from shaped import RankQueryBuilder, Similarity
# Collaborative similarity with model scoring
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve(
Similarity(
embedding_ref='als_embedding',
encoder={'type': 'precomputed_item', 'input_item_id': '$item_id'},
limit=50
)
)
.score(
value_model='0.6 * click_through_rate + 0.4 * conversion_rate',
input_user_id='$user_id',
input_interactions_item_ids='$interaction_item_ids'
)
.limit(20)
.build()
)
# Example usage with client
# response = client.rank(
# query=query,
# parameters={
# 'item_id': 'item123',
# 'user_id': 'user123',
# 'interaction_item_ids': ['item789', 'item012']
# }
# )
import { RankQueryBuilder } from '@shaped-ai/api';
// Collaborative similarity with model scoring
const query = new RankQueryBuilder()
.from('item')
.retrieve(step =>
step.similarity({
embeddingRef: 'als_embedding',
encoder: { type: 'precomputed_item', inputItemId: '$item_id' },
limit: 50
})
)
.score({
valueModel: '0.6 * click_through_rate + 0.4 * conversion_rate',
inputUserId: '$user_id',
inputInteractionsItemIds: '$interaction_item_ids'
})
.limit(20)
.build();
// Example usage with client
// const response = await client.rank({
// query,
// parameters: {
// item_id: 'item123',
// user_id: 'user123',
// interaction_item_ids: ['item789', 'item012']
// }
// });
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "similarity",
"embedding_ref": "als_embedding",
"query_encoder": {
"type": "precomputed_item",
"input_item_id": "$parameters.item_id"
},
"limit": 50
}
],
"score": {
"value_model": "0.6 * click_through_rate + 0.4 * conversion_rate",
"input_user_id": "$parameters.user_id",
"input_interactions_item_ids": "$parameters.interaction_item_ids"
},
"limit": 20
},
"parameters": {
"item_id": "item123",
"user_id": "user123",
"interaction_item_ids": ["item789", "item012"]
}
}
Similar items with multi-objective scoring
Combine multiple similarity signals and scoring models to find items that are both similar and likely to engage the user. This approach balances content similarity, collaborative similarity, and personalization.
Prerequisites
- An engine with both content and collaborative embeddings configured
- Multiple trained scoring models (e.g., `click_through_rate`, `conversion_rate`)
- An item to find similar items for
- Optionally, a user ID for personalization
Query example
Retrieve candidates from multiple similarity sources and score using an ensemble:
- ShapedQL
- Python SDK
- TypeScript SDK
- JSON
SELECT *
FROM similarity(embedding_ref='text_embedding',
encoder='item_attribute_pooling',
input_item_id='$item_id', limit=50, name='content'),
similarity(embedding_ref='als_embedding',
encoder='precomputed_item',
input_item_id='$item_id', limit=50, name='collab')
ORDER BY score(expression='0.4 * retrieval.content + 0.3 * retrieval.collab + 0.2 * click_through_rate + 0.1 * conversion_rate', input_user_id='$user_id', input_interactions_item_ids='$interaction_item_ids')
LIMIT 20
from shaped import RankQueryBuilder, Similarity
# Multi-objective scoring with multiple similarity sources
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve(
Similarity(
embedding_ref='text_embedding',
encoder={'type': 'item_attribute_pooling', 'input_item_id': '$item_id'},
limit=50,
name='content'
),
Similarity(
embedding_ref='als_embedding',
encoder={'type': 'precomputed_item', 'input_item_id': '$item_id'},
limit=50,
name='collab'
)
)
.score(
value_model='0.4 * retrieval.content + 0.3 * retrieval.collab + 0.2 * click_through_rate + 0.1 * conversion_rate',
input_user_id='$user_id',
input_interactions_item_ids='$interaction_item_ids'
)
.limit(20)
.build()
)
# Example usage with client
# response = client.rank(
# query=query,
# parameters={
# 'item_id': 'item123',
# 'user_id': 'user123',
# 'interaction_item_ids': ['item789', 'item012']
# }
# )
import { RankQueryBuilder } from '@shaped-ai/api';
// Multi-objective scoring with multiple similarity sources
const query = new RankQueryBuilder()
.from('item')
.retrieve(
step => step.similarity({
embeddingRef: 'text_embedding',
encoder: { type: 'item_attribute_pooling', inputItemId: '$item_id' },
limit: 50,
name: 'content'
}),
step => step.similarity({
embeddingRef: 'als_embedding',
encoder: { type: 'precomputed_item', inputItemId: '$item_id' },
limit: 50,
name: 'collab'
})
)
.score({
valueModel: '0.4 * retrieval.content + 0.3 * retrieval.collab + 0.2 * click_through_rate + 0.1 * conversion_rate',
inputUserId: '$user_id',
inputInteractionsItemIds: '$interaction_item_ids'
})
.limit(20)
.build();
// Example usage with client
// const response = await client.rank({
// query,
// parameters: {
// item_id: 'item123',
// user_id: 'user123',
// interaction_item_ids: ['item789', 'item012']
// }
// });
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"name": "content",
"type": "similarity",
"embedding_ref": "text_embedding",
"query_encoder": {
"type": "item_attribute_pooling",
"input_item_id": "$parameters.item_id"
},
"limit": 50
},
{
"name": "collab",
"type": "similarity",
"embedding_ref": "als_embedding",
"query_encoder": {
"type": "precomputed_item",
"input_item_id": "$parameters.item_id"
},
"limit": 50
}
],
"score": {
"value_model": "0.4 * retrieval.content + 0.3 * retrieval.collab + 0.2 * click_through_rate + 0.1 * conversion_rate",
"input_user_id": "$parameters.user_id",
"input_interactions_item_ids": "$parameters.interaction_item_ids"
},
"limit": 20
},
"parameters": {
"item_id": "item123",
"user_id": "user123",
"interaction_item_ids": ["item789", "item012"]
}
}