Search
Search queries let you find items based on a text query. Shaped supports multiple search modes, from simple keyword matching to advanced personalized search. For query fundamentals, see Query Basics.
Lexical search
Lexical search finds items that contain the exact keywords from the query, with optional fuzzy matching for typos. This is the simplest form of search and works well when users know exactly what they're looking for.
Prerequisites
- Lexical search configured on searchable text fields
- A search query text
Query example
- ShapedQL
- Python SDK
- TypeScript SDK
- JSON
SELECT *
FROM text_search(query='$query_text', mode='lexical', fuzziness=2, limit=20)
from shaped import RankQueryBuilder, TextSearch
# Basic lexical search
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve(
TextSearch(
query='$query_text',
mode='lexical',
fuzziness=2,
limit=20
)
)
.build()
)
# With parameters
# response = client.rank(
# query=query,
# parameters={
# 'query_text': 'Blue shirt'
# }
# )
import { RankQueryBuilder } from '@shaped-ai/api';
// Basic lexical search
const query = new RankQueryBuilder()
.from('item')
.retrieve(step =>
step.textSearch({
query: '$query_text',
mode: { type: 'lexical', fuzziness: 2 },
limit: 20
})
)
.build();
// With parameters
// const response = await client.rank({
// query,
// parameters: {
// query_text: 'Blue shirt'
// }
// });
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "lexical", "fuzziness_edit_distance": 2 },
"limit": 20
}
]
},
"parameters": {
"query_text": "Blue shirt"
}
}
Semantic search
Semantic search uses vector embeddings to find items that are semantically similar to the query text, even if they don't contain the exact keywords. This enables users to find items by meaning rather than exact word matches.
Prerequisites
- An engine with a text embedding configured
- A search query text
Query example
- Python SDK
- TypeScript SDK
- ShapedQL
- JSON
from shaped import RankQueryBuilder, TextSearch
# Semantic search with text embeddings
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve(
TextSearch(
query='$query_text',
mode='vector',
text_embedding_ref='text_embedding',
limit=20
)
)
.build()
)
# Example usage with client
# response = client.rank(
# query=query,
# parameters={
# 'query_text': 'comfortable summer outfit'
# }
# )
# results = response['items']
import { RankQueryBuilder } from '@shaped-ai/api';
// Semantic search with text embeddings
const query = new RankQueryBuilder()
.from('item')
.retrieve(step =>
step.textSearch({
query: '$query_text',
mode: {
type: 'vector',
textEmbeddingRef: 'text_embedding'
},
limit: 20
})
)
.build();
// Example usage with client
// const response = await client.rank({
// query,
// parameters: {
// query_text: 'comfortable summer outfit'
// }
// });
// const results = response.items;
SELECT *
FROM text_search(
query='$query_text',
mode='vector',
text_embedding_ref='text_embedding',
limit=20
)
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "vector", "text_embedding_ref": "text_embedding" },
"limit": 20
}
]
},
"parameters": {
"query_text": "Blue shirt"
}
}
Text reranking
For text-heavy search, you can retrieve candidates (lexical, vector, or hybrid), then rerank with a heavier-weight scoring expression.
Common zero-shot rerankers include colbert_v2() and cross_encoder(); see the ShapedQL reference docs for details.
Example (hybrid retrieval + ColBERTv2 reranking):
SELECT *
FROM text_search(
query='$query_text',
mode='vector',
text_embedding_ref='text_embedding',
limit=50,
name='vector_search'
),
text_search(
query='$query_text',
mode='lexical',
fuzziness=2,
limit=50,
name='lexical_search'
)
ORDER BY score(expression='colbert_v2(item, $query_text)')
LIMIT 20
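As a sketch, the same two-stage pattern can be expressed with the Python SDK builder, assuming the score step accepts a reranker expression such as colbert_v2 without the personalization inputs used elsewhere on this page:
from shaped import RankQueryBuilder, TextSearch
# Sketch: hybrid retrieval followed by ColBERTv2 reranking.
# Assumes the score expression supports reranker functions as in the
# ShapedQL example above.
query = (
    RankQueryBuilder()
    .from_entity('item')
    .retrieve([
        TextSearch(
            name='vector_search',
            query='$query_text',
            mode='vector',
            text_embedding_ref='text_embedding',
            limit=50
        ),
        TextSearch(
            name='lexical_search',
            query='$query_text',
            mode='lexical',
            fuzziness=2,
            limit=50
        )
    ])
    .score(value_model='colbert_v2(item, $query_text)')
    .limit(20)
    .build()
)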
Hybrid search
Hybrid search combines semantic and lexical search to return a blended list of results. This balances semantic understanding with exact keyword matching, providing better recall than either approach alone.
Prerequisites
- An engine with a text embedding configured for vector search
- Lexical search configured on searchable text fields
- A search query text
Query example
This example retrieves candidates from two sources (vector search and lexical search), then returns the blended results:
- Python SDK
- TypeScript SDK
- ShapedQL
- JSON
from shaped import RankQueryBuilder, TextSearch
# Hybrid search combining vector and lexical search
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve([
# Vector search for semantic matches
TextSearch(
query='$query_text',
mode='vector',
text_embedding_ref='text_embedding',
limit=50 # Retrieve 50 semantic matches
),
# Lexical search for keyword matches
TextSearch(
query='$query_text',
mode='lexical',
fuzziness=2,
limit=50 # Retrieve 50 lexical matches
)
])
.limit(20) # Return top 20 results after blending
.build()
)
# Example usage with client
# response = client.rank(
# query=query,
# parameters={
# 'query_text': 'comfortable summer outfit'
# }
# )
# results = response['items']
import { RankQueryBuilder } from '@shaped-ai/api';
// Hybrid search combining vector and lexical search
const query = new RankQueryBuilder()
.from('item')
.retrieve([
// Vector search for semantic matches
step => step.textSearch({
query: '$query_text',
mode: {
type: 'vector',
textEmbeddingRef: 'text_embedding'
},
limit: 50 // Retrieve 50 semantic matches
}),
// Lexical search for keyword matches
step => step.textSearch({
query: '$query_text',
mode: {
type: 'lexical',
fuzziness: 2
},
limit: 50 // Retrieve 50 lexical matches
})
])
.limit(20) // Return top 20 results after blending
.build();
// Example usage with client
// const response = await client.rank({
// query,
// parameters: {
// query_text: 'comfortable summer outfit'
// }
// });
// const results = response.items;
SELECT *
FROM text_search(
query='$query_text',
mode='vector',
text_embedding_ref='text_embedding',
limit=50
),
text_search(
query='$query_text',
mode='lexical',
fuzziness=2,
limit=50
)
LIMIT 20
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "vector", "text_embedding_ref": "text_embedding" },
"limit": 50
},
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "lexical", "fuzziness_edit_distance": 2 },
"limit": 50
}
],
"limit": 20
},
"parameters": {
"query_text": "Blue shirt"
}
}
Tuning the blend
You can adjust the limit on each retriever to control the balance between
semantic and lexical results. A higher limit on one retriever will give it
more influence on the final results.
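For example, to weight the blend toward semantic matches, retrieve more vector candidates than lexical candidates. The sketch below uses illustrative limits of 70 and 30; tune them for your catalog:
from shaped import RankQueryBuilder, TextSearch
# Sketch: favor semantic matches by retrieving more vector candidates (70)
# than lexical candidates (30) before blending down to 20 results.
query = (
    RankQueryBuilder()
    .from_entity('item')
    .retrieve([
        TextSearch(
            query='$query_text',
            mode='vector',
            text_embedding_ref='text_embedding',
            limit=70
        ),
        TextSearch(
            query='$query_text',
            mode='lexical',
            fuzziness=2,
            limit=30
        )
    ])
    .limit(20)
    .build()
)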
Personalized hybrid search
Personalized hybrid search adds a scoring stage that re-ranks the blended search results based on user preferences. This is useful when you want search results that are both relevant to the query and personalized to the user's taste.
Prerequisites
- An engine with a text embedding configured for vector search
- Lexical search configured on searchable text fields
- A trained scoring model (e.g., click_through_rate, conversion_rate)
- A user ID to personalize for
- A search query text
Query example
This example retrieves candidates from both vector and lexical search, then scores them using a personalization model:
- ShapedQL
- JSON
SELECT *
FROM text_search(query='$query_text', mode='vector',
text_embedding_ref='text_embedding', limit=50),
text_search(query='$query_text', mode='lexical', fuzziness=2, limit=50)
ORDER BY score(expression='click_through_rate', input_user_id='$user_id', input_interactions_item_ids='$interaction_item_ids')
LIMIT 20
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "vector", "text_embedding_ref": "text_embedding" },
"limit": 50
},
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "lexical", "fuzziness_edit_distance": 2 },
"limit": 50
}
],
"score": {
"value_model": "click_through_rate",
"input_user_id": "$parameters.user_id",
"input_interactions_item_ids": "$parameters.interaction_item_ids"
},
"limit": 20
},
"parameters": {
"query_text": "Blue shirt",
"user_id": "user123",
"interaction_item_ids": ["item789", "item012"]
}
}
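The same personalized query can also be built with the Python SDK. This sketch mirrors the ShapedQL and JSON examples above, using the builder's score arguments shown in the next section:
from shaped import RankQueryBuilder, TextSearch
# Sketch: hybrid retrieval re-ranked by a click-through-rate model for a user.
query = (
    RankQueryBuilder()
    .from_entity('item')
    .retrieve([
        TextSearch(
            query='$query_text',
            mode='vector',
            text_embedding_ref='text_embedding',
            limit=50
        ),
        TextSearch(
            query='$query_text',
            mode='lexical',
            fuzziness=2,
            limit=50
        )
    ])
    .score(
        value_model='click_through_rate',
        input_user_id='$user_id',
        input_interactions_item_ids='$interaction_item_ids'
    )
    .limit(20)
    .build()
)
# With parameters
# response = client.rank(
#     query=query,
#     parameters={
#         'query_text': 'Blue shirt',
#         'user_id': 'user123',
#         'interaction_item_ids': ['item789', 'item012']
#     }
# )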
Combining multiple scoring models
You can combine multiple scoring models using a weighted expression:
- ShapedQL
- Python SDK
- TypeScript SDK
- JSON
SELECT *
FROM text_search(query='$query_text', mode='vector',
text_embedding_ref='text_embedding', limit=50),
text_search(query='$query_text', mode='lexical', fuzziness=2, limit=50)
ORDER BY score(expression='0.7 * click_through_rate + 0.3 * conversion_rate', input_user_id='$user_id', input_interactions_item_ids='$interaction_item_ids')
LIMIT 20
from shaped import RankQueryBuilder, TextSearch
# Combined search with model scoring
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve([
TextSearch(
query='$query_text',
mode='vector',
text_embedding_ref='text_embedding',
limit=50
),
TextSearch(
query='$query_text',
mode='lexical',
fuzziness=2,
limit=50
)
])
.score(
value_model='0.7 * click_through_rate + 0.3 * conversion_rate',
input_user_id='$user_id',
input_interactions_item_ids='$interaction_item_ids'
)
.limit(20)
.build()
)
# With parameters
# response = client.rank(
# query=query,
# parameters={
# 'query_text': 'Blue shirt',
# 'user_id': 'user123'
# }
# )
import { RankQueryBuilder } from '@shaped-ai/api';
// Combined search with model scoring
const query = new RankQueryBuilder()
.from('item')
.retrieve([
step => step.textSearch({
query: '$query_text',
mode: { type: 'vector', textEmbeddingRef: 'text_embedding' },
limit: 50
}),
step => step.textSearch({
query: '$query_text',
mode: { type: 'lexical', fuzziness: 2 },
limit: 50
})
])
.score({
valueModel: '0.7 * click_through_rate + 0.3 * conversion_rate',
inputUserId: '$user_id',
inputInteractionsItemIds: '$interaction_item_ids'
})
.limit(20)
.build();
// With parameters
// const response = await client.rank({
// query,
// parameters: {
// query_text: 'Blue shirt',
// user_id: 'user123'
// }
// });
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "vector", "text_embedding_ref": "text_embedding" },
"limit": 50
},
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "lexical", "fuzziness_edit_distance": 2 },
"limit": 50
}
],
"score": {
"value_model": "0.7 * click_through_rate + 0.3 * conversion_rate",
"input_user_id": "$parameters.user_id",
"input_interactions_item_ids": "$parameters.interaction_item_ids"
},
"limit": 20
},
"parameters": {
"query_text": "Blue shirt",
"user_id": "user123",
"interaction_item_ids": ["item789", "item012"]
}
}
Blending retrieval scores with models
Combine retrieval scores from multiple search retrievers with model predictions for more nuanced ranking:
- ShapedQL
- Python SDK
- TypeScript SDK
- JSON
SELECT *
FROM text_search(query='$query_text', mode='vector',
text_embedding_ref='text_embedding', limit=50,
name='vector_search'),
text_search(query='$query_text', mode='lexical', fuzziness=2, limit=50,
name='lexical_search')
ORDER BY score(expression='0.5 * retrieval.vector_search + 0.3 * retrieval.lexical_search + 0.2 * click_through_rate', input_user_id='$user_id', input_interactions_item_ids='$interaction_item_ids')
LIMIT 20
from shaped import RankQueryBuilder, TextSearch
# Blending retrieval scores with model predictions
query = (
RankQueryBuilder()
.from_entity('item')
.retrieve([
TextSearch(
name='vector_search',
query='$query_text',
mode='vector',
text_embedding_ref='text_embedding',
limit=50
),
TextSearch(
name='lexical_search',
query='$query_text',
mode='lexical',
fuzziness=2,
limit=50
)
])
.score(
value_model='0.5 * retrieval.vector_search + 0.3 * retrieval.lexical_search + 0.2 * click_through_rate',
input_user_id='$user_id',
input_interactions_item_ids='$interaction_item_ids'
)
.limit(20)
.build()
)
# With parameters
# response = client.rank(
# query=query,
# parameters={
# 'query_text': 'Blue shirt',
# 'user_id': 'user123'
# }
# )
import { RankQueryBuilder } from '@shaped-ai/api';
// Blending retrieval scores with model predictions
const query = new RankQueryBuilder()
.from('item')
.retrieve([
step => step.textSearch({
name: 'vector_search',
query: '$query_text',
mode: { type: 'vector', textEmbeddingRef: 'text_embedding' },
limit: 50
}),
step => step.textSearch({
name: 'lexical_search',
query: '$query_text',
mode: { type: 'lexical', fuzziness: 2 },
limit: 50
})
])
.score({
valueModel: '0.5 * retrieval.vector_search + 0.3 * retrieval.lexical_search + 0.2 * click_through_rate',
inputUserId: '$user_id',
inputInteractionsItemIds: '$interaction_item_ids'
})
.limit(20)
.build();
// With parameters
// const response = await client.rank({
// query,
// parameters: {
// query_text: 'Blue shirt',
// user_id: 'user123'
// }
// });
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"name": "vector_search",
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "vector", "text_embedding_ref": "text_embedding" },
"limit": 50
},
{
"name": "lexical_search",
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "lexical", "fuzziness_edit_distance": 2 },
"limit": 50
}
],
"score": {
"value_model": "0.5 * retrieval.vector_search + 0.3 * retrieval.lexical_search + 0.2 * click_through_rate",
"input_user_id": "$parameters.user_id",
"input_interactions_item_ids": "$parameters.interaction_item_ids"
},
"limit": 20
},
"parameters": {
"query_text": "Blue shirt",
"user_id": "user123",
"interaction_item_ids": ["item789", "item012"]
}
}
Using text encodings for semantic similarity
Score search results using semantic similarity between user preferences and item text encodings:
- ShapedQL
- JSON
SELECT *
FROM text_search(query='$query_text', mode='vector',
text_embedding_ref='text_embedding', limit=50)
ORDER BY score(expression='0.7 * click_through_rate + 0.3 * cosine_similarity(text_encoding(user, embedding_ref=''text_embedding''), text_encoding(item, embedding_ref=''text_embedding''))', input_user_id='$user_id', input_interactions_item_ids='$interaction_item_ids')
LIMIT 20
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "vector", "text_embedding_ref": "text_embedding" },
"limit": 50
}
],
"score": {
"value_model": "0.7 * click_through_rate + 0.3 * cosine_similarity(text_encoding(user, embedding_ref='text_embedding'), text_encoding(item, embedding_ref='text_embedding'))",
"input_user_id": "$parameters.user_id",
"input_interactions_item_ids": "$parameters.interaction_item_ids"
},
"limit": 20
},
"parameters": {
"query_text": "Blue shirt",
"user_id": "user123",
"interaction_item_ids": ["item789", "item012"]
}
}
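If you are using the Python SDK, the same expression can be passed to the score step as a string. This is a sketch; the expression is identical to the ShapedQL example above:
from shaped import RankQueryBuilder, TextSearch
# Sketch: blend a CTR model with user-to-item text-embedding similarity.
query = (
    RankQueryBuilder()
    .from_entity('item')
    .retrieve(
        TextSearch(
            query='$query_text',
            mode='vector',
            text_embedding_ref='text_embedding',
            limit=50
        )
    )
    .score(
        value_model=(
            "0.7 * click_through_rate + 0.3 * cosine_similarity("
            "text_encoding(user, embedding_ref='text_embedding'), "
            "text_encoding(item, embedding_ref='text_embedding'))"
        ),
        input_user_id='$user_id',
        input_interactions_item_ids='$interaction_item_ids'
    )
    .limit(20)
    .build()
)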
Incorporating item attributes in search ranking
Boost or penalize search results based on item attributes like price, rating, or other metadata:
- ShapedQL
- JSON
SELECT *
FROM text_search(query='$query_text', mode='vector',
text_embedding_ref='text_embedding', limit=50),
text_search(query='$query_text', mode='lexical', fuzziness=2, limit=50)
ORDER BY score(expression='0.6 * click_through_rate - 0.05 * item.price + 0.2 * item.rating + 0.15 * item.review_count', input_user_id='$user_id', input_interactions_item_ids='$interaction_item_ids')
LIMIT 20
{
"query": {
"type": "rank",
"from": "item",
"retrieve": [
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "vector", "text_embedding_ref": "text_embedding" },
"limit": 50
},
{
"type": "text_search",
"input_text_query": "$parameters.query_text",
"mode": { "type": "lexical", "fuzziness_edit_distance": 2 },
"limit": 50
}
],
"score": {
"value_model": "0.6 * click_through_rate - 0.05 * item.price + 0.2 * item.rating + 0.15 * item.review_count",
"input_user_id": "$parameters.user_id",
"input_interactions_item_ids": "$parameters.interaction_item_ids"
},
"limit": 20
},
"parameters": {
"query_text": "Blue shirt",
"user_id": "user123",
"interaction_item_ids": ["item789", "item012"]
}
}
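As a Python SDK sketch of the same attribute-aware ranking (the weights are illustrative; tune them for your catalog):
from shaped import RankQueryBuilder, TextSearch
# Sketch: boost highly rated, well-reviewed items and lightly penalize price.
query = (
    RankQueryBuilder()
    .from_entity('item')
    .retrieve([
        TextSearch(
            query='$query_text',
            mode='vector',
            text_embedding_ref='text_embedding',
            limit=50
        ),
        TextSearch(
            query='$query_text',
            mode='lexical',
            fuzziness=2,
            limit=50
        )
    ])
    .score(
        value_model='0.6 * click_through_rate - 0.05 * item.price + 0.2 * item.rating + 0.15 * item.review_count',
        input_user_id='$user_id',
        input_interactions_item_ids='$interaction_item_ids'
    )
    .limit(20)
    .build()
)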