Personalized search
This guide shows how to implement personalized search by combining hybrid (lexical + vector) search with a gradient-boosted decision tree (GBDT) scoring model that is trained on user interactions.
First, instantiate the Shaped client:
- Python
- TypeScript
from shaped import Client

# Build the API client once; replace the placeholder with your own key.
client = Client(
    api_key="YOUR_KEY_HERE",
)
import { Client } from '@shaped.ai/client';

// Placeholder credential: substitute your own Shaped API key.
const apiKey = 'YOUR_API_KEY';
const client = new Client(apiKey);
Upload data
To do personalized search, you need to connect to both an item table and an interaction table. The interaction table tracks user behavior to train the personalization model.
The first step is to declare your item table:
- Python
- TypeScript
# Column name -> column type for the item table.
item_columns = {
    "item_id": "Int64",
    "movie_title": "String",
    "poster_url": "String",
    "description": "String",
    "release_date": "String",
    "cast": "Array(String)",
}

# Table declaration: a custom-schema table named "pixar_movies".
item_table_config = {
    "schema_type": "CUSTOM",
    "name": "pixar_movies",
    "column_schema": item_columns,
}

client.create_table(item_table_config)
import type { TableRequest } from '@shaped.ai/client';

// Column name -> column type for the item table.
const itemColumns: TableRequest['column_schema'] = {
  item_id: 'Int64',
  movie_title: 'String',
  poster_url: 'String',
  description: 'String',
  release_date: 'String',
  cast: 'Array(String)',
};

// Table declaration: a custom-schema table named 'pixar_movies'.
const itemTableConfig: TableRequest = {
  schema_type: 'CUSTOM',
  name: 'pixar_movies',
  column_schema: itemColumns,
};

await client.createTable(itemTableConfig);
Upload your item data, where `records` is a list of rows whose keys match the column schema declared above:
- Python
- TypeScript
# Each record is one row whose keys match the column_schema declared for
# "pixar_movies". These are sample rows; replace them with your own catalog.
records = [
    {
        "item_id": 1,
        "movie_title": "Toy Story",
        "poster_url": "https://example.com/posters/toy_story.jpg",
        "description": "A cowboy doll feels threatened when a new spaceman "
                       "action figure becomes his owner's favorite toy.",
        "release_date": "1995-11-22",
        "cast": ["Tom Hanks", "Tim Allen"],
    },
    {
        "item_id": 187541,
        "movie_title": "Incredibles 2",
        "poster_url": "https://example.com/posters/incredibles_2.jpg",
        "description": "The Parr family of superheroes returns while "
                       "Helen is recruited to lead a campaign for supers.",
        "release_date": "2018-06-15",
        "cast": ["Craig T. Nelson", "Holly Hunter"],
    },
]

# Call the upload directly: a problem with `records` should surface here
# instead of being swallowed and silently skipping the upload.
client.insert_table_rows("pixar_movies", records)
Now create an interaction table to track user behavior:
- Python
- TypeScript
# Column name -> column type for the interaction table.
interaction_columns = {
    "user_id": "String",
    "item_id": "Int64",
    "event_type": "String",
    "timestamp": "String",
}

# Table declaration: a custom-schema table named "user_interactions".
interaction_table_config = {
    "schema_type": "CUSTOM",
    "name": "user_interactions",
    "column_schema": interaction_columns,
}

client.create_table(interaction_table_config)
// Column name -> column type for the interaction table.
const interactionColumns: TableRequest['column_schema'] = {
  user_id: 'String',
  item_id: 'Int64',
  event_type: 'String',
  timestamp: 'String',
};

// Table declaration: a custom-schema table named 'user_interactions'.
const interactionTableConfig: TableRequest = {
  schema_type: 'CUSTOM',
  name: 'user_interactions',
  column_schema: interactionColumns,
};

await client.createTable(interactionTableConfig);
Upload sample interaction data:
- Python
- TypeScript
# (user_id, item_id, event_type, timestamp) for each sample event.
sample_events = [
    ("user1", 187541, "click", "2024-01-15T10:00:00Z"),
    ("user1", 177765, "click", "2024-01-16T14:30:00Z"),
    ("user1", 1, "purchase", "2024-01-17T09:15:00Z"),
    ("user2", 134853, "click", "2024-01-15T11:20:00Z"),
    ("user2", 170957, "click", "2024-01-16T16:45:00Z"),
]

# Expand each tuple into a row keyed by the interaction table's columns.
interactions = [
    {
        "user_id": user,
        "item_id": item,
        "event_type": event,
        "timestamp": occurred_at,
    }
    for user, item, event, occurred_at in sample_events
]

client.insert_table_rows("user_interactions", interactions)
// [user_id, item_id, event_type, timestamp] for each sample event.
const sampleEvents: Array<[string, number, string, string]> = [
  ['user1', 187541, 'click', '2024-01-15T10:00:00Z'],
  ['user1', 177765, 'click', '2024-01-16T14:30:00Z'],
  ['user1', 1, 'purchase', '2024-01-17T09:15:00Z'],
  ['user2', 134853, 'click', '2024-01-15T11:20:00Z'],
  ['user2', 170957, 'click', '2024-01-16T16:45:00Z'],
];

// Expand each tuple into a row keyed by the interaction table's columns.
const interactions = sampleEvents.map(
  ([user_id, item_id, event_type, timestamp]) => ({
    user_id,
    item_id,
    event_type,
    timestamp,
  }),
);

await client.insertTableRows('user_interactions', interactions);
Set up your engine
Now you will configure the personalized search engine with hybrid search and GBDT training.
Start by instantiating the engine configuration class:
- Python
- TypeScript
from shaped.autogen.models.engine_config_v2 import EngineConfigV2
from shaped.autogen.models.data_config import DataConfig

# Start from an empty data config; the engine is named "personalized_search".
empty_data = DataConfig()
personalized_search_engine = EngineConfigV2(
    name="personalized_search",
    data=empty_data,
)
import { Engine } from '@shaped.ai/client';

// The engine is identified by this name when it is queried later.
const engineName = 'personalized_search';
const personalizedSearchEngine = new Engine(engineName);
Connect engine to data
Connect both the item table and interaction table to your engine:
- Python
- TypeScript
from shaped.autogen.models.data_config_interaction_table import DataConfigInteractionTable
from shaped.autogen.models.reference_table_config import ReferenceTableConfig

# Reference the two tables by name.
# NOTE(review): the item table is wrapped in DataConfigInteractionTable as
# well — confirm this is the intended wrapper type for item tables.
item_table_ref = DataConfigInteractionTable(
    ReferenceTableConfig(name="pixar_movies")
)
interaction_table_ref = DataConfigInteractionTable(
    ReferenceTableConfig(name="user_interactions")
)

# Replace the engine's data config with one that points at both tables.
personalized_search_engine.data = DataConfig(
    item_table=item_table_ref,
    interaction_table=interaction_table_ref,
)
// Point the engine at the item table by name.
personalizedSearchEngine.items('pixar_movies');
// Point the engine at the interaction table by name.
personalizedSearchEngine.interactions('user_interactions');
Configure hybrid search
Configure both lexical and vector search as in the hybrid search example:
- Python
- TypeScript
from shaped.autogen.models.index_config import IndexConfig
from shaped.autogen.models.search_config import SearchConfig
from shaped.autogen.models.embedding_config import EmbeddingConfig
from shaped.autogen.models.encoder import Encoder
from shaped.autogen.models.hugging_face_encoder import HuggingFaceEncoder

embedding_model = "sentence-transformers/all-MiniLM-L6-v2"

# Lexical side of hybrid search: match over the title and description.
lexical_config = SearchConfig(
    item_fields=["movie_title", "description"],
    fuzziness_edit_distance=0,
)

# Vector side: embed the same two text fields with the Hugging Face model.
text_encoder = Encoder(
    HuggingFaceEncoder(
        model_name=embedding_model,
        item_fields=["movie_title", "description"],
    )
)
movie_text_embedding = EmbeddingConfig(
    name="movie_text_embedding",
    encoder=text_encoder,
)

personalized_search_engine.index = IndexConfig(
    lexical_search=lexical_config,
    embeddings=[movie_text_embedding],
)
const embeddingModel = 'sentence-transformers/all-MiniLM-L6-v2';
// Both retrieval modes read the same text columns.
const textFields = ['movie_title', 'description'];

// Lexical side of hybrid search.
personalizedSearchEngine.withLexicalSearch({ item_fields: [...textFields] });

// Vector side: the embedding is referenced later by its name.
personalizedSearchEngine.withEmbedding({
  name: 'movie_text_embedding',
  encoder: {
    type: 'hugging_face',
    model_name: embeddingModel,
    item_fields: [...textFields],
  },
});
Configure GBDT training
Configure a GBDT model to learn from user interactions and personalize search results:
- Python
- TypeScript
from shaped.autogen.models.training_config import TrainingConfig
from shaped.autogen.models.models_inner import ModelsInner
from shaped.autogen.models.shaped_internal_recsys_policies_gbdt_gbdt_policy_config import ShapedInternalRecsysPoliciesGbdtGBDTPolicyConfig

# GBDT policy; queries refer to it by the name "click_through_rate".
ctr_policy = ShapedInternalRecsysPoliciesGbdtGBDTPolicyConfig(
    policy_type="gbdt",
    name="click_through_rate",
)

personalized_search_engine.training = TrainingConfig(
    models=[ModelsInner(ctr_policy)],
)
// Register a single GBDT policy; queries refer to it as 'click_through_rate'.
personalizedSearchEngine.withTraining({
  models: [{ policy_type: 'gbdt', name: 'click_through_rate' }],
});
Start indexing and training
After configuring your engine's data, index, and training, use the create engine method to start both indexing and model training:
- Python
- TypeScript
# Submit the assembled engine configuration (data, index, training) to Shaped.
client.create_engine(engine_config=personalized_search_engine)
// Submit the assembled engine configuration (data, index, training) to Shaped.
await client.createEngine(personalizedSearchEngine);
Make a personalized search query
After the engine is finished indexing and training, you can search with personalization.
Use hybrid search retrievers combined with a score expression that weights results by the trained GBDT model:
- Python
- TypeScript
- ShapedQL
from shaped import RankQueryBuilder, TextSearch

# Two retrievers fed by the same "$query" parameter: one lexical, one vector.
lexical_retriever = TextSearch(
    input_text_query='$query',
    mode={'type': 'lexical'},
    limit=50,
    name='lexical_search',
)
vector_retriever = TextSearch(
    input_text_query='$query',
    mode={'type': 'vector', 'text_embedding_ref': 'movie_text_embedding'},
    limit=50,
    name='vector_search',
)

# Retrieve from items, score with the trained GBDT model, keep the top 20.
builder = RankQueryBuilder().from_entity('item')
builder = builder.retrieve([lexical_retriever, vector_retriever])
builder = builder.score(
    value_model='click_through_rate',
    input_user_id='$user_id',
    input_interactions_item_ids='$interaction_item_ids',
)
query = builder.limit(20).build()

# Values substituted for the "$..." placeholders in the query.
query_parameters = {
    "query": "Incredibles",
    "user_id": "user1",
    "interaction_item_ids": ["187541", "177765", "1"],
}

results = client.execute_query(
    engine_name="personalized_search",
    query=query,
    parameters=query_parameters,
    return_metadata=True,
)
import { RankQueryBuilder } from '@shaped.ai/client';
// Build a rank query over items with two text-search retrieve steps (lexical and vector),
// both fed by the '$query' parameter, limited to 20 results.
// NOTE(review): unlike the Python and ShapedQL versions of this query, no score step
// referencing 'click_through_rate' is applied here, so 'user_id' and 'interaction_item_ids'
// appear to be unused by this query — confirm the TypeScript builder's scoring call and add it.
const query = new RankQueryBuilder()
.from('item')
.retrieve(step =>
step.textSearch(
'$query',
{ type: 'lexical' },
{ limit: 50, name: 'lexical_search' },
)
)
.retrieve(step =>
step.textSearch(
'$query',
{ type: 'vector', textEmbeddingRef: 'movie_text_embedding' },
{ limit: 50, name: 'vector_search' },
)
)
.limit(20)
.build();
// Run the query against the 'personalized_search' engine with values for the '$...' placeholders.
// The trailing `true` is presumably the return-metadata flag (the Python call passes
// return_metadata=True) — verify against the client signature.
const results = await client.executeQuery(
'personalized_search',
query,
{
query: 'Incredibles',
user_id: 'user1',
interaction_item_ids: ['187541', '177765', '1']
},
true,
);
SELECT
    score(
        expression='click_through_rate',
        input_user_id='$user_id',
        input_interactions_item_ids='$interaction_item_ids'
    ) AS s,
    *
FROM retrieve(
    text_search(
        query='$query',
        mode='lexical',
        limit=50,
        name='lexical_search'
    ),
    text_search(
        query='$query',
        mode='vector',
        text_embedding_ref='movie_text_embedding',
        limit=50,
        name='vector_search'
    )
)
ORDER BY s
LIMIT 20