-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmodels.py
63 lines (49 loc) · 1.83 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from django.db import models
from pgvector.django import VectorField, HnswIndex
from rest_framework import serializers
class EmbedSmallExpression(models.Expression):
    """
    SQL expression that embeds a text column of the same table.

    Compiles to a call to ``pgml.embed`` using the ``intfloat/e5-small``
    model, with the given column as the text argument. The declared output
    is a non-nullable 384-dimensional vector.
    """

    output_field = VectorField(null=False, blank=False, dimensions=384)

    def __init__(self, field):
        """
        Create the expression.

        Args:
            field: Name of the column (in the same table) whose text is
                passed to ``pgml.embed``.
        """
        # Run the base initialiser so the expression carries whatever state
        # the parent class sets up, instead of silently skipping it.
        super().__init__()
        self.embedding_field = field

    def as_sql(self, compiler, connection, template=None):
        """
        Build the SQL for this expression.

        Args:
            compiler: The SQL compiler invoking this expression (unused).
            connection: The database connection; used to quote the column.
            template: Accepted for signature compatibility; unused.

        Returns:
            A ``(sql, params)`` pair. ``params`` is an empty list because
            the statement carries no placeholders.
        """
        # Quote the identifier the same way the backend quotes column names,
        # so reserved words and mixed-case names are referenced correctly.
        column = connection.ops.quote_name(self.embedding_field)
        # An empty list (rather than None) keeps the params value a
        # sequence that callers can safely extend or iterate.
        return f"pgml.embed('intfloat/e5-small', {column})", []
class TodoItem(models.Model):
    """
    A single entry on the TODO list.

    Besides the plain columns, every row carries an ``embedding`` of its
    ``description``, declared as a persisted generated field.
    """

    description = models.CharField(max_length=256)
    due_date = models.DateField()
    completed = models.BooleanField(default=False)

    # Derived from ``description`` through ``EmbedSmallExpression``.
    embedding = models.GeneratedField(
        db_persist=True,
        output_field=VectorField(dimensions=384, blank=False, null=False),
        expression=EmbedSmallExpression("description"),
    )

    class Meta:
        indexes = [
            # An HNSW index on the embedding column enables approximate
            # nearest neighbor search, for datasets large enough that an
            # exact search would be too slow.
            #
            # The price is accuracy: results are approximate, so some of
            # the true nearest neighbors may be missing.
            HnswIndex(
                fields=["embedding"],
                name="todoitem_embedding_hnsw_index",
                opclasses=["vector_cosine_ops"],
                ef_construction=64,
                m=16,
            )
        ]

    def __str__(self):
        """Return the item's description as its display text."""
        return self.description
class TodoItemSerializer(serializers.ModelSerializer):
    """
    Django Rest Framework serializer for ``TodoItem``.

    Exposes ``id``, the three plain columns, and the ``embedding`` column.
    """

    class Meta:
        model = TodoItem
        fields = [
            "id",
            "description",
            "due_date",
            "completed",
            "embedding",
        ]