update search
This commit is contained in:
@@ -0,0 +1,102 @@
|
|||||||
|
"""
|
||||||
|
Add an IMMUTABLE ``normalize_persian(text)`` SQL function and GIN trigram
|
||||||
|
indexes that match it. Lets the search view filter and rank without doing a
|
||||||
|
per-row ``translate()`` on a sequential scan — drops query time from seconds
|
||||||
|
to tens of milliseconds.
|
||||||
|
|
||||||
|
The FROM/TO strings here MUST stay aligned with ``_SQL_NORM_FROM`` /
|
||||||
|
``_SQL_NORM_TO`` in ``product/views.py``. If you change one, change the other
|
||||||
|
and add a follow-up migration that recreates the function + indexes (Postgres
|
||||||
|
matches expression indexes by exact SQL form, so a stale function would
|
||||||
|
silently bypass the indexes).
|
||||||
|
"""
|
||||||
|
from django.db import migrations
|
||||||
|
|
||||||
|
|
||||||
|
# Mirror of product.views._SQL_NORM_FROM / _SQL_NORM_TO.
|
||||||
|
_SQL_NORM_FROM = (
|
||||||
|
'يك' # Arabic ya/kaf -> Persian
|
||||||
|
'ﻱﻲﻳﻴ' # Arabic ya presentation forms
|
||||||
|
'ﻙﻚﻛﻜ' # Arabic kaf presentation forms
|
||||||
|
'آأإٱ' # alef variants
|
||||||
|
'ؤ' # waw with hamza
|
||||||
|
'ئ' # ya with hamza
|
||||||
|
'ةۀ' # ta marbuta / he with hamza
|
||||||
|
'ﻩﻪﻫﻬ' # he presentation forms
|
||||||
|
'' # ZWNJ, ZWJ -> space
|
||||||
|
'۰۱۲۳۴۵۶۷۸۹' # Persian digits
|
||||||
|
'٠١٢٣٤٥٦٧٨٩' # Arabic-Indic digits
|
||||||
|
# Deletions (no matching char in TO):
|
||||||
|
'ـ' # tatweel
|
||||||
|
'' # LRM, RLM
|
||||||
|
'ًٌٍَُِّْ' # tashkeel
|
||||||
|
)
|
||||||
|
_SQL_NORM_TO = (
|
||||||
|
'یک'
|
||||||
|
'یییی'
|
||||||
|
'کککک'
|
||||||
|
'اااا'
|
||||||
|
'و'
|
||||||
|
'ی'
|
||||||
|
'هه'
|
||||||
|
'هههه'
|
||||||
|
' '
|
||||||
|
'0123456789'
|
||||||
|
'0123456789'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _pg_str(s):
|
||||||
|
"""Quote a Python string as a PostgreSQL string literal."""
|
||||||
|
return "'" + s.replace("'", "''") + "'"
|
||||||
|
|
||||||
|
|
||||||
|
CREATE_FUNCTION_SQL = f"""
|
||||||
|
CREATE OR REPLACE FUNCTION normalize_persian(t text) RETURNS text AS $$
|
||||||
|
SELECT lower(translate(t, {_pg_str(_SQL_NORM_FROM)}, {_pg_str(_SQL_NORM_TO)}));
|
||||||
|
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Drops ``normalize_persian(text)``; IF EXISTS keeps the statement from
# failing when the function is already gone.
DROP_FUNCTION_SQL = "DROP FUNCTION IF EXISTS normalize_persian(text);"
|
||||||
|
|
||||||
|
|
||||||
|
def _trgm_index_operation(index_name, table, column):
    """Build the RunSQL operation for one GIN trigram index.

    The forward SQL indexes ``normalize_persian(<column>)`` on ``table`` with
    ``gin_trgm_ops``; the reverse SQL drops the index again. Both statements
    are guarded with IF [NOT] EXISTS.
    """
    create_sql = (
        f"CREATE INDEX IF NOT EXISTS {index_name} "
        f"ON {table} "
        f"USING gin (normalize_persian({column}) gin_trgm_ops);"
    )
    drop_sql = f"DROP INDEX IF EXISTS {index_name};"
    return migrations.RunSQL(sql=create_sql, reverse_sql=drop_sql)


class Migration(migrations.Migration):
    """Create ``normalize_persian`` and the trigram indexes built on it."""

    dependencies = [
        ("product", "0075_productvariant_guarantee"),
    ]

    operations = [
        # The function has to exist before any index expression can call it.
        migrations.RunSQL(
            sql=CREATE_FUNCTION_SQL,
            reverse_sql=DROP_FUNCTION_SQL,
        ),
        # GIN trigram indexes on the normalized expression. PostgreSQL only
        # matches queries that use exactly ``normalize_persian(<col>)``
        # against these indexes, so views.py must call the SQL function (not
        # inline translate/lower) for an index to be usable.
        _trgm_index_operation(
            "product_norm_name_trgm_idx",
            "product_productmodel",
            "name",
        ),
        _trgm_index_operation(
            "product_norm_keywords_trgm_idx",
            "product_productmodel",
            "meta_keywords",
        ),
        _trgm_index_operation(
            "subcategory_norm_name_trgm_idx",
            "product_subcategorymodel",
            "name",
        ),
    ]
|
||||||
+186
-19
@@ -1,3 +1,4 @@
|
|||||||
|
import re
|
||||||
from .models import ProductModel
|
from .models import ProductModel
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
from django.core.paginator import Paginator
|
from django.core.paginator import Paginator
|
||||||
@@ -6,8 +7,8 @@ from .models import *
|
|||||||
from .serializers import *
|
from .serializers import *
|
||||||
from rest_framework import status
|
from rest_framework import status
|
||||||
from rest_framework.response import Response
|
from rest_framework.response import Response
|
||||||
from django.db.models import Q, Value
|
from django.db.models import Q, Value, Case, When, FloatField, F, CharField, Func
|
||||||
from django.db.models.functions import Coalesce
|
from django.db.models.functions import Coalesce, Length
|
||||||
from django.contrib.postgres.search import TrigramSimilarity
|
from django.contrib.postgres.search import TrigramSimilarity
|
||||||
from django.shortcuts import get_object_or_404
|
from django.shortcuts import get_object_or_404
|
||||||
from rest_framework.permissions import IsAuthenticatedOrReadOnly
|
from rest_framework.permissions import IsAuthenticatedOrReadOnly
|
||||||
@@ -21,6 +22,179 @@ from home.models import ShowCaseSlider
|
|||||||
from home.serializers import ShowCaseSliderSerialzier
|
from home.serializers import ShowCaseSliderSerialzier
|
||||||
from order.models import Cart, CartItem
|
from order.models import Cart, CartItem
|
||||||
from django.db.models import Min, Max, Value
|
from django.db.models import Min, Max, Value
|
||||||
|
|
||||||
|
|
||||||
|
_PERSIAN_CHAR_MAP = str.maketrans({
|
||||||
|
# Arabic letters -> Persian equivalents
|
||||||
|
'ي': 'ی', 'ك': 'ک',
|
||||||
|
# Arabic ya/kaf presentation forms -> Persian
|
||||||
|
'ﻱ': 'ی', 'ﻲ': 'ی', 'ﻳ': 'ی', 'ﻴ': 'ی',
|
||||||
|
'ﻙ': 'ک', 'ﻚ': 'ک', 'ﻛ': 'ک', 'ﻜ': 'ک',
|
||||||
|
# Alef variants -> bare alef (so "ایفون" matches "آیفون")
|
||||||
|
'آ': 'ا', 'أ': 'ا', 'إ': 'ا', 'ٱ': 'ا',
|
||||||
|
# Hamza on waw/ya -> bare letter
|
||||||
|
'ؤ': 'و',
|
||||||
|
'ئ': 'ی',
|
||||||
|
# Ta marbuta / he variants -> he
|
||||||
|
'ة': 'ه', 'ۀ': 'ه',
|
||||||
|
'ﻩ': 'ه', 'ﻪ': 'ه', 'ﻫ': 'ه', 'ﻬ': 'ه',
|
||||||
|
# Tatweel - drop
|
||||||
|
'ـ': '',
|
||||||
|
# Tashkeel (diacritics) - drop
|
||||||
|
'ً': '', 'ٌ': '', 'ٍ': '', 'َ': '', 'ُ': '', 'ِ': '', 'ّ': '', 'ْ': '',
|
||||||
|
# Zero-width / direction marks
|
||||||
|
'': ' ', '': ' ',
|
||||||
|
'': '', '': '',
|
||||||
|
# Arabic-Indic / Persian digits -> ASCII
|
||||||
|
'۰': '0', '۱': '1', '۲': '2', '۳': '3', '۴': '4',
|
||||||
|
'۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9',
|
||||||
|
'٠': '0', '١': '1', '٢': '2', '٣': '3', '٤': '4',
|
||||||
|
'٥': '5', '٦': '6', '٧': '7', '٨': '8', '٩': '9',
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_search_text(text):
|
||||||
|
"""Normalize a search string to handle Persian/Arabic variants, ZWNJ, and case."""
|
||||||
|
if not text:
|
||||||
|
return ''
|
||||||
|
return re.sub(r'\s+', ' ', text.translate(_PERSIAN_CHAR_MAP)).strip().lower()
|
||||||
|
|
||||||
|
|
||||||
|
# SQL-side equivalent of _PERSIAN_CHAR_MAP for PostgreSQL translate().
|
||||||
|
# Each char at position i in FROM is replaced by char at position i in TO;
|
||||||
|
# chars past len(TO) are deleted entirely. This must mirror the Python map so
|
||||||
|
# stored values and query strings normalize to the same form.
|
||||||
|
_SQL_NORM_FROM = (
|
||||||
|
'يك' # Arabic ya/kaf -> Persian
|
||||||
|
'ﻱﻲﻳﻴ' # Arabic ya presentation forms
|
||||||
|
'ﻙﻚﻛﻜ' # Arabic kaf presentation forms
|
||||||
|
'آأإٱ' # alef variants
|
||||||
|
'ؤ' # waw with hamza
|
||||||
|
'ئ' # ya with hamza
|
||||||
|
'ةۀ' # ta marbuta / he with hamza
|
||||||
|
'ﻩﻪﻫﻬ' # he presentation forms
|
||||||
|
'' # ZWNJ, ZWJ -> space
|
||||||
|
'۰۱۲۳۴۵۶۷۸۹' # Persian digits
|
||||||
|
'٠١٢٣٤٥٦٧٨٩' # Arabic-Indic digits
|
||||||
|
# Deletions (no matching char in TO):
|
||||||
|
'ـ' # tatweel
|
||||||
|
'' # LRM, RLM
|
||||||
|
'ًٌٍَُِّْ' # tashkeel
|
||||||
|
)
|
||||||
|
_SQL_NORM_TO = (
|
||||||
|
'یک'
|
||||||
|
'یییی'
|
||||||
|
'کککک'
|
||||||
|
'اااا'
|
||||||
|
'و'
|
||||||
|
'ی'
|
||||||
|
'هه'
|
||||||
|
'هههه'
|
||||||
|
' '
|
||||||
|
'0123456789'
|
||||||
|
'0123456789'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def NormalizePersian(expression):
    """Wrap ``expression`` in a call to the ``normalize_persian`` SQL function.

    Returns a ``Func`` expression typed as ``CharField``. Queries must call
    the database function rather than inline translate/lower: the GIN trigram
    indexes are defined on ``normalize_persian(<column>)``, and the planner
    can only use them when the query contains that same expression.
    """
    text_output = CharField()
    return Func(
        expression,
        function='normalize_persian',
        output_field=text_output,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_product_search(queryset, search_query):
|
||||||
|
"""Filter and rank a Product queryset by a (possibly Persian) search query.
|
||||||
|
|
||||||
|
Returns (queryset, normalized_query). The queryset is annotated with
|
||||||
|
``similarity`` so callers can ``order_by('-similarity', ...)``. When no
|
||||||
|
product strictly matches, falls back to a looser similarity-based filter
|
||||||
|
so the user sees suggestions instead of an empty page.
|
||||||
|
"""
|
||||||
|
normalized_query = _normalize_search_text(search_query) if search_query else ''
|
||||||
|
if not normalized_query:
|
||||||
|
return queryset, ''
|
||||||
|
|
||||||
|
tokens = [t for t in normalized_query.split(' ') if len(t) >= 2]
|
||||||
|
|
||||||
|
annotated = queryset.annotate(
|
||||||
|
norm_name=NormalizePersian('name'),
|
||||||
|
norm_keywords=NormalizePersian(Coalesce('meta_keywords', Value(''))),
|
||||||
|
norm_category=NormalizePersian(Coalesce('category__name', Value(''))),
|
||||||
|
norm_desc=NormalizePersian(Coalesce('description', Value(''))),
|
||||||
|
).annotate(
|
||||||
|
name_sim=TrigramSimilarity(F('norm_name'), normalized_query),
|
||||||
|
keywords_sim=TrigramSimilarity(F('norm_keywords'), normalized_query),
|
||||||
|
category_sim=TrigramSimilarity(F('norm_category'), normalized_query),
|
||||||
|
desc_sim=TrigramSimilarity(F('norm_desc'), normalized_query),
|
||||||
|
).annotate(
|
||||||
|
# Word-boundary aware bonuses. The space-padded variants are what make
|
||||||
|
# "چای" rank above "چایساز" — the former matches "چای " (word boundary)
|
||||||
|
# while the latter only matches the glued prefix.
|
||||||
|
#
|
||||||
|
# Uses case-sensitive lookups (__contains, not __icontains) because both
|
||||||
|
# sides are already lowercased: __icontains would wrap the expression in
|
||||||
|
# UPPER(...) and break the GIN trigram index match.
|
||||||
|
match_bonus=Case(
|
||||||
|
When(norm_name__exact=normalized_query, then=Value(10.0)),
|
||||||
|
When(norm_name__startswith=normalized_query + ' ', then=Value(6.0)),
|
||||||
|
When(norm_name__startswith=normalized_query, then=Value(3.5)),
|
||||||
|
When(norm_name__contains=' ' + normalized_query + ' ', then=Value(3.0)),
|
||||||
|
When(norm_name__contains=' ' + normalized_query, then=Value(2.5)),
|
||||||
|
When(norm_name__contains=normalized_query + ' ', then=Value(2.5)),
|
||||||
|
When(norm_name__contains=normalized_query, then=Value(1.5)),
|
||||||
|
default=Value(0.0),
|
||||||
|
output_field=FloatField(),
|
||||||
|
)
|
||||||
|
).annotate(
|
||||||
|
similarity=(
|
||||||
|
F('match_bonus')
|
||||||
|
+ F('name_sim') * Value(2.0)
|
||||||
|
+ F('keywords_sim') * Value(0.8)
|
||||||
|
+ F('category_sim') * Value(0.4)
|
||||||
|
+ F('desc_sim') * Value(0.15)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if tokens:
|
||||||
|
# Token AND filter. Limited to fields we have GIN trigram indexes for
|
||||||
|
# (name, keywords, category.name in migration 0076) — including
|
||||||
|
# description or slug here would force a sequential scan on the OR
|
||||||
|
# branch and undo the index speedup. Description still contributes via
|
||||||
|
# ``desc_sim`` to ranking on the already-narrowed result set.
|
||||||
|
token_filter = Q()
|
||||||
|
for token in tokens:
|
||||||
|
token_filter &= (
|
||||||
|
Q(norm_name__contains=token)
|
||||||
|
| Q(norm_keywords__contains=token)
|
||||||
|
| Q(norm_category__contains=token)
|
||||||
|
)
|
||||||
|
strict_filter = (
|
||||||
|
token_filter
|
||||||
|
| Q(name_sim__gte=0.45)
|
||||||
|
| Q(keywords_sim__gte=0.5)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
strict_filter = Q(name_sim__gte=0.4) | Q(keywords_sim__gte=0.4)
|
||||||
|
|
||||||
|
strict_products = annotated.filter(strict_filter).distinct()
|
||||||
|
if strict_products.exists():
|
||||||
|
return strict_products, normalized_query
|
||||||
|
|
||||||
|
# No strict matches — relax thresholds so the user gets "similar"
|
||||||
|
# suggestions instead of an empty result page.
|
||||||
|
loose_filter = (
|
||||||
|
Q(name_sim__gte=0.18)
|
||||||
|
| Q(keywords_sim__gte=0.22)
|
||||||
|
| Q(category_sim__gte=0.3)
|
||||||
|
| Q(match_bonus__gt=0)
|
||||||
|
)
|
||||||
|
return annotated.filter(loose_filter).distinct(), normalized_query
|
||||||
# class APIView(APIView):
|
# class APIView(APIView):
|
||||||
# def __init__(self, *args, **kwargs):
|
# def __init__(self, *args, **kwargs):
|
||||||
# super().__init__(*args, **kwargs)
|
# super().__init__(*args, **kwargs)
|
||||||
@@ -324,18 +498,9 @@ class AllProductsView(APIView):
|
|||||||
status=status.HTTP_400_BAD_REQUEST
|
status=status.HTTP_400_BAD_REQUEST
|
||||||
)
|
)
|
||||||
|
|
||||||
# Search
|
# Search (Persian-aware, with typo tolerance + similar-results fallback)
|
||||||
search_query = request.query_params.get('search')
|
search_query = request.query_params.get('search')
|
||||||
if search_query:
|
products, normalized_query = _apply_product_search(products, search_query)
|
||||||
products = products.annotate(
|
|
||||||
similarity=(
|
|
||||||
TrigramSimilarity('name', search_query) +
|
|
||||||
TrigramSimilarity(
|
|
||||||
Coalesce('description', Value('')),
|
|
||||||
search_query
|
|
||||||
)
|
|
||||||
)
|
|
||||||
).filter(similarity__gt=0.1)
|
|
||||||
|
|
||||||
# Price annotation (IMPORTANT for sorting)
|
# Price annotation (IMPORTANT for sorting)
|
||||||
products = products.annotate(
|
products = products.annotate(
|
||||||
@@ -376,8 +541,10 @@ class AllProductsView(APIView):
|
|||||||
|
|
||||||
elif sort_by in ['price', '-price']:
|
elif sort_by in ['price', '-price']:
|
||||||
products = products.order_by('min_price' if sort_by == 'price' else '-min_price')
|
products = products.order_by('min_price' if sort_by == 'price' else '-min_price')
|
||||||
elif search_query:
|
elif normalized_query:
|
||||||
products = products.order_by('-similarity', 'name')
|
# Tie-break on shorter name: ensures "چای" outranks "چای ساز"
|
||||||
|
# when their bonus-adjusted similarities are close.
|
||||||
|
products = products.order_by('-similarity', Length('norm_name'), 'name')
|
||||||
else:
|
else:
|
||||||
products = products.order_by('name')
|
products = products.order_by('name')
|
||||||
|
|
||||||
@@ -522,11 +689,9 @@ class ShowCaseProductsView(APIView):
|
|||||||
if has_discount:
|
if has_discount:
|
||||||
products = products.filter(variants__discount__gt=0).distinct()
|
products = products.filter(variants__discount__gt=0).distinct()
|
||||||
|
|
||||||
# Search filter
|
# Search filter (Persian-aware, with typo tolerance + similar-results fallback)
|
||||||
search_query = request.query_params.get('search', None)
|
search_query = request.query_params.get('search', None)
|
||||||
if search_query:
|
products, normalized_query = _apply_product_search(products, search_query)
|
||||||
products = products.filter(Q(name__icontains=search_query) | Q(
|
|
||||||
description__icontains=search_query))
|
|
||||||
|
|
||||||
# Price filters
|
# Price filters
|
||||||
price_gte = request.query_params.get('price_gte', None)
|
price_gte = request.query_params.get('price_gte', None)
|
||||||
@@ -543,6 +708,8 @@ class ShowCaseProductsView(APIView):
|
|||||||
sort_by = request.query_params.get('sort', None)
|
sort_by = request.query_params.get('sort', None)
|
||||||
if sort_by in ['name', '-name', 'created_at', '-created_at']:
|
if sort_by in ['name', '-name', 'created_at', '-created_at']:
|
||||||
products = products.order_by(sort_by)
|
products = products.order_by(sort_by)
|
||||||
|
elif normalized_query:
|
||||||
|
products = products.order_by('-similarity', Length('norm_name'), 'name')
|
||||||
else:
|
else:
|
||||||
products = products.order_by('name')
|
products = products.order_by('name')
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user