Update search

This commit is contained in:
Parsa Nazer
2026-05-30 09:06:25 +03:30
parent e6965fe3b8
commit 6f2037309c
2 changed files with 288 additions and 19 deletions
+186 -19
View File
@@ -1,3 +1,4 @@
import re
from .models import ProductModel
from rest_framework import serializers
from django.core.paginator import Paginator
@@ -6,8 +7,8 @@ from .models import *
from .serializers import *
from rest_framework import status
from rest_framework.response import Response
from django.db.models import Q, Value
from django.db.models.functions import Coalesce
from django.db.models import Q, Value, Case, When, FloatField, F, CharField, Func
from django.db.models.functions import Coalesce, Length
from django.contrib.postgres.search import TrigramSimilarity
from django.shortcuts import get_object_or_404
from rest_framework.permissions import IsAuthenticatedOrReadOnly
@@ -21,6 +22,179 @@ from home.models import ShowCaseSlider
from home.serializers import ShowCaseSliderSerialzier
from order.models import Cart, CartItem
from django.db.models import Min, Max, Value
# Character-level normalization table consumed by ``str.translate``.
#
# Presentation forms, combining marks and zero-width / directional marks are
# written as ``\u`` escapes on purpose: they are invisible (or visually
# indistinguishable from the base letter) in most editors and are easily lost
# in copy/paste, and ``str.maketrans`` raises ``ValueError`` for any string
# key that is not exactly one character long.
_PERSIAN_CHAR_MAP = str.maketrans({
    # Arabic letters -> Persian equivalents
    'ي': 'ی', 'ك': 'ک',
    # Arabic yeh presentation forms (U+FEF1..U+FEF4) -> Persian ye
    '\ufef1': 'ی', '\ufef2': 'ی', '\ufef3': 'ی', '\ufef4': 'ی',
    # Arabic kaf presentation forms (U+FED9..U+FEDC) -> Persian kaf
    '\ufed9': 'ک', '\ufeda': 'ک', '\ufedb': 'ک', '\ufedc': 'ک',
    # Alef variants -> bare alef (so "ایفون" matches "آیفون")
    'آ': 'ا', 'أ': 'ا', 'إ': 'ا', 'ٱ': 'ا',
    # Hamza on waw/ya -> bare letter
    'ؤ': 'و',
    'ئ': 'ی',
    # Ta marbuta / he variants -> he
    'ة': 'ه', 'ۀ': 'ه',
    # He presentation forms (U+FEE9..U+FEEC) -> he
    '\ufee9': 'ه', '\ufeea': 'ه', '\ufeeb': 'ه', '\ufeec': 'ه',
    # Tatweel (U+0640) - drop
    '\u0640': '',
    # Tashkeel / diacritics (U+064B..U+0652) - drop
    '\u064b': '', '\u064c': '', '\u064d': '', '\u064e': '',
    '\u064f': '', '\u0650': '', '\u0651': '', '\u0652': '',
    # ZWNJ (U+200C) and ZWJ (U+200D) -> space
    '\u200c': ' ', '\u200d': ' ',
    # LRM (U+200E) and RLM (U+200F) - drop
    '\u200e': '', '\u200f': '',
    # Persian digits -> ASCII
    '۰': '0', '۱': '1', '۲': '2', '۳': '3', '۴': '4',
    '۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9',
    # Arabic-Indic digits -> ASCII
    '٠': '0', '١': '1', '٢': '2', '٣': '3', '٤': '4',
    '٥': '5', '٦': '6', '٧': '7', '٨': '8', '٩': '9',
})
def _normalize_search_text(text):
    """Return ``text`` in the canonical form used for search comparisons.

    The string is passed through ``_PERSIAN_CHAR_MAP``, every run of
    whitespace is collapsed to a single space, leading/trailing whitespace is
    removed and the result is lowercased. Falsy input (``None``, ``''``)
    yields ``''``.
    """
    if not text:
        return ''
    translated = text.translate(_PERSIAN_CHAR_MAP)
    single_spaced = re.sub(r'\s+', ' ', translated)
    return single_spaced.strip().lower()
# SQL-side equivalent of _PERSIAN_CHAR_MAP for PostgreSQL translate().
# translate(s, FROM, TO) replaces the char at position i of FROM with the char
# at position i of TO; FROM chars past len(TO) are deleted entirely. This must
# mirror the Python map so stored values and query strings normalize to the
# same form.
#
# FROM/TO are assembled from (source, target) pairs so the positional
# alignment holds by construction, and invisible / combining characters are
# written as ``\u`` escapes so they cannot be silently dropped by an editor.
#
# NOTE(review): the Python-side normalizer additionally collapses runs of
# whitespace, which translate() cannot do — confirm that is acceptable for
# stored values containing ZWNJ/ZWJ next to a space.
_SQL_NORM_REPLACEMENTS = (
    ('يك', 'یک'),                                # Arabic ya/kaf -> Persian
    ('\ufef1\ufef2\ufef3\ufef4', 'ی' * 4),       # Arabic ya presentation forms
    ('\ufed9\ufeda\ufedb\ufedc', 'ک' * 4),       # Arabic kaf presentation forms
    ('آأإٱ', 'ا' * 4),                           # alef variants
    ('ؤ', 'و'),                                  # waw with hamza
    ('ئ', 'ی'),                                  # ya with hamza
    ('ةۀ', 'ه' * 2),                             # ta marbuta / he with hamza
    ('\ufee9\ufeea\ufeeb\ufeec', 'ه' * 4),       # he presentation forms
    ('\u200c\u200d', ' ' * 2),                   # ZWNJ, ZWJ -> space
    ('۰۱۲۳۴۵۶۷۸۹', '0123456789'),                # Persian digits
    ('٠١٢٣٤٥٦٧٨٩', '0123456789'),                # Arabic-Indic digits
)

# Characters with no counterpart in TO; translate() deletes them.
_SQL_NORM_DELETIONS = (
    '\u0640'                                      # tatweel
    '\u200e\u200f'                                # LRM, RLM
    '\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652'  # tashkeel
)

# Fail fast on a misaligned pair: one missing character would silently shift
# every later replacement (e.g. all digit mappings) by one position.
if any(len(src) != len(dst) for src, dst in _SQL_NORM_REPLACEMENTS):
    raise ValueError('_SQL_NORM_REPLACEMENTS pairs must have equal lengths')

_SQL_NORM_FROM = (
    ''.join(src for src, _ in _SQL_NORM_REPLACEMENTS) + _SQL_NORM_DELETIONS
)
_SQL_NORM_TO = ''.join(dst for _, dst in _SQL_NORM_REPLACEMENTS)
def NormalizePersian(expression):
    """Wrap ``expression`` in a call to the ``normalize_persian`` SQL function.

    According to the notes kept with this helper, ``normalize_persian(text)``
    is created by migration 0076 as ``lower(translate(t, FROM, TO))`` and is
    declared IMMUTABLE, so that GIN trigram indexes built on
    ``normalize_persian(name)`` (and similar) can be matched by the planner.
    Emitting the function call, rather than inlining translate/lower, is what
    lets queries hit those indexes instead of doing a sequential scan.

    Returns a ``Func`` expression whose output field is a ``CharField``.
    """
    normalized_expr = Func(
        expression,
        function='normalize_persian',
        output_field=CharField(),
    )
    return normalized_expr
def _apply_product_search(queryset, search_query):
    """Filter and score a Product queryset for a (possibly Persian) query.

    Returns a ``(queryset, normalized_query)`` tuple. When the query is empty
    after normalization the input queryset is returned untouched together
    with ``''``. Otherwise the returned queryset carries a ``similarity``
    annotation (plus the intermediate ``norm_*``, ``*_sim`` and
    ``match_bonus`` annotations) so callers can ``order_by('-similarity')``.

    A strict filter is tried first; if it matches nothing, a looser
    similarity-threshold filter is used instead so the user still gets
    suggestions rather than an empty page.
    """
    normalized_query = _normalize_search_text(search_query) if search_query else ''
    if not normalized_query:
        return queryset, ''

    # Only words of two or more characters take part in the token filter.
    words = [word for word in normalized_query.split(' ') if len(word) >= 2]

    normalized_fields = {
        'norm_name': NormalizePersian('name'),
        'norm_keywords': NormalizePersian(Coalesce('meta_keywords', Value(''))),
        'norm_category': NormalizePersian(Coalesce('category__name', Value(''))),
        'norm_desc': NormalizePersian(Coalesce('description', Value(''))),
    }
    trigram_scores = {
        'name_sim': TrigramSimilarity(F('norm_name'), normalized_query),
        'keywords_sim': TrigramSimilarity(F('norm_keywords'), normalized_query),
        'category_sim': TrigramSimilarity(F('norm_category'), normalized_query),
        'desc_sim': TrigramSimilarity(F('norm_desc'), normalized_query),
    }

    # Word-boundary aware bonuses, evaluated in order (first match wins). The
    # space-padded patterns let a stand-alone word outrank the same letters
    # glued into a longer word. Lookups are case-sensitive (``contains``, not
    # ``icontains``) because both sides are already lowercased; ``icontains``
    # would wrap the column in UPPER(...) and defeat the trigram index.
    bonus_rules = (
        ('norm_name__exact', normalized_query, 10.0),
        ('norm_name__startswith', normalized_query + ' ', 6.0),
        ('norm_name__startswith', normalized_query, 3.5),
        ('norm_name__contains', ' ' + normalized_query + ' ', 3.0),
        ('norm_name__contains', ' ' + normalized_query, 2.5),
        ('norm_name__contains', normalized_query + ' ', 2.5),
        ('norm_name__contains', normalized_query, 1.5),
    )
    bonus_expr = Case(
        *[
            When(**{lookup: pattern}, then=Value(bonus))
            for lookup, pattern, bonus in bonus_rules
        ],
        default=Value(0.0),
        output_field=FloatField(),
    )

    # Final rank: bonus plus weighted per-field trigram similarities.
    rank_expr = (
        F('match_bonus')
        + F('name_sim') * Value(2.0)
        + F('keywords_sim') * Value(0.8)
        + F('category_sim') * Value(0.4)
        + F('desc_sim') * Value(0.15)
    )

    annotated = (
        queryset
        .annotate(**normalized_fields)
        .annotate(**trigram_scores)
        .annotate(match_bonus=bonus_expr)
        .annotate(similarity=rank_expr)
    )

    if words:
        # Every word must appear in name, keywords or category name. Per the
        # original notes these are the fields with GIN trigram indexes
        # (migration 0076); description only contributes through ``desc_sim``.
        all_words_present = Q()
        for word in words:
            all_words_present &= (
                Q(norm_name__contains=word)
                | Q(norm_keywords__contains=word)
                | Q(norm_category__contains=word)
            )
        strict_filter = (
            all_words_present
            | Q(name_sim__gte=0.45)
            | Q(keywords_sim__gte=0.5)
        )
    else:
        strict_filter = Q(name_sim__gte=0.4) | Q(keywords_sim__gte=0.4)

    strict_matches = annotated.filter(strict_filter).distinct()
    if not strict_matches.exists():
        # Nothing matched strictly: relax the thresholds to offer "similar"
        # results instead of an empty page.
        loose_filter = (
            Q(name_sim__gte=0.18)
            | Q(keywords_sim__gte=0.22)
            | Q(category_sim__gte=0.3)
            | Q(match_bonus__gt=0)
        )
        return annotated.filter(loose_filter).distinct(), normalized_query

    return strict_matches, normalized_query
# class APIView(APIView):
# def __init__(self, *args, **kwargs):
# super().__init__(*args, **kwargs)
@@ -324,18 +498,9 @@ class AllProductsView(APIView):
status=status.HTTP_400_BAD_REQUEST
)
# Search (Persian-aware, with typo tolerance + similar-results fallback).
# ``normalized_query`` is bound up front because the sorting branch further
# down tests it even when no ``search`` parameter was supplied; leaving it
# unbound would raise UnboundLocalError on a plain product listing.
search_query = request.query_params.get('search')
normalized_query = ''
if search_query:
    products, normalized_query = _apply_product_search(products, search_query)
# Price annotation (IMPORTANT for sorting)
products = products.annotate(
@@ -376,8 +541,10 @@ class AllProductsView(APIView):
elif sort_by in ['price', '-price']:
products = products.order_by('min_price' if sort_by == 'price' else '-min_price')
elif search_query:
products = products.order_by('-similarity', 'name')
elif normalized_query:
# Tie-break on shorter name: ensures "چای" outranks "چای ساز"
# when their bonus-adjusted similarities are close.
products = products.order_by('-similarity', Length('norm_name'), 'name')
else:
products = products.order_by('name')
@@ -522,11 +689,9 @@ class ShowCaseProductsView(APIView):
if has_discount:
products = products.filter(variants__discount__gt=0).distinct()
# Search filter (Persian-aware, with typo tolerance + similar-results fallback).
# ``normalized_query`` is bound up front because the sorting branch further
# down tests it even when no ``search`` parameter was supplied; leaving it
# unbound would raise UnboundLocalError on a request without a search term.
search_query = request.query_params.get('search', None)
normalized_query = ''
if search_query:
    products, normalized_query = _apply_product_search(products, search_query)
# Price filters
price_gte = request.query_params.get('price_gte', None)
@@ -543,6 +708,8 @@ class ShowCaseProductsView(APIView):
sort_by = request.query_params.get('sort', None)
if sort_by in ['name', '-name', 'created_at', '-created_at']:
products = products.order_by(sort_by)
elif normalized_query:
products = products.order_by('-similarity', Length('norm_name'), 'name')
else:
products = products.order_by('name')