update search
This commit is contained in:
+186
-19
@@ -1,3 +1,4 @@
|
||||
import re
|
||||
from .models import ProductModel
|
||||
from rest_framework import serializers
|
||||
from django.core.paginator import Paginator
|
||||
@@ -6,8 +7,8 @@ from .models import *
|
||||
from .serializers import *
|
||||
from rest_framework import status
|
||||
from rest_framework.response import Response
|
||||
from django.db.models import Q, Value
|
||||
from django.db.models.functions import Coalesce
|
||||
from django.db.models import Q, Value, Case, When, FloatField, F, CharField, Func
|
||||
from django.db.models.functions import Coalesce, Length
|
||||
from django.contrib.postgres.search import TrigramSimilarity
|
||||
from django.shortcuts import get_object_or_404
|
||||
from rest_framework.permissions import IsAuthenticatedOrReadOnly
|
||||
@@ -21,6 +22,179 @@ from home.models import ShowCaseSlider
|
||||
from home.serializers import ShowCaseSliderSerialzier
|
||||
from order.models import Cart, CartItem
|
||||
from django.db.models import Min, Max, Value
|
||||
|
||||
|
||||
_PERSIAN_CHAR_MAP = str.maketrans({
|
||||
# Arabic letters -> Persian equivalents
|
||||
'ي': 'ی', 'ك': 'ک',
|
||||
# Arabic ya/kaf presentation forms -> Persian
|
||||
'ﻱ': 'ی', 'ﻲ': 'ی', 'ﻳ': 'ی', 'ﻴ': 'ی',
|
||||
'ﻙ': 'ک', 'ﻚ': 'ک', 'ﻛ': 'ک', 'ﻜ': 'ک',
|
||||
# Alef variants -> bare alef (so "ایفون" matches "آیفون")
|
||||
'آ': 'ا', 'أ': 'ا', 'إ': 'ا', 'ٱ': 'ا',
|
||||
# Hamza on waw/ya -> bare letter
|
||||
'ؤ': 'و',
|
||||
'ئ': 'ی',
|
||||
# Ta marbuta / he variants -> he
|
||||
'ة': 'ه', 'ۀ': 'ه',
|
||||
'ﻩ': 'ه', 'ﻪ': 'ه', 'ﻫ': 'ه', 'ﻬ': 'ه',
|
||||
# Tatweel - drop
|
||||
'ـ': '',
|
||||
# Tashkeel (diacritics) - drop
|
||||
'ً': '', 'ٌ': '', 'ٍ': '', 'َ': '', 'ُ': '', 'ِ': '', 'ّ': '', 'ْ': '',
|
||||
# Zero-width / direction marks
|
||||
'': ' ', '': ' ',
|
||||
'': '', '': '',
|
||||
# Arabic-Indic / Persian digits -> ASCII
|
||||
'۰': '0', '۱': '1', '۲': '2', '۳': '3', '۴': '4',
|
||||
'۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9',
|
||||
'٠': '0', '١': '1', '٢': '2', '٣': '3', '٤': '4',
|
||||
'٥': '5', '٦': '6', '٧': '7', '٨': '8', '٩': '9',
|
||||
})
|
||||
|
||||
|
||||
def _normalize_search_text(text):
    """Return ``text`` in the canonical form used for search comparisons.

    The string is passed through ``_PERSIAN_CHAR_MAP`` (Persian/Arabic letter
    variants, zero-width characters, digits), runs of whitespace are collapsed
    to a single space, surrounding whitespace is removed and the result is
    lowercased. Any falsy input (``None``, ``''``) yields ``''``.
    """
    if text:
        mapped = text.translate(_PERSIAN_CHAR_MAP)
        single_spaced = re.sub(r'\s+', ' ', mapped)
        return single_spaced.strip().lower()
    return ''
|
||||
|
||||
|
||||
# SQL-side equivalent of _PERSIAN_CHAR_MAP for PostgreSQL translate().
|
||||
# Each char at position i in FROM is replaced by char at position i in TO;
|
||||
# chars past len(TO) are deleted entirely. This must mirror the Python map so
|
||||
# stored values and query strings normalize to the same form.
|
||||
_SQL_NORM_FROM = (
|
||||
'يك' # Arabic ya/kaf -> Persian
|
||||
'ﻱﻲﻳﻴ' # Arabic ya presentation forms
|
||||
'ﻙﻚﻛﻜ' # Arabic kaf presentation forms
|
||||
'آأإٱ' # alef variants
|
||||
'ؤ' # waw with hamza
|
||||
'ئ' # ya with hamza
|
||||
'ةۀ' # ta marbuta / he with hamza
|
||||
'ﻩﻪﻫﻬ' # he presentation forms
|
||||
'' # ZWNJ, ZWJ -> space
|
||||
'۰۱۲۳۴۵۶۷۸۹' # Persian digits
|
||||
'٠١٢٣٤٥٦٧٨٩' # Arabic-Indic digits
|
||||
# Deletions (no matching char in TO):
|
||||
'ـ' # tatweel
|
||||
'' # LRM, RLM
|
||||
'ًٌٍَُِّْ' # tashkeel
|
||||
)
|
||||
_SQL_NORM_TO = (
|
||||
'یک'
|
||||
'یییی'
|
||||
'کککک'
|
||||
'اااا'
|
||||
'و'
|
||||
'ی'
|
||||
'هه'
|
||||
'هههه'
|
||||
' '
|
||||
'0123456789'
|
||||
'0123456789'
|
||||
)
|
||||
|
||||
|
||||
def NormalizePersian(expression):
    """Wrap ``expression`` in a call to the ``normalize_persian(text)`` Postgres function.

    Per the project notes, that function is created in migration 0076, computes
    ``lower(translate(t, FROM, TO))`` and is declared IMMUTABLE, which allows
    GIN trigram indexes built on ``normalize_persian(name)`` (and similar) to be
    matched by the planner. Emitting a call to the function, rather than
    inlining translate/lower, is what keeps those indexes usable; otherwise
    every search degrades to a full sequential scan.

    Returns a ``Func`` expression whose output field is ``CharField``.
    """
    normalized_call = Func(
        expression,
        function='normalize_persian',
        output_field=CharField(),
    )
    return normalized_call
|
||||
|
||||
|
||||
def _apply_product_search(queryset, search_query):
    """Narrow and score a Product queryset for a (possibly Persian) search string.

    Returns a ``(queryset, normalized_query)`` tuple. When the query normalizes
    to an empty string the queryset is returned untouched together with ``''``.
    Otherwise the returned queryset carries a ``similarity`` annotation (plus
    the intermediate ``norm_*``, ``*_sim`` and ``match_bonus`` annotations) so
    callers can ``order_by('-similarity', ...)``. If the strict filter matches
    nothing, a looser similarity-based filter is used instead so the user sees
    suggestions rather than an empty page.
    """
    needle = _normalize_search_text(search_query) if search_query else ''
    if not needle:
        return queryset, ''

    # Single-character fragments are ignored for the token filter.
    words = [part for part in needle.split(' ') if len(part) >= 2]

    # Stage 1: normalized copies of the searchable text columns.
    with_normalized = queryset.annotate(
        norm_name=NormalizePersian('name'),
        norm_keywords=NormalizePersian(Coalesce('meta_keywords', Value(''))),
        norm_category=NormalizePersian(Coalesce('category__name', Value(''))),
        norm_desc=NormalizePersian(Coalesce('description', Value(''))),
    )

    # Stage 2: trigram similarity of each normalized column against the query.
    with_similarities = with_normalized.annotate(
        name_sim=TrigramSimilarity(F('norm_name'), needle),
        keywords_sim=TrigramSimilarity(F('norm_keywords'), needle),
        category_sim=TrigramSimilarity(F('norm_category'), needle),
        desc_sim=TrigramSimilarity(F('norm_desc'), needle),
    )

    # Stage 3: word-boundary aware bonus on the name. The space-padded variants
    # are what make "چای" rank above "چایساز": the former matches "چای " (word
    # boundary) while the latter only matches the glued prefix.
    #
    # Case-sensitive lookups (__contains, not __icontains) are used because
    # both sides are already lowercased: __icontains would wrap the expression
    # in UPPER(...) and break the GIN trigram index match.
    followed_by_space = needle + ' '
    preceded_by_space = ' ' + needle
    space_on_both_sides = ' ' + needle + ' '
    bonus_tiers = [
        When(norm_name__exact=needle, then=Value(10.0)),
        When(norm_name__startswith=followed_by_space, then=Value(6.0)),
        When(norm_name__startswith=needle, then=Value(3.5)),
        When(norm_name__contains=space_on_both_sides, then=Value(3.0)),
        When(norm_name__contains=preceded_by_space, then=Value(2.5)),
        When(norm_name__contains=followed_by_space, then=Value(2.5)),
        When(norm_name__contains=needle, then=Value(1.5)),
    ]
    with_bonus = with_similarities.annotate(
        match_bonus=Case(
            *bonus_tiers,
            default=Value(0.0),
            output_field=FloatField(),
        )
    )

    # Stage 4: final score = bonus + weighted per-field similarities, added in
    # a fixed left-to-right order.
    score = F('match_bonus')
    for column, weight in (
        ('name_sim', 2.0),
        ('keywords_sim', 0.8),
        ('category_sim', 0.4),
        ('desc_sim', 0.15),
    ):
        score = score + F(column) * Value(weight)
    annotated = with_bonus.annotate(similarity=score)

    if words:
        # Token AND filter. Limited to fields we have GIN trigram indexes for
        # (name, keywords, category.name in migration 0076) - including
        # description or slug here would force a sequential scan on the OR
        # branch and undo the index speedup. Description still contributes via
        # ``desc_sim`` to ranking on the already-narrowed result set.
        every_word_present = Q()
        for word in words:
            word_in_any_field = (
                Q(norm_name__contains=word)
                | Q(norm_keywords__contains=word)
                | Q(norm_category__contains=word)
            )
            every_word_present &= word_in_any_field
        strict_condition = (
            every_word_present
            | Q(name_sim__gte=0.45)
            | Q(keywords_sim__gte=0.5)
        )
    else:
        strict_condition = Q(name_sim__gte=0.4) | Q(keywords_sim__gte=0.4)

    strict_hits = annotated.filter(strict_condition).distinct()
    if not strict_hits.exists():
        # No strict matches - relax thresholds so the user gets "similar"
        # suggestions instead of an empty result page.
        relaxed_condition = (
            Q(name_sim__gte=0.18)
            | Q(keywords_sim__gte=0.22)
            | Q(category_sim__gte=0.3)
            | Q(match_bonus__gt=0)
        )
        return annotated.filter(relaxed_condition).distinct(), needle

    return strict_hits, needle
|
||||
# class APIView(APIView):
|
||||
# def __init__(self, *args, **kwargs):
|
||||
# super().__init__(*args, **kwargs)
|
||||
@@ -324,18 +498,9 @@ class AllProductsView(APIView):
|
||||
status=status.HTTP_400_BAD_REQUEST
|
||||
)
|
||||
|
||||
# Search
|
||||
# Search (Persian-aware, with typo tolerance + similar-results fallback)
|
||||
search_query = request.query_params.get('search')
|
||||
if search_query:
|
||||
products = products.annotate(
|
||||
similarity=(
|
||||
TrigramSimilarity('name', search_query) +
|
||||
TrigramSimilarity(
|
||||
Coalesce('description', Value('')),
|
||||
search_query
|
||||
)
|
||||
)
|
||||
).filter(similarity__gt=0.1)
|
||||
products, normalized_query = _apply_product_search(products, search_query)
|
||||
|
||||
# Price annotation (IMPORTANT for sorting)
|
||||
products = products.annotate(
|
||||
@@ -376,8 +541,10 @@ class AllProductsView(APIView):
|
||||
|
||||
elif sort_by in ['price', '-price']:
|
||||
products = products.order_by('min_price' if sort_by == 'price' else '-min_price')
|
||||
elif search_query:
|
||||
products = products.order_by('-similarity', 'name')
|
||||
elif normalized_query:
|
||||
# Tie-break on shorter name: ensures "چای" outranks "چای ساز"
|
||||
# when their bonus-adjusted similarities are close.
|
||||
products = products.order_by('-similarity', Length('norm_name'), 'name')
|
||||
else:
|
||||
products = products.order_by('name')
|
||||
|
||||
@@ -522,11 +689,9 @@ class ShowCaseProductsView(APIView):
|
||||
if has_discount:
|
||||
products = products.filter(variants__discount__gt=0).distinct()
|
||||
|
||||
# Search filter
|
||||
# Search filter (Persian-aware, with typo tolerance + similar-results fallback)
|
||||
search_query = request.query_params.get('search', None)
|
||||
if search_query:
|
||||
products = products.filter(Q(name__icontains=search_query) | Q(
|
||||
description__icontains=search_query))
|
||||
products, normalized_query = _apply_product_search(products, search_query)
|
||||
|
||||
# Price filters
|
||||
price_gte = request.query_params.get('price_gte', None)
|
||||
@@ -543,6 +708,8 @@ class ShowCaseProductsView(APIView):
|
||||
sort_by = request.query_params.get('sort', None)
|
||||
if sort_by in ['name', '-name', 'created_at', '-created_at']:
|
||||
products = products.order_by(sort_by)
|
||||
elif normalized_query:
|
||||
products = products.order_by('-similarity', Length('norm_name'), 'name')
|
||||
else:
|
||||
products = products.order_by('name')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user