diff --git a/backend/product/migrations/0076_normalize_persian_search.py b/backend/product/migrations/0076_normalize_persian_search.py
new file mode 100644
index 0000000..248164b
--- /dev/null
+++ b/backend/product/migrations/0076_normalize_persian_search.py
@@ -0,0 +1,102 @@
+"""
+Add an IMMUTABLE ``normalize_persian(text)`` SQL function and GIN trigram
+indexes that match it. Lets the search view filter and rank without doing a
+per-row ``translate()`` on a sequential scan — drops query time from seconds
+to tens of milliseconds.
+
+The FROM/TO strings here MUST stay aligned with ``_SQL_NORM_FROM`` /
+``_SQL_NORM_TO`` in ``product/views.py``. If you change one, change the other
+and add a follow-up migration that recreates the function + indexes (Postgres
+matches expression indexes by exact SQL form, so a stale function would
+silently bypass the indexes).
+"""
+from django.db import migrations
+
+
+# Mirror of product.views._SQL_NORM_FROM / _SQL_NORM_TO: 44 mapped chars, then deletions.
+_SQL_NORM_FROM = (
+    'يك'          # Arabic ya/kaf -> Persian
+    'ﻱﻲﻳﻴ'        # Arabic ya presentation forms
+    'ﻙﻚﻛﻜ'        # Arabic kaf presentation forms
+    'آأإٱ'        # alef variants
+    'ؤ'           # waw with hamza
+    'ئ'           # ya with hamza
+    'ةۀ'          # ta marbuta / he with hamza
+    'ﻩﻪﻫﻬ'        # he presentation forms
+    '\u200c\u200d'  # ZWNJ, ZWJ -> space (escaped: invisible chars get stripped by tools)
+    '۰۱۲۳۴۵۶۷۸۹'  # Persian digits
+    '٠١٢٣٤٥٦٧٨٩'  # Arabic-Indic digits
+    # Deletions (no matching char in TO):
+    'ـ'           # tatweel
+    '\u200e\u200f'  # LRM, RLM
+    '\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652'  # tashkeel U+064B..U+0652
+)
+_SQL_NORM_TO = (
+    'یک'
+    'یییی'
+    'کککک'
+    'اااا'
+    'و'
+    'ی'
+    'هه'
+    'هههه'
+    '\u0020\u0020'  # exactly two spaces: one each for ZWNJ and ZWJ
+    '0123456789'
+    '0123456789'
+)
+
+
+def _pg_str(s):
+    """Return ``s`` as a single-quoted PostgreSQL literal, doubling embedded quotes."""
+    return "'{}'".format(s.replace("'", "''"))
+
+
+CREATE_FUNCTION_SQL = """
+CREATE OR REPLACE FUNCTION normalize_persian(t text) RETURNS text AS $$
+ SELECT lower(translate(t, {src}, {dst}));
+$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
+""".format(src=_pg_str(_SQL_NORM_FROM), dst=_pg_str(_SQL_NORM_TO))
+# Reverse SQL: drops the function again when the migration is unapplied.
+DROP_FUNCTION_SQL = "DROP FUNCTION IF EXISTS normalize_persian(text);"
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("product", "0075_productvariant_guarantee"),
+    ]
+
+    operations = [
+        migrations.RunSQL(
+            sql=CREATE_FUNCTION_SQL,
+            reverse_sql=DROP_FUNCTION_SQL,
+        ),
+        # GIN trigram indexes on the normalized expression. A query only uses
+        # them when it emits exactly ``normalize_persian(<column>)`` -- no inlined
+        # translate()/lower(), no COALESCE around the column. ``gin_trgm_ops``
+        # needs the pg_trgm extension (assumed installed earlier; TODO confirm).
+        migrations.RunSQL(
+            sql=(
+                "CREATE INDEX IF NOT EXISTS product_norm_name_trgm_idx "
+                "ON product_productmodel "
+                "USING gin (normalize_persian(name) gin_trgm_ops);"
+            ),
+            reverse_sql="DROP INDEX IF EXISTS product_norm_name_trgm_idx;",
+        ),
+        migrations.RunSQL(
+            sql=(
+                "CREATE INDEX IF NOT EXISTS product_norm_keywords_trgm_idx "
+                "ON product_productmodel "
+                "USING gin (normalize_persian(meta_keywords) gin_trgm_ops);"
+            ),
+            reverse_sql="DROP INDEX IF EXISTS product_norm_keywords_trgm_idx;",
+        ),
+        migrations.RunSQL(
+            sql=(
+                "CREATE INDEX IF NOT EXISTS subcategory_norm_name_trgm_idx "
+                "ON product_subcategorymodel "
+                "USING gin (normalize_persian(name) gin_trgm_ops);"
+            ),
+            reverse_sql="DROP INDEX IF EXISTS subcategory_norm_name_trgm_idx;",
+        ),
+    ]
diff --git a/backend/product/views.py b/backend/product/views.py
index 3bfe627..3802c72 100644
--- a/backend/product/views.py
+++ b/backend/product/views.py
@@ -1,3 +1,4 @@
+import re
from .models import ProductModel
from rest_framework import serializers
from django.core.paginator import Paginator
@@ -6,8 +7,8 @@ from .models import *
from .serializers import *
from rest_framework import status
from rest_framework.response import Response
-from django.db.models import Q, Value
-from django.db.models.functions import Coalesce
+from django.db.models import Q, Value, Case, When, FloatField, F, CharField, Func
+from django.db.models.functions import Coalesce, Length
from django.contrib.postgres.search import TrigramSimilarity
from django.shortcuts import get_object_or_404
from rest_framework.permissions import IsAuthenticatedOrReadOnly
@@ -21,6 +22,179 @@ from home.models import ShowCaseSlider
from home.serializers import ShowCaseSliderSerialzier
from order.models import Cart, CartItem
from django.db.models import Min, Max, Value
+
+
+_PERSIAN_CHAR_MAP = str.maketrans({
+    # Arabic letters -> Persian equivalents
+    'ي': 'ی', 'ك': 'ک',
+    # Arabic ya/kaf presentation forms -> Persian
+    'ﻱ': 'ی', 'ﻲ': 'ی', 'ﻳ': 'ی', 'ﻴ': 'ی',
+    'ﻙ': 'ک', 'ﻚ': 'ک', 'ﻛ': 'ک', 'ﻜ': 'ک',
+    # Alef variants -> bare alef (so "ایفون" matches "آیفون")
+    'آ': 'ا', 'أ': 'ا', 'إ': 'ا', 'ٱ': 'ا',
+    # Hamza on waw/ya -> bare letter
+    'ؤ': 'و', 'ئ': 'ی',
+    # Ta marbuta / he variants -> he
+    'ة': 'ه', 'ۀ': 'ه',
+    'ﻩ': 'ه', 'ﻪ': 'ه', 'ﻫ': 'ه', 'ﻬ': 'ه',
+    # Tatweel - drop
+    'ـ': '',
+    # Tashkeel (diacritics) U+064B..U+0652 - drop; escaped because bare combining marks are unreadable
+    '\u064b': '', '\u064c': '', '\u064d': '', '\u064e': '',
+    '\u064f': '', '\u0650': '', '\u0651': '', '\u0652': '',
+    # Invisible marks, escaped so tools cannot strip them: ZWNJ/ZWJ -> space, LRM/RLM -> drop
+    '\u200c': ' ', '\u200d': ' ',
+    '\u200e': '', '\u200f': '',
+    # Arabic-Indic / Persian digits -> ASCII
+    '۰': '0', '۱': '1', '۲': '2', '۳': '3', '۴': '4',
+    '۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9',
+    '٠': '0', '١': '1', '٢': '2', '٣': '3', '٤': '4',
+    '٥': '5', '٦': '6', '٧': '7', '٨': '8', '٩': '9',
+})
+
+
+def _normalize_search_text(text):
+    """Map Persian/Arabic variants via _PERSIAN_CHAR_MAP, collapse whitespace, lowercase."""
+    if text:
+        return re.sub(r'\s+', ' ', text.translate(_PERSIAN_CHAR_MAP)).strip().lower()
+    return ''
+
+
+# SQL-side equivalent of _PERSIAN_CHAR_MAP for PostgreSQL translate(): the char
+# at position i in FROM becomes the char at position i in TO, and FROM chars past
+# len(TO) are deleted. Must stay in step with the Python map and with the copy in
+# migration 0076. Invisible/combining chars are escaped so nothing can strip them.
+_SQL_NORM_FROM = (
+    'يك'          # Arabic ya/kaf -> Persian
+    'ﻱﻲﻳﻴ'        # Arabic ya presentation forms
+    'ﻙﻚﻛﻜ'        # Arabic kaf presentation forms
+    'آأإٱ'        # alef variants
+    'ؤ'           # waw with hamza
+    'ئ'           # ya with hamza
+    'ةۀ'          # ta marbuta / he with hamza
+    'ﻩﻪﻫﻬ'        # he presentation forms
+    '\u200c\u200d'  # ZWNJ, ZWJ -> space
+    '۰۱۲۳۴۵۶۷۸۹'  # Persian digits
+    '٠١٢٣٤٥٦٧٨٩'  # Arabic-Indic digits
+    # Deletions (no matching char in TO):
+    'ـ'           # tatweel
+    '\u200e\u200f'  # LRM, RLM
+    '\u064b\u064c\u064d\u064e\u064f\u0650\u0651\u0652'  # tashkeel U+064B..U+0652
+)
+_SQL_NORM_TO = (
+    'یک'
+    'یییی'
+    'کککک'
+    'اااا'
+    'و'
+    'ی'
+    'هه'
+    'هههه'
+    '\u0020\u0020'  # exactly two spaces: one each for ZWNJ and ZWJ
+    '0123456789'
+    '0123456789'
+)
+
+
+def NormalizePersian(expression):
+    """Wrap ``expression`` in a call to the ``normalize_persian(text)`` SQL function.
+
+    Migration 0076 defines it as ``lower(translate(t, FROM, TO))``, IMMUTABLE,
+    with GIN trigram indexes on ``normalize_persian(name)`` etc. Emitting the
+    call itself (not an inlined translate/lower) keeps the query expression
+    identical to the indexed one so the planner can match the indexes.
+    """
+    call_options = {'function': 'normalize_persian', 'output_field': CharField()}
+    return Func(expression, **call_options)
+
+
+def _apply_product_search(queryset, search_query):
+    """Filter and rank a Product queryset by a (possibly Persian) search query.
+
+    Returns ``(queryset, normalized_query)``. A blank query returns the queryset
+    untouched with ``''``; otherwise it is annotated with ``similarity`` (plus
+    ``norm_*``, ``*_sim``, ``match_bonus``) for ``order_by('-similarity', ...)``.
+    When nothing matches strictly, looser thresholds supply "similar" results.
+    """
+    normalized_query = _normalize_search_text(search_query)
+    if not normalized_query:
+        return queryset, ''
+
+    tokens = [t for t in normalized_query.split(' ') if len(t) >= 2]
+
+    def _sim(alias):
+        # similarity() of NULL is NULL, which would turn the whole ``similarity``
+        # sum into NULL; score a missing value as 0.0 instead.
+        sim = TrigramSimilarity(F(alias), normalized_query)
+        return Coalesce(sim, Value(0.0), output_field=FloatField())
+
+    # ``norm_*`` must render as exactly ``normalize_persian(<column>)``, the
+    # expression indexed in migration 0076. Wrapping the column in COALESCE
+    # first produces a different expression that no index matches, so NULLs
+    # are left alone here (NULL LIKE ... is simply not a match).
+    annotated = queryset.annotate(
+        norm_name=NormalizePersian('name'),
+        norm_keywords=NormalizePersian('meta_keywords'),
+        norm_category=NormalizePersian('category__name'),
+        norm_desc=NormalizePersian('description'),
+    ).annotate(
+        name_sim=_sim('norm_name'),
+        keywords_sim=_sim('norm_keywords'),
+        category_sim=_sim('norm_category'),
+        desc_sim=_sim('norm_desc'),
+    ).annotate(
+        # Word-boundary bonuses: the space-padded variants make "چای" rank
+        # above "چایساز". Lookups are case-sensitive on purpose: both sides
+        # are already lowercased and __icontains would add UPPER(...).
+        match_bonus=Case(
+            When(norm_name__exact=normalized_query, then=Value(10.0)),
+            When(norm_name__startswith=normalized_query + ' ', then=Value(6.0)),
+            When(norm_name__startswith=normalized_query, then=Value(3.5)),
+            When(norm_name__contains=' ' + normalized_query + ' ', then=Value(3.0)),
+            When(norm_name__contains=' ' + normalized_query, then=Value(2.5)),
+            When(norm_name__contains=normalized_query + ' ', then=Value(2.5)),
+            When(norm_name__contains=normalized_query, then=Value(1.5)),
+            default=Value(0.0),
+            output_field=FloatField(),
+        )
+    ).annotate(
+        similarity=(
+            F('match_bonus')
+            + F('name_sim') * Value(2.0)
+            + F('keywords_sim') * Value(0.8)
+            + F('category_sim') * Value(0.4)
+            + F('desc_sim') * Value(0.15)
+        )
+    )
+
+    if tokens:
+        # Every token must occur in name, keywords or category name (the
+        # fields with trigram indexes); description only affects ranking.
+        # NOTE(review): the ``*_sim__gte`` branches filter on similarity()
+        # output, which a GIN index cannot serve -- check EXPLAIN for this OR.
+        token_filter = Q()
+        for token in tokens:
+            token_filter &= (
+                Q(norm_name__contains=token)
+                | Q(norm_keywords__contains=token)
+                | Q(norm_category__contains=token)
+            )
+        strict_filter = token_filter | Q(name_sim__gte=0.45) | Q(keywords_sim__gte=0.5)
+    else:
+        strict_filter = Q(name_sim__gte=0.4) | Q(keywords_sim__gte=0.4)
+
+    strict_products = annotated.filter(strict_filter).distinct()
+    if strict_products.exists():
+        return strict_products, normalized_query
+
+    # No strict matches: relax thresholds so the user gets suggestions.
+    loose_filter = (
+        Q(name_sim__gte=0.18)
+        | Q(keywords_sim__gte=0.22)
+        | Q(category_sim__gte=0.3)
+        | Q(match_bonus__gt=0)
+    )
+    return annotated.filter(loose_filter).distinct(), normalized_query
# class APIView(APIView):
# def __init__(self, *args, **kwargs):
# super().__init__(*args, **kwargs)
@@ -324,18 +498,9 @@ class AllProductsView(APIView):
status=status.HTTP_400_BAD_REQUEST
)
- # Search
+ # Search (Persian-aware, with typo tolerance + similar-results fallback)
search_query = request.query_params.get('search')
- if search_query:
- products = products.annotate(
- similarity=(
- TrigramSimilarity('name', search_query) +
- TrigramSimilarity(
- Coalesce('description', Value('')),
- search_query
- )
- )
- ).filter(similarity__gt=0.1)
+ products, normalized_query = _apply_product_search(products, search_query)
# Price annotation (IMPORTANT for sorting)
products = products.annotate(
@@ -376,8 +541,10 @@ class AllProductsView(APIView):
elif sort_by in ['price', '-price']:
products = products.order_by('min_price' if sort_by == 'price' else '-min_price')
- elif search_query:
- products = products.order_by('-similarity', 'name')
+ elif normalized_query:
+ # Tie-break on shorter name: ensures "چای" outranks "چای ساز"
+ # when their bonus-adjusted similarities are close.
+ products = products.order_by('-similarity', Length('norm_name'), 'name')
else:
products = products.order_by('name')
@@ -522,11 +689,9 @@ class ShowCaseProductsView(APIView):
if has_discount:
products = products.filter(variants__discount__gt=0).distinct()
- # Search filter
+ # Search filter (Persian-aware, with typo tolerance + similar-results fallback)
search_query = request.query_params.get('search', None)
- if search_query:
- products = products.filter(Q(name__icontains=search_query) | Q(
- description__icontains=search_query))
+ products, normalized_query = _apply_product_search(products, search_query)
# Price filters
price_gte = request.query_params.get('price_gte', None)
@@ -543,6 +708,8 @@ class ShowCaseProductsView(APIView):
sort_by = request.query_params.get('sort', None)
if sort_by in ['name', '-name', 'created_at', '-created_at']:
products = products.order_by(sort_by)
+ elif normalized_query:
+ products = products.order_by('-similarity', Length('norm_name'), 'name')
else:
products = products.order_by('name')