Compare commits

...

2 Commits

Author SHA1 Message Date
shamoon
0b265aadc6 Add error handling w retty when opening index 2025-12-17 09:14:26 -08:00
shamoon
84511d07cd Add test to mock issue 2025-12-17 09:07:22 -08:00
2 changed files with 58 additions and 5 deletions

View File

@@ -10,6 +10,7 @@ from datetime import time
from datetime import timedelta
from datetime import timezone
from shutil import rmtree
from time import sleep
from typing import TYPE_CHECKING
from typing import Literal
@@ -32,6 +33,7 @@ from whoosh.highlight import HtmlFormatter
from whoosh.idsets import BitSet
from whoosh.idsets import DocIdSet
from whoosh.index import FileIndex
from whoosh.index import LockError
from whoosh.index import create_in
from whoosh.index import exists_in
from whoosh.index import open_dir
@@ -97,11 +99,33 @@ def get_schema() -> Schema:
def open_index(*, recreate=False) -> FileIndex:
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR, schema=get_schema())
except Exception:
logger.exception("Error while opening the index, recreating.")
transient_exceptions = (FileNotFoundError, LockError)
max_retries = 3
retry_delay = 0.1
for attempt in range(max_retries + 1):
try:
if exists_in(settings.INDEX_DIR) and not recreate:
return open_dir(settings.INDEX_DIR, schema=get_schema())
break
except transient_exceptions as exc:
is_last_attempt = attempt == max_retries or recreate
if is_last_attempt:
logger.exception(
"Error while opening the index after retries, recreating.",
)
break
logger.warning(
"Transient error while opening the index (attempt %s/%s): %s. Retrying.",
attempt + 1,
max_retries + 1,
exc,
)
sleep(retry_delay)
except Exception:
logger.exception("Error while opening the index, recreating.")
break
# create_in doesn't handle corrupted indexes very well, remove the directory entirely first
if settings.INDEX_DIR.is_dir():

View File

@@ -1,6 +1,7 @@
from datetime import datetime
from unittest import mock
from django.conf import settings
from django.contrib.auth.models import User
from django.test import SimpleTestCase
from django.test import TestCase
@@ -251,3 +252,31 @@ class TestRewriteNaturalDateKeywords(SimpleTestCase):
result = self._rewrite_with_now("added:today", fixed_now)
# Should convert to UTC properly
self.assertIn("added:[20250719", result)
class TestIndexResilience(DirectoriesMixin, SimpleTestCase):
def test_transient_missing_segment_does_not_force_recreate(self):
file_marker = settings.INDEX_DIR / "file_marker.txt"
file_marker.write_text("keep")
expected_index = object()
with (
mock.patch("documents.index.exists_in", return_value=True),
mock.patch(
"documents.index.open_dir",
side_effect=[FileNotFoundError("missing"), expected_index],
) as mock_open_dir,
mock.patch(
"documents.index.create_in",
) as mock_create_in,
mock.patch(
"documents.index.rmtree",
) as mock_rmtree,
):
ix = index.open_index()
self.assertIs(ix, expected_index)
self.assertGreaterEqual(mock_open_dir.call_count, 2)
mock_rmtree.assert_not_called()
mock_create_in.assert_not_called()
self.assertEqual(file_marker.read_text(), "keep")