mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-01-10 12:14:46 +00:00
Normalize filenames and titles to NFC
This commit is contained in:
@@ -46,6 +46,7 @@ from documents.signals.handlers import run_workflows
|
||||
from documents.templating.workflows import parse_w_workflow_placeholders
|
||||
from documents.utils import copy_basic_file_stats
|
||||
from documents.utils import copy_file_with_basic_stats
|
||||
from documents.utils import normalize_nfc
|
||||
from documents.utils import run_subprocess
|
||||
from paperless_mail.parsers import MailDocumentParser
|
||||
|
||||
@@ -111,7 +112,12 @@ class ConsumerPluginMixin:
|
||||
|
||||
self.renew_logging_group()
|
||||
|
||||
self.filename = self.metadata.filename or self.input_doc.original_file.name
|
||||
self.metadata.filename = normalize_nfc(self.metadata.filename)
|
||||
self.metadata.title = normalize_nfc(self.metadata.title)
|
||||
|
||||
self.filename = normalize_nfc(
|
||||
self.metadata.filename or self.input_doc.original_file.name,
|
||||
)
|
||||
|
||||
def _send_progress(
|
||||
self,
|
||||
@@ -652,6 +658,8 @@ class ConsumerPlugin(
|
||||
f"Error occurred parsing title override '{self.metadata.title}', falling back to original. Exception: {e}",
|
||||
)
|
||||
|
||||
title = normalize_nfc(title)
|
||||
|
||||
file_for_checksum = (
|
||||
self.unmodified_original
|
||||
if self.unmodified_original is not None
|
||||
|
||||
@@ -6,6 +6,7 @@ from django.conf import settings
|
||||
from documents.models import Document
|
||||
from documents.templating.filepath import validate_filepath_template_and_render
|
||||
from documents.templating.utils import convert_format_str_to_template_format
|
||||
from documents.utils import normalize_nfc
|
||||
|
||||
|
||||
def create_source_path_directory(source_path: Path) -> None:
|
||||
@@ -55,11 +56,11 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
|
||||
"""
|
||||
if archive_filename:
|
||||
old_filename: Path | None = (
|
||||
Path(doc.archive_filename) if doc.archive_filename else None
|
||||
Path(normalize_nfc(doc.archive_filename)) if doc.archive_filename else None
|
||||
)
|
||||
root = settings.ARCHIVE_DIR
|
||||
else:
|
||||
old_filename = Path(doc.filename) if doc.filename else None
|
||||
old_filename = Path(normalize_nfc(doc.filename)) if doc.filename else None
|
||||
root = settings.ORIGINALS_DIR
|
||||
|
||||
# If generating archive filenames, try to make a name that is similar to
|
||||
@@ -91,7 +92,7 @@ def generate_unique_filename(doc, *, archive_filename=False) -> Path:
|
||||
)
|
||||
if new_filename == old_filename:
|
||||
# still the same as before.
|
||||
return new_filename
|
||||
return Path(normalize_nfc(str(new_filename)))
|
||||
|
||||
if (root / new_filename).exists():
|
||||
counter += 1
|
||||
@@ -119,7 +120,7 @@ def format_filename(document: Document, template_str: str) -> str | None:
|
||||
"none",
|
||||
) # backward compatibility
|
||||
|
||||
return rendered_filename
|
||||
return normalize_nfc(rendered_filename)
|
||||
|
||||
|
||||
def generate_filename(
|
||||
@@ -174,4 +175,4 @@ def generate_filename(
|
||||
if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
|
||||
full_path = full_path.with_suffix(full_path.suffix + ".gpg")
|
||||
|
||||
return full_path
|
||||
return Path(normalize_nfc(str(full_path)))
|
||||
|
||||
@@ -290,6 +290,23 @@ class TestConsumer(
|
||||
|
||||
self._assert_first_last_send_progress()
|
||||
|
||||
def test_override_filename_normalized(self):
    """
    Consume a test file with an override filename written in decomposed
    form and check that the stored original filename and title match the
    expected normalized spelling.
    """
    source_file = self.get_test_file()
    # "u" followed by U+0308 (combining diaeresis): the decomposed spelling.
    decomposed_name = "Inhaltsu\u0308bersicht.pdf"
    overrides = DocumentMetadataOverrides(filename=decomposed_name)

    with self.get_consumer(source_file, overrides) as consumer:
        consumer.run()

    document = Document.objects.first()
    self.assertIsNotNone(document)

    # Both the stored filename and the derived title must compare equal
    # to the expected literals.
    self.assertEqual(document.original_filename, "Inhaltsübersicht.pdf")
    self.assertEqual(document.title, "Inhaltsübersicht")
    self._assert_first_last_send_progress()
|
||||
|
||||
def testOverrideTitle(self):
|
||||
with self.get_consumer(
|
||||
self.get_test_file(),
|
||||
@@ -304,6 +321,25 @@ class TestConsumer(
|
||||
self.assertEqual(document.title, "Override Title")
|
||||
self._assert_first_last_send_progress()
|
||||
|
||||
@override_settings(FILENAME_FORMAT="{{ title }}")
def test_filename_format_normalized(self):
    """
    Consume a test file with an override title written in decomposed form
    while FILENAME_FORMAT renders the title, and check that the stored
    title and filename match the expected normalized spelling and that
    the source file exists.
    """
    source_file = self.get_test_file()
    # "u" followed by U+0308 (combining diaeresis): the decomposed spelling.
    decomposed_title = "Inhaltsu\u0308bersicht Faszination"
    overrides = DocumentMetadataOverrides(title=decomposed_title)

    with self.get_consumer(source_file, overrides) as consumer:
        consumer.run()

    document = Document.objects.first()
    self.assertIsNotNone(document)

    self.assertEqual(document.title, "Inhaltsübersicht Faszination")
    self.assertEqual(document.filename, "Inhaltsübersicht Faszination.pdf")
    # The rendered filename must also point at a file that exists.
    self.assertIsFile(document.source_path)
    self._assert_first_last_send_progress()
|
||||
|
||||
def testOverrideCorrespondent(self):
|
||||
c = Correspondent.objects.create(name="test")
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import logging
|
||||
import shutil
|
||||
import unicodedata
|
||||
from os import PathLike
|
||||
from os import utime
|
||||
from pathlib import Path
|
||||
from subprocess import CompletedProcess
|
||||
@@ -16,6 +18,14 @@ def _coerce_to_path(
|
||||
return Path(source).resolve(), Path(dest).resolve()
|
||||
|
||||
|
||||
def normalize_nfc(value: str | PathLike[str] | None) -> str | None:
|
||||
"""Return NFC-normalized string for filesystem-safe comparisons."""
|
||||
|
||||
if value is None:
|
||||
return None
|
||||
return unicodedata.normalize("NFC", str(value))
|
||||
|
||||
|
||||
def copy_basic_file_stats(source: Path | str, dest: Path | str) -> None:
|
||||
"""
|
||||
Copies only the m_time and a_time attributes from source to destination.
|
||||
|
||||
Reference in New Issue
Block a user