Compare commits
137 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
94c2950afe | ||
|
|
9f56bf9992 | ||
|
|
6df35e4cb7 | ||
|
|
b4b7d167d1 | ||
|
|
4936fad542 | ||
|
|
3c78105fd7 | ||
|
|
a58a7ce0f7 | ||
|
|
792aeee11e | ||
|
|
5588e86855 | ||
|
|
97f1e4ab16 | ||
|
|
e4dece8e53 | ||
|
|
c5c204f605 | ||
|
|
611ec6840b | ||
|
|
2cd077d12d | ||
|
|
4efb153e86 | ||
|
|
25e953bbf0 | ||
|
|
0509d5a3d2 | ||
|
|
5e674f17af | ||
|
|
7c7a814096 | ||
|
|
43e71cfcaa | ||
|
|
79868930f1 | ||
|
|
0256dcbe32 | ||
|
|
29db177ce2 | ||
|
|
5c1edf78ce | ||
|
|
60e8990a7b | ||
|
|
75a79ac204 | ||
|
|
0c47907dda | ||
|
|
cea8332038 | ||
|
|
5982cb693a | ||
|
|
73a02d40c4 | ||
|
|
b541765817 | ||
|
|
28ffd1ec6b | ||
|
|
5760aa0894 | ||
|
|
562e5f644d | ||
|
|
5ab2009ebf | ||
|
|
637b0d4cc2 | ||
|
|
4a71c33537 | ||
|
|
cf36c8467e | ||
|
|
dafa6a4c71 | ||
|
|
a3c5ec834d | ||
|
|
be57dbe4c8 | ||
|
|
4d50c7e105 | ||
|
|
27af2603f5 | ||
|
|
ff5b34179a | ||
|
|
0334617287 | ||
|
|
f8b43fa74b | ||
|
|
1ff06d0dd9 | ||
|
|
4ad6813d11 | ||
|
|
cbc5f0603f | ||
|
|
0d21bdeffa | ||
|
|
b1f9b18b8c | ||
|
|
4d13521f36 | ||
|
|
7b4785bdb9 | ||
|
|
baf89cad8e | ||
|
|
3c2a1a8c13 | ||
|
|
1c7047bbb8 | ||
|
|
96dafe8c43 | ||
|
|
d6896daece | ||
|
|
d12f0642f2 | ||
|
|
a19f0ef97e | ||
|
|
ec7125b6bb | ||
|
|
e3a616ebc3 | ||
|
|
f898ec792f | ||
|
|
f45b6762f2 | ||
|
|
d544f269e0 | ||
|
|
650db75c2b | ||
|
|
7dbb77e57b | ||
|
|
f1b3312bcb | ||
|
|
ea05ab2b06 | ||
|
|
4f4c515629 | ||
|
|
c1f926a40c | ||
|
|
c1d18c1e83 | ||
|
|
ba452e0524 | ||
|
|
c5488dcb98 | ||
|
|
d6eefbccee | ||
|
|
a813288aaf | ||
|
|
63e2fbe0c9 | ||
|
|
597a7bb391 | ||
|
|
730daa3d6d | ||
|
|
c225281f95 | ||
|
|
e1d8744c66 | ||
|
|
4409f65840 | ||
|
|
c83dc666a4 | ||
|
|
9ab50ed09d | ||
|
|
e0acb4a40b | ||
|
|
eca6250c1b | ||
|
|
33abec0663 | ||
|
|
d825667c9b | ||
|
|
84511f8418 | ||
|
|
81e488b90d | ||
|
|
bff28113df | ||
|
|
0b377a76d0 | ||
|
|
ec1d5c80ff | ||
|
|
bd95804fbf | ||
|
|
8dc355a66f | ||
|
|
fbb389553c | ||
|
|
f8cfbb44d2 | ||
|
|
818780a191 | ||
|
|
b350ec48b7 | ||
|
|
f948ee11be | ||
|
|
2ef2bf873e | ||
|
|
0bb7d27269 | ||
|
|
ce5e8b2658 | ||
|
|
3f572afb8b | ||
|
|
5c3cb1e4ab | ||
|
|
c7f4bfe4f3 | ||
|
|
65d6599964 | ||
|
|
5d32e89c44 | ||
|
|
750ab5bf85 | ||
|
|
2a3f766b93 | ||
|
|
14bb52b6a4 | ||
|
|
b5176d207e | ||
|
|
e4044d0df9 | ||
|
|
bacdd51fd7 | ||
|
|
8010d72f18 | ||
|
|
9dd76f1b87 | ||
|
|
a511d34d69 | ||
|
|
35c5b8e263 | ||
|
|
8726b0316c | ||
|
|
acf6caca2f | ||
|
|
b20d7eca03 | ||
|
|
d17497fd5b | ||
|
|
090565d84c | ||
|
|
79e1e60238 | ||
|
|
ff111f1bde | ||
|
|
6db788a550 | ||
|
|
f4a09013d7 | ||
|
|
4130dd3465 | ||
|
|
117d7dad04 | ||
|
|
b420281be0 | ||
|
|
17f8953a49 | ||
|
|
9682a6f6fc | ||
|
|
425bbe34ef | ||
|
|
60ee08adec | ||
|
|
b4b4d8f25e | ||
|
|
cce6b43062 | ||
|
|
fb6f2e07c9 |
28
.editorconfig
Normal file
@@ -0,0 +1,28 @@
|
||||
# EditorConfig: http://EditorConfig.org
|
||||
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = tab
|
||||
indent_size = 2
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
max_line_length = 79
|
||||
|
||||
[{*.html,*.css,*.js}]
|
||||
max_line_length = off
|
||||
|
||||
[*.py]
|
||||
indent_size = 4
|
||||
indent_style = space
|
||||
|
||||
[*.yml]
|
||||
indent_style = space
|
||||
|
||||
# Tests don't get a line width restriction. It's still a good idea to follow
|
||||
# the 79 character rule, but in the interests of clarity, tests often need to
|
||||
# violate it.
|
||||
[**/test_*.py]
|
||||
max_line_length = off
|
||||
4
.gitignore
vendored
@@ -66,6 +66,7 @@ media/overrides.js
|
||||
|
||||
# Sqlite database
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# PyCharm
|
||||
.idea
|
||||
@@ -73,7 +74,6 @@ db.sqlite3
|
||||
# Other stuff that doesn't belong
|
||||
.virtualenv
|
||||
virtualenv
|
||||
.vagrant
|
||||
docker-compose.yml
|
||||
docker-compose.env
|
||||
|
||||
@@ -81,3 +81,5 @@ docker-compose.env
|
||||
scripts/import-for-development
|
||||
scripts/nuke
|
||||
|
||||
# Static files collected by the collectstatic command
|
||||
./static/
|
||||
|
||||
17
.travis.yml
@@ -2,19 +2,22 @@ language: python
|
||||
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng
|
||||
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr
|
||||
|
||||
sudo: false
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- python: 3.4
|
||||
- python: 3.5
|
||||
- python: 3.6
|
||||
- python: "3.4"
|
||||
- python: "3.5"
|
||||
- python: "3.6"
|
||||
- python: "3.7-dev"
|
||||
|
||||
install:
|
||||
- pip install --requirement requirements.txt
|
||||
- pip install sphinx
|
||||
- pip install --upgrade pip pipenv sphinx
|
||||
- pipenv lock -r > requirements.txt
|
||||
- pip install -r requirements.txt
|
||||
|
||||
script:
|
||||
- cd src/
|
||||
- pytest --cov
|
||||
@@ -22,4 +25,4 @@ script:
|
||||
- sphinx-build -b html ../docs ../docs/_build -W
|
||||
|
||||
after_success:
|
||||
- coveralls
|
||||
- coveralls
|
||||
|
||||
20
Dockerfile
@@ -1,28 +1,28 @@
|
||||
FROM alpine:3.7
|
||||
FROM alpine:3.8
|
||||
|
||||
LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless" \
|
||||
contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
|
||||
Sven Fischer <git-dev@linux4tw.de>"
|
||||
|
||||
# Copy requirements file and init script
|
||||
COPY requirements.txt /usr/src/paperless/
|
||||
# Copy Pipfiles file and init script
|
||||
COPY Pipfile* /usr/src/paperless/
|
||||
COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
|
||||
|
||||
# Set export and consumption directories
|
||||
ENV PAPERLESS_EXPORT_DIR=/export \
|
||||
PAPERLESS_CONSUMPTION_DIR=/consume
|
||||
|
||||
# Install dependencies
|
||||
RUN apk --no-cache --update add \
|
||||
python3 gnupg libmagic bash shadow curl \
|
||||
sudo poppler tesseract-ocr imagemagick ghostscript unpaper && \
|
||||
apk --no-cache add --virtual .build-dependencies \
|
||||
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
||||
|
||||
RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow curl \
|
||||
sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
|
||||
apk add --virtual .build-dependencies \
|
||||
python3-dev poppler-dev postgresql-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
||||
# Install python dependencies
|
||||
python3 -m ensurepip && \
|
||||
rm -r /usr/lib/python*/ensurepip && \
|
||||
cd /usr/src/paperless && \
|
||||
pip3 install --no-cache-dir -r requirements.txt && \
|
||||
pip3 install --upgrade pip pipenv && \
|
||||
pipenv install --system --deploy && \
|
||||
# Remove build dependencies
|
||||
apk del .build-dependencies && \
|
||||
# Create the consumption directory
|
||||
|
||||
7
Pipfile
@@ -25,6 +25,8 @@ python-dateutil = "*"
|
||||
python-dotenv = "*"
|
||||
python-gnupg = "*"
|
||||
pytz = "*"
|
||||
sphinx = "*"
|
||||
tox = "*"
|
||||
pycodestyle = "*"
|
||||
pytest = "*"
|
||||
pytest-cov = "*"
|
||||
@@ -32,9 +34,8 @@ pytest-django = "*"
|
||||
pytest-sugar = "*"
|
||||
pytest-env = "*"
|
||||
pytest-xdist = "*"
|
||||
psycopg2 = "*"
|
||||
djangoql = "*"
|
||||
|
||||
[dev-packages]
|
||||
ipython = "*"
|
||||
sphinx = "*"
|
||||
tox = "*"
|
||||
|
||||
|
||||
719
Pipfile.lock
generated
@@ -1,7 +1,6 @@
|
||||
*[English](README.md)*<br/>
|
||||
*[Greek](README-el.md)*
|
||||
[ [en](README.md) | de | [el](README-el.md) ]
|
||||
|
||||
# Paperless
|
||||

|
||||
|
||||
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
*[English](README.md)*<br/>
|
||||
*[German](README-de.md)*
|
||||
[ [en](README.md) | [de](README-de.md) | el ]
|
||||
|
||||
# Paperless
|
||||

|
||||
|
||||
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
*[German](README-de.md)*<br/>
|
||||
*[Greek](README-el.md)*
|
||||
[ en | [de](README-de.md) | [el](README-el.md) ]
|
||||
|
||||
# Paperless
|
||||

|
||||
|
||||
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
||||
|
||||
|
||||
20
Vagrantfile
vendored
@@ -1,20 +0,0 @@
|
||||
# -*- mode: ruby -*-
|
||||
# vi: set ft=ruby :
|
||||
|
||||
VAGRANT_API_VERSION = "2"
|
||||
Vagrant.configure(VAGRANT_API_VERSION) do |config|
|
||||
config.vm.box = "ubuntu/trusty64"
|
||||
|
||||
# Provision using shell
|
||||
config.vm.host_name = "dev.paperless"
|
||||
config.vm.synced_folder ".", "/opt/paperless"
|
||||
config.vm.provision "shell", path: "scripts/vagrant-provision"
|
||||
|
||||
# Networking details
|
||||
config.vm.network "private_network", ip: "172.28.128.4"
|
||||
|
||||
config.vm.provider "virtualbox" do |vb|
|
||||
# Customize the amount of memory on the VM:
|
||||
vb.memory = "1024"
|
||||
end
|
||||
end
|
||||
@@ -1,38 +1,22 @@
|
||||
# Environment variables to set for Paperless
|
||||
# Commented out variables will be replaced by a default within Paperless.
|
||||
# Commented out variables will be replaced with a default within Paperless.
|
||||
#
|
||||
# In addition to what you see here, you can also define any values you find in
|
||||
# paperless.conf.example here. Values like:
|
||||
#
|
||||
# * PAPERLESS_PASSPHRASE
|
||||
# * PAPERLESS_CONSUMPTION_DIR
|
||||
# * PAPERLESS_CONSUME_MAIL_HOST
|
||||
#
|
||||
# ...are all explained in that file but can be defined here, since the Docker
|
||||
# installation doesn't make use of paperless.conf.
|
||||
|
||||
# Passphrase Paperless uses to encrypt and decrypt your documents, if you want
|
||||
# encryption at all.
|
||||
# PAPERLESS_PASSPHRASE=CHANGE_ME
|
||||
|
||||
# The amount of threads to use for text recognition
|
||||
# PAPERLESS_OCR_THREADS=4
|
||||
|
||||
# Additional languages to install for text recognition
|
||||
# Additional languages to install for text recognition. Note that this is
|
||||
# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the
|
||||
# default language used when guessing the language from the OCR output.
|
||||
# PAPERLESS_OCR_LANGUAGES=deu ita
|
||||
|
||||
# You can change the default user and group id to a custom one
|
||||
# USERMAP_UID=1000
|
||||
# USERMAP_GID=1000
|
||||
|
||||
###############################################################################
|
||||
#### Mail Consumption ####
|
||||
###############################################################################
|
||||
|
||||
# These values are required if you want paperless to check a particular email
|
||||
# box every 10 minutes and attempt to consume documents from there. If you
|
||||
# don't define a HOST, mail checking will just be disabled.
|
||||
# Don't use quotes after = or it will crash your docker
|
||||
# PAPERLESS_CONSUME_MAIL_HOST=
|
||||
# PAPERLESS_CONSUME_MAIL_PORT=
|
||||
# PAPERLESS_CONSUME_MAIL_USER=
|
||||
# PAPERLESS_CONSUME_MAIL_PASS=
|
||||
|
||||
# Override the default IMAP inbox here. If it's not set, Paperless defaults to
|
||||
# INBOX.
|
||||
# PAPERLESS_CONSUME_MAIL_INBOX=INBOX
|
||||
|
||||
# Any email sent to the target account that does not contain this text will be
|
||||
# ignored. Mail checking won't work without this.
|
||||
# PAPERLESS_EMAIL_SECRET=
|
||||
|
||||
|
||||
@@ -17,6 +17,9 @@ services:
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
# You have to adapt the local path you want the consumption
|
||||
# directory to mount to by modifying the part before the ':'.
|
||||
- ./consume:/consume
|
||||
env_file: docker-compose.env
|
||||
# The reason the line is here is so that the webserver that doesn't do
|
||||
# any text recognition and doesn't have to install unnecessary
|
||||
@@ -36,8 +39,8 @@ services:
|
||||
volumes:
|
||||
- data:/usr/src/paperless/data
|
||||
- media:/usr/src/paperless/media
|
||||
# You have to adapt the local path you want the consumption
|
||||
# directory to mount to by modifying the part before the ':'.
|
||||
# This should be set to the same value as the consume directory
|
||||
# in the webserver service above.
|
||||
- ./consume:/consume
|
||||
# Likewise, you can add a local path to mount a directory for
|
||||
# exporting. This is not strictly needed for paperless to
|
||||
|
||||
@@ -1,6 +1,121 @@
|
||||
Changelog
|
||||
#########
|
||||
|
||||
2.7.0
|
||||
=====
|
||||
|
||||
* `syntonym`_ submitted a pull request to catch IMAP connection errors `#475`_.
|
||||
* `Stéphane Brunner`_ added ``psycopg2`` to the Pipfile `#489`_. He also fixed
|
||||
a syntax error in ``docker-compose.yml.example`` `#488`_ and added [DjangoQL](https://github.com/ivelum/djangoql),
|
||||
which allows a litany of handy search functionality `#492`_.
|
||||
* `CkuT`_ and `JOKer`_ hacked out a simple, but super-helpful optimisation to
|
||||
how the thumbnails are served up, improving performance considerably `#481`_.
|
||||
* `tsia`_ added a few fields to the tags REST API. `#483`_.
|
||||
* `Brian Cribbs`_ improved the documentation to help people using Paperless
|
||||
over NFS `#484`_.
|
||||
* `Brendan M. Sleight`_ updated the documentation to include a note for setting the
|
||||
``DEBUG`` value. The ``paperless.conf.example`` file was also updated to
|
||||
mirror the project defaults.
|
||||
|
||||
|
||||
2.6.1
|
||||
=====
|
||||
|
||||
* We now have a logo, complete with a favicon :-)
|
||||
* Removed some problematic tests.
|
||||
* Fix the docker-compose example config to include a shared consume volume so
|
||||
that using the push API will work for users of the Docker install. Thanks to
|
||||
`Colin Frei`_ for fixing this in `#466`_.
|
||||
* `khrise`_ submitted a pull request to include the ``added`` property to the
|
||||
REST API `#471`_.
|
||||
|
||||
|
||||
2.6.0
|
||||
=====
|
||||
|
||||
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
|
||||
the problem in `#433`_.
|
||||
* Fix the ``RecentCorrespondentsFilter`` correspondents filter that was added
|
||||
in 2.4 to play nice with the defaults. Thanks to `tsia`_ and `Sblop`_ who
|
||||
pointed this out. `#423`_.
|
||||
* Updated dependencies to include (among other things) a security patch to
|
||||
requests.
|
||||
* Fix text in sample data for tests so that the language guesser stops thinking
|
||||
that everything is in Catalan because we had *Lorem ipsum* in there.
|
||||
* Tweaked the gunicorn sample command to use filesystem paths instead of Python
|
||||
paths. `#441`_
|
||||
* Added pretty colour boxes next to the hex values in the Tags section, thanks
|
||||
to a pull request from `Joshua Taillon`_ `#442`_.
|
||||
* Added a ``.editorconfig`` file to better specify coding style.
|
||||
* `Joshua Taillon`_ also added some logic to tie Paperless' date guessing logic
|
||||
into how it parses file names on import. `#440`_
|
||||
|
||||
|
||||
2.5.0
|
||||
=====
|
||||
|
||||
* **New dependency**: Paperless now optimises thumbnail generation with
|
||||
`optipng`_, so you'll need to install that somewhere in your PATH or declare
|
||||
its location in ``PAPERLESS_OPTIPNG_BINARY``. The Docker image has already
|
||||
been updated on the Docker Hub, so you just need to pull the latest one from
|
||||
there if you're a Docker user.
|
||||
|
||||
* "Login free" instances of Paperless were breaking whenever you tried to edit
|
||||
objects in the admin: adding/deleting tags or correspondents, or even fixing
|
||||
spelling. This was due to the "user hack" we were applying to sessions that
|
||||
weren't using a login, as that hack user didn't have a valid id. The fix was
|
||||
to attribute the first user id in the system to this hack user. `#394`_
|
||||
|
||||
* A problem in how we handle slug values on Tags and Correspondents required a
|
||||
few changes to how we handle this field `#393`_:
|
||||
|
||||
1. Slugs are no longer editable. They're derived from the name of the tag or
|
||||
correspondent at save time, so if you wanna change the slug, you have to
|
||||
change the name, and even then you're restricted to the rules of the
|
||||
``slugify()`` function. The slug value is still visible in the admin
|
||||
though.
|
||||
2. I've added a migration to go over all existing tags & correspondents and
|
||||
rewrite the ``.slug`` values to ones conforming to the ``slugify()``
|
||||
rules.
|
||||
3. The consumption process now uses the same rules as ``.save()`` in
|
||||
determining a slug and using that to check for an existing
|
||||
tag/correspondent.
|
||||
|
||||
* An annoying bug in the date capture code was causing some bogus dates to be
|
||||
attached to documents, which in turn busted the UI. Thanks to `Andrew Peng`_
|
||||
for reporting this. `#414`_.
|
||||
|
||||
* A bug in the Dockerfile meant that Tesseract language files weren't being
|
||||
installed correctly. `euri10`_ was quick to provide a fix: `#406`_, `#413`_.
|
||||
|
||||
* Document consumption is now wrapped in a transaction as per an old ticket
|
||||
`#262`_.
|
||||
|
||||
* The ``get_date()`` functionality of the parsers has been consolidated onto
|
||||
the ``DocumentParser`` class since much of that code was redundant anyway.
|
||||
|
||||
|
||||
2.4.0
|
||||
=====
|
||||
|
||||
* A new set of actions are now available thanks to `jonaswinkler`_'s very first
|
||||
pull request! You can now do nifty things like tag documents in bulk, or set
|
||||
correspondents in bulk. `#405`_
|
||||
* The import/export system is now a little smarter. By default, documents are
|
||||
tagged as ``unencrypted``, since exports are by their nature unencrypted.
|
||||
It's now in the import step that we decide the storage type. This allows you
|
||||
to export from an encrypted system and import into an unencrypted one, or
|
||||
vice-versa.
|
||||
* The migration history has been slightly modified to accommodate PostgreSQL
|
||||
users. Additionally, you can now tell paperless to use PostgreSQL simply by
|
||||
declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to
|
||||
connect to your Postgres database without a password unless you also set
|
||||
``PAPERLESS_DBPASS``.
|
||||
* A bug was found in the REST API filter system that was the result of an
|
||||
update of django-filter some time ago. This has now been patched in `#412`_.
|
||||
Thanks to `thepill`_ for spotting it!
|
||||
|
||||
|
||||
2.3.0
|
||||
=====
|
||||
|
||||
@@ -15,7 +130,8 @@ Changelog
|
||||
* As his last bit of effort on this release, Joshua also added some code to
|
||||
allow you to view the documents inline rather than download them as an
|
||||
attachment. `#400`_
|
||||
* Finally, `ahyear`_ found a slip in the Docker documentation and patched it. `#401`_
|
||||
* Finally, `ahyear`_ found a slip in the Docker documentation and patched it.
|
||||
`#401`_
|
||||
|
||||
|
||||
2.2.1
|
||||
@@ -32,14 +148,14 @@ Changelog
|
||||
version of Paperless that supports Django 2.0! As a result of their hard
|
||||
work, you can now also run Paperless on Python 3.7 as well: `#386`_ &
|
||||
`#390`_.
|
||||
* `Stéphane Brunner`_ added a few lines of code that made tagging interface a lot
|
||||
easier on those of us with lots of different tags: `#391`_.
|
||||
* `Stéphane Brunner`_ added a few lines of code that made tagging interface a
|
||||
lot easier on those of us with lots of different tags: `#391`_.
|
||||
* `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create
|
||||
tags, so that's fixed now too: `#384`_.
|
||||
* `erikarvstedt`_ tweaked the behaviour of the test suite to be better behaved
|
||||
for packaging environments: `#383`_.
|
||||
* `Lukasz Soluch`_ added CORS support to make building a new Javascript-based front-end
|
||||
cleaner & easier: `#387`_.
|
||||
* `Lukasz Soluch`_ added CORS support to make building a new Javascript-based
|
||||
front-end cleaner & easier: `#387`_.
|
||||
|
||||
|
||||
2.1.0
|
||||
@@ -499,8 +615,21 @@ bulk of the work on this big change.
|
||||
.. _Kilian Koeltzsch: https://github.com/kiliankoe
|
||||
.. _Lukasz Soluch: https://github.com/LukaszSolo
|
||||
.. _Joshua Taillon: https://github.com/jat255
|
||||
.. _dubit0: https://github.com/dubit0
|
||||
.. _ahyear: https://github.com/ahyear
|
||||
.. _dubit0: https://github.com/dubit0
|
||||
.. _ahyear: https://github.com/ahyear
|
||||
.. _jonaswinkler: https://github.com/jonaswinkler
|
||||
.. _thepill: https://github.com/thepill
|
||||
.. _Andrew Peng: https://github.com/pengc99
|
||||
.. _euri10: https://github.com/euri10
|
||||
.. _Ulli: https://github.com/Ulli2k
|
||||
.. _tsia: https://github.com/tsia
|
||||
.. _Sblop: https://github.com/Sblop
|
||||
.. _Colin Frei: https://github.com/colinfrei
|
||||
.. _khrise: https://github.com/khrise
|
||||
.. _syntonym: https://github.com/syntonym
|
||||
.. _JOKer: https://github.com/JOKer
|
||||
.. _Brian Cribbs: https://github.com/cribbstechnolog
|
||||
.. _Brendan M. Sleight: https://github.com/bmsleight
|
||||
|
||||
.. _#20: https://github.com/danielquinn/paperless/issues/20
|
||||
.. _#44: https://github.com/danielquinn/paperless/issues/44
|
||||
@@ -566,6 +695,7 @@ bulk of the work on this big change.
|
||||
.. _#322: https://github.com/danielquinn/paperless/pull/322
|
||||
.. _#328: https://github.com/danielquinn/paperless/pull/328
|
||||
.. _#253: https://github.com/danielquinn/paperless/issues/253
|
||||
.. _#262: https://github.com/danielquinn/paperless/issues/262
|
||||
.. _#323: https://github.com/danielquinn/paperless/issues/323
|
||||
.. _#344: https://github.com/danielquinn/paperless/pull/344
|
||||
.. _#351: https://github.com/danielquinn/paperless/pull/351
|
||||
@@ -582,11 +712,33 @@ bulk of the work on this big change.
|
||||
.. _#391: https://github.com/danielquinn/paperless/pull/391
|
||||
.. _#390: https://github.com/danielquinn/paperless/pull/390
|
||||
.. _#392: https://github.com/danielquinn/paperless/issues/392
|
||||
.. _#393: https://github.com/danielquinn/paperless/issues/393
|
||||
.. _#395: https://github.com/danielquinn/paperless/pull/395
|
||||
.. _#394: https://github.com/danielquinn/paperless/issues/394
|
||||
.. _#396: https://github.com/danielquinn/paperless/pull/396
|
||||
.. _#399: https://github.com/danielquinn/paperless/pull/399
|
||||
.. _#400: https://github.com/danielquinn/paperless/pull/400
|
||||
.. _#401: https://github.com/danielquinn/paperless/pull/401
|
||||
.. _#405: https://github.com/danielquinn/paperless/pull/405
|
||||
.. _#406: https://github.com/danielquinn/paperless/issues/406
|
||||
.. _#412: https://github.com/danielquinn/paperless/issues/412
|
||||
.. _#413: https://github.com/danielquinn/paperless/pull/413
|
||||
.. _#414: https://github.com/danielquinn/paperless/issues/414
|
||||
.. _#423: https://github.com/danielquinn/paperless/issues/423
|
||||
.. _#433: https://github.com/danielquinn/paperless/issues/433
|
||||
.. _#440: https://github.com/danielquinn/paperless/pull/440
|
||||
.. _#441: https://github.com/danielquinn/paperless/pull/441
|
||||
.. _#442: https://github.com/danielquinn/paperless/pull/442
|
||||
.. _#466: https://github.com/danielquinn/paperless/pull/466
|
||||
.. _#471: https://github.com/danielquinn/paperless/pull/471
|
||||
.. _#475: https://github.com/danielquinn/paperless/pull/475
|
||||
.. _#481: https://github.com/danielquinn/paperless/pull/481
|
||||
.. _#483: https://github.com/danielquinn/paperless/pull/483
|
||||
.. _#484: https://github.com/danielquinn/paperless/pull/484
|
||||
.. _#488: https://github.com/danielquinn/paperless/pull/488
|
||||
.. _#489: https://github.com/danielquinn/paperless/pull/489
|
||||
.. _#492: https://github.com/danielquinn/paperless/pull/492
|
||||
|
||||
.. _pipenv: https://docs.pipenv.org/
|
||||
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
||||
.. _optipng: http://optipng.sourceforge.net/
|
||||
|
||||
@@ -76,6 +76,31 @@ Pre-consumption script
|
||||
|
||||
* Document file name
|
||||
|
||||
A simple but common example for this would be creating a simple script like
|
||||
this:
|
||||
|
||||
``/usr/local/bin/ocr-pdf``
|
||||
|
||||
.. code:: bash
|
||||
|
||||
#!/usr/bin/env bash
|
||||
pdf2pdfocr.py -i ${1}
|
||||
|
||||
``/etc/paperless.conf``
|
||||
|
||||
.. code:: bash
|
||||
|
||||
...
|
||||
PAPERLESS_PRE_CONSUME_SCRIPT="/usr/local/bin/ocr-pdf"
|
||||
...
|
||||
|
||||
This will pass the path to the document about to be consumed to ``/usr/local/bin/ocr-pdf``,
|
||||
which will in turn call `pdf2pdfocr.py`_ on your document, which will then
|
||||
overwrite the file with an OCR'd version of the file and exit. At which point,
|
||||
the consumption process will begin with the newly modified file.
|
||||
|
||||
.. _pdf2pdfocr.py: https://github.com/LeoFCardoso/pdf2pdfocr
|
||||
|
||||
|
||||
.. _consumption-director-hook-variables-post:
|
||||
|
||||
|
||||
141
docs/contributing.rst
Normal file
@@ -0,0 +1,141 @@
|
||||
.. _contributing:
|
||||
|
||||
Contributing to Paperless
|
||||
#########################
|
||||
|
||||
Maybe you've been using Paperless for a while and want to add a feature or two,
|
||||
or maybe you've come across a bug that you have some ideas how to solve. The
|
||||
beauty of Free software is that you can see what's wrong and help to get it
|
||||
fixed for everyone!
|
||||
|
||||
|
||||
How to Get Your Changes Rolled Into Paperless
|
||||
=============================================
|
||||
|
||||
If you've found a bug, but don't know how to fix it, you can always post an
|
||||
issue on `GitHub`_ in the hopes that someone will have the time to fix it for
|
||||
you. If however you're the one with the time, pull requests are always
|
||||
welcome, you just have to make sure that your code conforms to a few standards:
|
||||
|
||||
Pep8
|
||||
----
|
||||
|
||||
It's the standard for all Python development, so it's `very well documented`_.
|
||||
The short version is:
|
||||
|
||||
* Lines should wrap at 79 characters
|
||||
* Use ``snake_case`` for variables, ``CamelCase`` for classes, and ``ALL_CAPS``
|
||||
for constants.
|
||||
* Space out your operators: ``stuff + 7`` instead of ``stuff+7``
|
||||
* Two empty lines between classes, and functions, but 1 empty line between
|
||||
class methods.
|
||||
|
||||
There's more to it than that, but if you follow those, you'll probably be
|
||||
alright. When you submit your pull request, there's a pep8 checker that'll
|
||||
look at your code to see if anything is off. If it finds anything, it'll
|
||||
complain at you until you fix it.
|
||||
|
||||
|
||||
Additional Style Guides
|
||||
-----------------------
|
||||
|
||||
Where pep8 is ambiguous, I've tried to be a little more specific. These rules
|
||||
aren't hard-and-fast, but if you can conform to them, I'll appreciate it and
|
||||
spend less time trying to conform your PR before merging:
|
||||
|
||||
|
||||
Function calls
|
||||
..............
|
||||
|
||||
If you're calling a function and that necessitates more than one line of code,
|
||||
please format it like this:
|
||||
|
||||
.. code:: python
|
||||
|
||||
my_function(
|
||||
argument1,
|
||||
kwarg1="x",
|
||||
kwarg2="y"
|
||||
another_really_long_kwarg="some big value"
|
||||
a_kwarg_calling_another_long_function=another_function(
|
||||
another_arg,
|
||||
another_kwarg="kwarg!"
|
||||
)
|
||||
)
|
||||
|
||||
This is all in the interest of code uniformity rather than anything else. If
|
||||
we stick to a style, everything is understandable in the same way.
|
||||
|
||||
|
||||
Quoting Strings
|
||||
...............
|
||||
|
||||
pep8 is a little too open-minded on this for my liking. Python strings should
|
||||
be quoted with double quotes (``"``) except in cases where the resulting string
|
||||
would require too much escaping of a double quote, in which case, a single
|
||||
quoted, or triple-quoted string will do:
|
||||
|
||||
.. code:: python
|
||||
|
||||
my_string = "This is my string"
|
||||
problematic_string = 'This is a "string" with "quotes" in it'
|
||||
|
||||
In HTML templates, please use double-quotes for tag attributes, and single
|
||||
quotes for arguments passed to Django tempalte tags:
|
||||
|
||||
.. code:: html
|
||||
|
||||
<div class="stuff">
|
||||
<a href="{% url 'some-url-name' pk='w00t' %}">link this</a>
|
||||
</div>
|
||||
|
||||
This is to keep linters happy they look at an HTML file and see an attribute
|
||||
closing the ``"`` before it should have been.
|
||||
|
||||
--
|
||||
|
||||
That's all there is in terms of guidelines, so I hope it's not too daunting.
|
||||
|
||||
|
||||
Indentation & Spacing
|
||||
.....................
|
||||
|
||||
When it comes to indentation:
|
||||
|
||||
* For Python, the rule is: follow pep8 and use 4 spaces.
|
||||
* For Javascript, CSS, and HTML, please use 1 tab.
|
||||
|
||||
Additionally, Django templates making use of block elements like ``{% if %}``,
|
||||
``{% for %}``, and ``{% block %}`` etc. should be indented:
|
||||
|
||||
Good:
|
||||
|
||||
.. code:: html
|
||||
|
||||
{% block stuff %}
|
||||
<h1>This is the stuff</h1>
|
||||
{% endblock %}
|
||||
|
||||
Bad:
|
||||
|
||||
.. code:: html
|
||||
|
||||
{% block stuff %}
|
||||
<h1>This is the stuff</h1>
|
||||
{% endblock %}
|
||||
|
||||
|
||||
The Code of Conduct
|
||||
===================
|
||||
|
||||
Paperless has a `code of conduct`_. It's a lot like the other ones you see out
|
||||
there, with a few small changes, but basically it boils down to:
|
||||
|
||||
> Don't be an ass, or you might get banned.
|
||||
|
||||
I'm proud to say that the CoC has never had to be enforced because everyone has
|
||||
been awesome, friendly, and professional.
|
||||
|
||||
.. _GitHub: https://github.com/danielquinn/paperless/issues
|
||||
.. _very well documented: https://www.python.org/dev/peps/pep-0008/
|
||||
.. _code of conduct: https://github.com/danielquinn/paperless/blob/master/CODE_OF_CONDUCT.md
|
||||
@@ -43,6 +43,16 @@ These however wouldn't work:
|
||||
* ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
|
||||
* ``Another Company- Letter of Reference.jpg``
|
||||
|
||||
Do I have to be so strict about naming?
|
||||
---------------------------------------
|
||||
Rather than using the strict document naming rules, one can also set the option
|
||||
``PAPERLESS_FILENAME_DATE_ORDER`` in ``paperless.conf`` to any date order
|
||||
that is accepted by dateparser_. Doing so will cause ``paperless`` to default
|
||||
to any date format that is found in the title, instead of a date pulled from
|
||||
the document's text, without requiring the strict formatting of the document
|
||||
filename as described above.
|
||||
|
||||
.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
|
||||
|
||||
.. _guesswork-content:
|
||||
|
||||
@@ -82,11 +92,11 @@ text and matching algorithm. From the help info there:
|
||||
uses a regex to match the PDF. If you don't know what a regex is, you
|
||||
probably don't want this option.
|
||||
|
||||
When using the "any" or "all" matching algorithms, you can search for terms that
|
||||
consist of multiple words by enclosing them in double quotes. For example, defining
|
||||
a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match
|
||||
documents that contain either "Bank of America" or "BofA", but will not match
|
||||
documents containing "Bank of South America".
|
||||
When using the "any" or "all" matching algorithms, you can search for terms
|
||||
that consist of multiple words by enclosing them in double quotes. For example,
|
||||
defining a match text of ``"Bank of America" BofA`` using the "any" algorithm,
|
||||
will match documents that contain either "Bank of America" or "BofA", but will
|
||||
not match documents containing "Bank of South America".
|
||||
|
||||
Then just save your tag/correspondent and run another document through the
|
||||
consumer. Once complete, you should see the newly-created document,
|
||||
|
||||
@@ -43,5 +43,6 @@ Contents
|
||||
customising
|
||||
extending
|
||||
troubleshooting
|
||||
contributing
|
||||
scanners
|
||||
changelog
|
||||
|
||||
@@ -82,6 +82,7 @@ rolled in as part of the update:
|
||||
|
||||
$ cd /path/to/project
|
||||
$ git pull
|
||||
$ pip install -r requirements.txt
|
||||
$ cd src
|
||||
$ ./manage.py migrate
|
||||
|
||||
@@ -101,7 +102,7 @@ is similar:
|
||||
$ cd /path/to/project
|
||||
$ git pull
|
||||
$ docker build -t paperless .
|
||||
$ docker-compose run --rm comsumer migrate
|
||||
$ docker-compose run --rm consumer migrate
|
||||
$ docker-compose up -d
|
||||
|
||||
If ``git pull`` doesn't report any changes, there is no need to continue with
|
||||
|
||||
@@ -12,6 +12,7 @@ should work) that has the following software installed:
|
||||
* `Imagemagick`_ version 6.7.5 or higher
|
||||
* `unpaper`_
|
||||
* `libpoppler-cpp-dev`_ PDF rendering library
|
||||
* `optipng`_
|
||||
|
||||
.. _Python3: https://python.org/
|
||||
.. _GNU Privacy Guard: https://gnupg.org
|
||||
@@ -19,6 +20,7 @@ should work) that has the following software installed:
|
||||
.. _Imagemagick: http://imagemagick.org/
|
||||
.. _unpaper: https://www.flameeyes.eu/projects/unpaper
|
||||
.. _libpoppler-cpp-dev: https://poppler.freedesktop.org/
|
||||
.. _optipng: http://optipng.sourceforge.net/
|
||||
|
||||
Notably, you should confirm how you access your Python3 installation. Many
|
||||
Linux distributions will install Python3 in parallel to Python2, using the
|
||||
@@ -33,7 +35,7 @@ In addition to the above, there are a number of Python requirements, all of
|
||||
which are listed in a file called ``requirements.txt`` in the project root
|
||||
directory.
|
||||
|
||||
If you're not working on a virtual environment (like Vagrant or Docker), you
|
||||
If you're not working on a virtual environment (like Docker), you
|
||||
should probably be using a virtualenv, but that's your call. The reasons why
|
||||
you might choose a virtualenv or not aren't really within the scope of this
|
||||
document. Needless to say if you don't know what a virtualenv is, you should
|
||||
|
||||
@@ -42,18 +42,14 @@ Installation & Configuration
|
||||
You can go multiple routes with setting up and running Paperless:
|
||||
|
||||
* The `bare metal route`_
|
||||
* The `vagrant route`_
|
||||
* The `docker route`_
|
||||
|
||||
|
||||
The `Vagrant route`_ is quick & easy, but means you're running a VM which comes
|
||||
with memory consumption, cpu overhead etc. The `docker route`_ offers the same
|
||||
simplicity as Vagrant with lower resource consumption.
|
||||
The `docker route`_ is quick & easy.
|
||||
|
||||
The `bare metal route`_ is a bit more complicated to setup but makes it easier
|
||||
should you want to contribute some code back.
|
||||
|
||||
.. _Vagrant route: setup-installation-vagrant_
|
||||
.. _docker route: setup-installation-docker_
|
||||
.. _bare metal route: setup-installation-bare-metal_
|
||||
.. _Docker Machine: https://docs.docker.com/machine/
|
||||
@@ -81,12 +77,16 @@ Standard (Bare Metal)
|
||||
encrypt/decrypt the original documents. Don't worry about defining this
|
||||
if you don't want to use encryption (the default).
|
||||
|
||||
Note also that if you're using the ``runserver`` as mentioned below, you
|
||||
should make sure that PAPERLESS_DEBUG="true" or is just commented out as
|
||||
this is the default.
|
||||
|
||||
4. Initialise the SQLite database with ``./manage.py migrate``.
|
||||
5. Create a user for your Paperless instance with
|
||||
``./manage.py createsuperuser``. Follow the prompts to create your user.
|
||||
6. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
|
||||
If no specifc IP or port are given, the default is ``127.0.0.1:8000``
|
||||
also known as http://localhost:8000/.
|
||||
If no specific IP or port is given, the default is ``127.0.0.1:8000`` also
|
||||
known as http://localhost:8000/.
|
||||
You should now be able to visit your (empty) installation at
|
||||
`Paperless webserver`_ or whatever you chose before. You can login with the
|
||||
user/pass you created in #5.
|
||||
@@ -147,6 +147,15 @@ Docker Method
|
||||
instructions in comments in the file. The only change that is a hard
|
||||
requirement is to specify where the consumption directory should
|
||||
mount.[#dockercomposeyml]_
|
||||
|
||||
.. caution::
|
||||
|
||||
If you are using NFS mounts for the consume directory you also need to
|
||||
change the command to turn off inotify as it doesn't work with NFS
|
||||
|
||||
`command: ["document_consumer", "--no-inotify"]`
|
||||
|
||||
|
||||
5. Modify ``docker-compose.env`` and adapt the following environment variables:
|
||||
|
||||
``PAPERLESS_PASSPHRASE``
|
||||
@@ -267,54 +276,6 @@ Docker Method
|
||||
newer ``docker-compose.yml.example`` file
|
||||
|
||||
|
||||
.. _setup-installation-vagrant:
|
||||
|
||||
Vagrant Method
|
||||
++++++++++++++
|
||||
|
||||
1. Install `Vagrant`_. How you do that is really between you and your OS.
|
||||
2. Run ``vagrant up``. An instance will start up for you. When it's ready and
|
||||
provisioned...
|
||||
3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
|
||||
``/etc/paperless.conf`` and set the values for:
|
||||
|
||||
* ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be
|
||||
dumped to be consumed by Paperless.
|
||||
* ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to
|
||||
encrypt/decrypt the original document. It's only required if you want
|
||||
your original files to be encrypted, otherwise, just leave it unset.
|
||||
* ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming
|
||||
documents from mail or via the API. If you don't use either, leaving it
|
||||
blank is just fine.
|
||||
|
||||
4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again. This
|
||||
updates the environment to make use of the changes you made to the config
|
||||
file.
|
||||
5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
|
||||
6. Still inside your vagrant box, create a user for your Paperless instance
|
||||
with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
|
||||
create your user.
|
||||
7. Start the webserver with
|
||||
``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
|
||||
able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
|
||||
You can login with the user/pass you created in #6.
|
||||
8. In a separate window, run ``vagrant ssh`` again, but this time once inside
|
||||
your vagrant instance, you should start the consumer script with
|
||||
``/opt/paperless/src/manage.py document_consumer``.
|
||||
9. Scan something. Put it in the ``CONSUMPTION_DIR``.
|
||||
10. Wait a few minutes
|
||||
11. Visit the document list on your webserver, and it should be there, indexed
|
||||
and downloadable.
|
||||
|
||||
.. caution::
|
||||
|
||||
This installation is not secure. Once everything is working head up to
|
||||
`Making things more permanent`_
|
||||
|
||||
.. _Vagrant: https://vagrantup.com/
|
||||
.. _Paperless server: http://172.28.128.4:8000
|
||||
|
||||
|
||||
.. _setup-permanent:
|
||||
|
||||
Making Things a Little more Permanent
|
||||
@@ -398,7 +359,7 @@ instance listening on localhost port 8000.
|
||||
location /static {
|
||||
|
||||
autoindex on;
|
||||
alias <path-to-paperless-static-directory>
|
||||
alias <path-to-paperless-static-directory>;
|
||||
|
||||
}
|
||||
|
||||
@@ -409,7 +370,7 @@ instance listening on localhost port 8000.
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
proxy_pass http://127.0.0.1:8000
|
||||
proxy_pass http://127.0.0.1:8000;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -418,7 +379,7 @@ The gunicorn server can be started with the command:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
$ <path-to-paperless-virtual-environment>/bin/gunicorn <path-to-paperless>/src/paperless.wsgi -w 2
|
||||
$ <path-to-paperless-virtual-environment>/bin/gunicorn --pythonpath=<path-to-paperless>/src paperless.wsgi -w 2
|
||||
|
||||
|
||||
.. _setup-permanent-standard-systemd:
|
||||
@@ -475,7 +436,7 @@ after restarting your system:
|
||||
respawn limit 10 5
|
||||
|
||||
script
|
||||
exec <path to paperless virtual environment>/bin/gunicorn <path to parperless>/src/paperless.wsgi -w 2
|
||||
exec <path to paperless virtual environment>/bin/gunicorn --pythonpath=<path to parperless>/src paperless.wsgi -w 2
|
||||
end script
|
||||
|
||||
Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
|
||||
@@ -513,13 +474,6 @@ second period.
|
||||
.. _Upstart: http://upstart.ubuntu.com/
|
||||
|
||||
|
||||
Vagrant
|
||||
~~~~~~~
|
||||
|
||||
You may use the Ubuntu explanation above. Replace
|
||||
``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
|
||||
|
||||
|
||||
.. _setup-permanent-docker:
|
||||
|
||||
Docker
|
||||
|
||||
@@ -14,9 +14,8 @@ FORGIVING_OCR is enabled``, then you might need to install the
|
||||
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
|
||||
marching your document's languages.
|
||||
|
||||
As an example, if you are running Paperless from the Vagrant setup provided
|
||||
(or from any Ubuntu or Debian box), and your documents are written in Spanish
|
||||
you may need to run::
|
||||
As an example, if you are running Paperless from any Ubuntu or Debian
|
||||
box, and your documents are written in Spanish you may need to run::
|
||||
|
||||
apt-get install -y tesseract-ocr-spa
|
||||
|
||||
|
||||
@@ -214,5 +214,5 @@ This too is done via the ``manage.py`` script:
|
||||
|
||||
That's it. It'll loop over all of the documents in your database and attempt
|
||||
to match all of your tags to them. If one matches, it'll be applied. And
|
||||
don't worry, you can run this as often as you like, it' won't double-tag
|
||||
don't worry, you can run this as often as you like, it won't double-tag
|
||||
a document.
|
||||
|
||||
11
overrides/README.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Customizing Paperless
|
||||
|
||||
*See customization
|
||||
[documentation](https://paperless.readthedocs.io/en/latest/customising.html)
|
||||
for more detail!*
|
||||
|
||||
The example `.css` and `.js` snippets in this folder can be placed into
|
||||
one of two files in your ``PAPERLESS_MEDIADIR`` folder: `overrides.js` or
|
||||
`overrides.css`. Please feel free to submit pull requests to the main
|
||||
repository with other examples of customizations that you think others may
|
||||
find useful.
|
||||
@@ -59,6 +59,11 @@ PAPERLESS_EMAIL_SECRET=""
|
||||
#### Security ####
|
||||
###############################################################################
|
||||
|
||||
# Controls whether django's debug mode is enabled. Disable this on production
|
||||
# systems. Debug mode is enabled by default.
|
||||
#PAPERLESS_DEBUG="true"
|
||||
|
||||
|
||||
# Paperless can be instructed to attempt to encrypt your PDF files with GPG
|
||||
# using the PAPERLESS_PASSPHRASE specified below. If however you're not
|
||||
# concerned about encrypting these files (for example if you have disk
|
||||
@@ -122,6 +127,14 @@ PAPERLESS_EMAIL_SECRET=""
|
||||
# "true", the document will instead be opened in the browser, if possible.
|
||||
#PAPERLESS_INLINE_DOC="false"
|
||||
|
||||
# By default, paperless will check the document text for document date information.
|
||||
# Uncomment the line below to enable checking the document filename for date
|
||||
# information. The date order can be set to any option as specified in
|
||||
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
|
||||
# checked first, and if nothing is found, the document text will be checked
|
||||
# as normal.
|
||||
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
|
||||
|
||||
#
|
||||
# The following values use sensible defaults for modern systems, but if you're
|
||||
# running Paperless on a low-resource device (like a Raspberry Pi), modifying
|
||||
@@ -183,6 +196,17 @@ PAPERLESS_EMAIL_SECRET=""
|
||||
#PAPERLESS_CONSUMER_LOOP_TIME=10
|
||||
|
||||
|
||||
# By default Paperless stops consuming a document if no language can be
|
||||
# detected. Set to true to consume documents even if the language detection
|
||||
# fails.
|
||||
#PAPERLESS_FORGIVING_OCR="false"
|
||||
|
||||
|
||||
# By default Paperless does not OCR a document if the text can be retrieved from
|
||||
# the document directly. Set to true to always OCR documents.
|
||||
#PAPERLESS_OCR_ALWAYS="false"
|
||||
|
||||
|
||||
###############################################################################
|
||||
#### Interface ####
|
||||
###############################################################################
|
||||
@@ -203,3 +227,28 @@ PAPERLESS_EMAIL_SECRET=""
|
||||
# positive integer, but if you don't define one in paperless.conf, a default of
|
||||
# 100 will be used.
|
||||
#PAPERLESS_LIST_PER_PAGE=100
|
||||
|
||||
|
||||
# The number of years for which a correspondent will be included in the recent
|
||||
# correspondents filter.
|
||||
#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
|
||||
|
||||
###############################################################################
|
||||
#### Third-Party Binaries ####
|
||||
###############################################################################
|
||||
|
||||
# There are a few external software packages that Paperless expects to find on
|
||||
# your system when it starts up. Unless you've done something creative with
|
||||
# their installation, you probably won't need to edit any of these. However,
|
||||
# if you've installed these programs somewhere where simply typing the name of
|
||||
# the program doesn't automatically execute it (ie. the program isn't in your
|
||||
# $PATH), then you'll need to specify the literal path for that program here.
|
||||
|
||||
# Convert (part of the ImageMagick suite)
|
||||
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
|
||||
|
||||
# Unpaper
|
||||
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
|
||||
|
||||
# Optipng (for optimising thumbnail sizes)
|
||||
#PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng
|
||||
|
||||
83
requirements.txt
Normal file → Executable file
@@ -1,51 +1,70 @@
|
||||
-i https://pypi.python.org/simple
|
||||
apipkg==1.5; python_version != '3.1.*'
|
||||
atomicwrites==1.2.1; python_version != '3.1.*'
|
||||
alabaster==0.7.12
|
||||
apipkg==1.5
|
||||
atomicwrites==1.2.1
|
||||
attrs==18.2.0
|
||||
certifi==2018.8.24
|
||||
babel==2.6.0
|
||||
certifi==2018.11.29
|
||||
chardet==3.0.4
|
||||
coverage==4.5.1; python_version != '3.1.*'
|
||||
coveralls==1.5.0
|
||||
coverage==4.5.2
|
||||
coveralls==1.5.1
|
||||
dateparser==0.7.0
|
||||
django-cors-headers==2.4.0
|
||||
django-crispy-forms==1.7.2
|
||||
django-extensions==2.1.2
|
||||
django-filter==2.0.0
|
||||
django==2.0.8
|
||||
djangorestframework==3.8.2
|
||||
django-extensions==2.1.4
|
||||
django-filter==2.1.0
|
||||
django==2.0.10
|
||||
djangoql==0.12.3
|
||||
djangorestframework==3.9.1
|
||||
docopt==0.6.2
|
||||
execnet==1.5.0; python_version != '3.1.*'
|
||||
docutils==0.14
|
||||
execnet==1.5.0
|
||||
factory-boy==2.11.1
|
||||
faker==0.9.0
|
||||
faker==1.0.2
|
||||
filelock==3.0.10
|
||||
filemagic==1.6
|
||||
fuzzywuzzy==0.15.0
|
||||
fuzzywuzzy[speedup]==0.15.0
|
||||
gunicorn==19.9.0
|
||||
idna==2.7
|
||||
idna==2.8
|
||||
imagesize==1.1.0
|
||||
inotify-simple==1.1.8
|
||||
jinja2==2.10
|
||||
langdetect==1.0.7
|
||||
more-itertools==4.3.0
|
||||
pdftotext==2.1.0
|
||||
pillow==5.2.0
|
||||
pluggy==0.7.1; python_version != '3.1.*'
|
||||
py==1.6.0; python_version != '3.1.*'
|
||||
markupsafe==1.1.0
|
||||
more-itertools==5.0.0
|
||||
packaging==19.0
|
||||
pdftotext==2.1.1
|
||||
pillow==5.4.1
|
||||
pluggy==0.8.1
|
||||
ply==3.11
|
||||
psycopg2==2.7.7
|
||||
py==1.7.0
|
||||
pycodestyle==2.4.0
|
||||
pygments==2.3.1
|
||||
pyocr==0.5.3
|
||||
pytest-cov==2.5.1
|
||||
pytest-django==3.4.2
|
||||
pyparsing==2.3.1
|
||||
pytest-cov==2.6.1
|
||||
pytest-django==3.4.5
|
||||
pytest-env==0.6.2
|
||||
pytest-forked==0.2
|
||||
pytest-sugar==0.9.1
|
||||
pytest-xdist==1.23.0
|
||||
pytest==3.7.4
|
||||
python-dateutil==2.7.3
|
||||
python-dotenv==0.9.1
|
||||
python-gnupg==0.4.3
|
||||
pytest-forked==1.0.1
|
||||
pytest-sugar==0.9.2
|
||||
pytest-xdist==1.26.0
|
||||
pytest==4.1.1
|
||||
python-dateutil==2.7.5
|
||||
python-dotenv==0.10.1
|
||||
python-gnupg==0.4.4
|
||||
python-levenshtein==0.12.0
|
||||
pytz==2018.5
|
||||
regex==2018.8.29
|
||||
requests==2.19.1
|
||||
six==1.11.0
|
||||
pytz==2018.9
|
||||
regex==2019.1.24
|
||||
requests==2.21.0
|
||||
six==1.12.0
|
||||
snowballstemmer==1.2.1
|
||||
sphinx==1.8.3
|
||||
sphinxcontrib-websupport==1.1.0
|
||||
termcolor==1.1.0
|
||||
text-unidecode==1.2
|
||||
toml==0.10.0
|
||||
tox==3.7.0
|
||||
tzlocal==1.5.1
|
||||
urllib3==1.23; python_version != '3.0.*'
|
||||
urllib3==1.24.1
|
||||
virtualenv==16.3.0
|
||||
|
||||
1086
resources/logo/print/eps/Black logo - no background.eps
Normal file
1090
resources/logo/print/eps/Color logo - no background.eps
Normal file
1099
resources/logo/print/eps/Color logo with background.eps
Normal file
1090
resources/logo/print/eps/White logo - no background.eps
Normal file
BIN
resources/logo/print/pdf/Black logo - no background.pdf
Normal file
BIN
resources/logo/print/pdf/Color logo - no background.pdf
Normal file
BIN
resources/logo/print/pdf/Color logo with background.pdf
Normal file
BIN
resources/logo/print/pdf/White logo - no background.pdf
Normal file
BIN
resources/logo/web/png/Black logo - no background.png
Normal file
|
After Width: | Height: | Size: 91 KiB |
BIN
resources/logo/web/png/Color logo - no background.png
Normal file
|
After Width: | Height: | Size: 111 KiB |
BIN
resources/logo/web/png/Color logo with background.png
Normal file
|
After Width: | Height: | Size: 116 KiB |
BIN
resources/logo/web/png/White logo - no background.png
Normal file
|
After Width: | Height: | Size: 94 KiB |
8
resources/logo/web/svg/Black logo - no background.svg
Normal file
|
After Width: | Height: | Size: 7.4 KiB |
8
resources/logo/web/svg/Color logo - no background.svg
Normal file
|
After Width: | Height: | Size: 7.5 KiB |
8
resources/logo/web/svg/Color logo with background.svg
Normal file
|
After Width: | Height: | Size: 7.5 KiB |
8
resources/logo/web/svg/White logo - no background.svg
Normal file
|
After Width: | Height: | Size: 7.4 KiB |
82
resources/logo/web/svg/square.svg
Normal file
@@ -0,0 +1,82 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
version="1.1"
|
||||
width="900"
|
||||
height="900"
|
||||
id="svg3923"
|
||||
sodipodi:docname="square.svg"
|
||||
inkscape:export-filename="/tmp/test.png"
|
||||
inkscape:export-xdpi="96"
|
||||
inkscape:export-ydpi="96"
|
||||
inkscape:version="0.92.2 2405546, 2018-03-11">
|
||||
<metadata
|
||||
id="metadata3929">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<defs
|
||||
id="defs3927" />
|
||||
<sodipodi:namedview
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1"
|
||||
objecttolerance="10"
|
||||
gridtolerance="10"
|
||||
guidetolerance="10"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:window-width="3840"
|
||||
inkscape:window-height="2096"
|
||||
id="namedview3925"
|
||||
showgrid="false"
|
||||
inkscape:zoom="1.1360927"
|
||||
inkscape:cx="635.07139"
|
||||
inkscape:cy="606.383"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="27"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="g3921" />
|
||||
<g
|
||||
transform="matrix(10.638298,0,0,10.638298,106.38298,-206.38301)"
|
||||
id="g3921">
|
||||
<defs
|
||||
id="SvgjsDefs1018" />
|
||||
<g
|
||||
id="SvgjsG1019"
|
||||
featureKey="root"
|
||||
style="fill:#ffffff" />
|
||||
<g
|
||||
id="SvgjsG1020"
|
||||
featureKey="symbol1"
|
||||
transform="matrix(0.10341565,0,0,0.10341565,-11.43874,18.048418)"
|
||||
inkscape:export-filename="/tmp/test.png"
|
||||
inkscape:export-xdpi="116.02285"
|
||||
inkscape:export-ydpi="116.02285"
|
||||
style="fill:#17541f">
|
||||
<defs
|
||||
id="defs3911" />
|
||||
<g
|
||||
id="g3915">
|
||||
<path
|
||||
d="M 231,798 C 227,779 219,741 218,741 49,640 69,465 125,365 c 12,126 235,213 105,367 -1,2 6,26 12,48 26,-44 65,-97 63,-102 C 145,288 645,258 749,16 c 47,234 -24,596 -426,688 -2,1 -73,126 -76,127 0,-2 -30,-1 -26,-11 2,-6 6,-14 10,-22 z M 330,625 C 267,476 452,312 544,271 356,439 324,564 330,625 Z m -104,79 c 51,-59 -9,-160 -45,-193 61,105 57,166 45,193 z"
|
||||
style="fill:#17541f"
|
||||
id="path3913"
|
||||
inkscape:connector-curvature="0" />
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 2.6 KiB |
@@ -75,7 +75,7 @@ install_languages() {
|
||||
pkg="tesseract-ocr-data-$lang"
|
||||
|
||||
# English is installed by default
|
||||
if [ "$lang" == "eng" ]; then
|
||||
if [[ "$lang" == "eng" ]]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
@@ -95,7 +95,7 @@ if [[ "$1" != "/"* ]]; then
|
||||
initialize
|
||||
|
||||
# Install additional languages if specified
|
||||
if [ ! -z "$PAPERLESS_OCR_LANGUAGES" ]; then
|
||||
if [[ ! -z "$PAPERLESS_OCR_LANGUAGES" ]]; then
|
||||
install_languages "$PAPERLESS_OCR_LANGUAGES"
|
||||
fi
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ Description=Paperless webserver
|
||||
[Service]
|
||||
User=paperless
|
||||
Group=paperless
|
||||
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn /home/paperless/project/src/paperless.wsgi -w 2
|
||||
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Install packages
|
||||
apt-get update
|
||||
apt-get build-dep -y python-imaging
|
||||
apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
|
||||
apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
|
||||
apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
|
||||
|
||||
# Python dependencies
|
||||
pip3 install -r /opt/paperless/requirements.txt
|
||||
|
||||
# Create the environment file
|
||||
cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
|
||||
chmod 0640 /etc/paperless.conf
|
||||
chown root:vagrant /etc/paperless.conf
|
||||
|
||||
# Create the consumption directory
|
||||
mkdir /home/vagrant/consumption
|
||||
chown vagrant:vagrant /home/vagrant/consumption
|
||||
|
||||
echo "
|
||||
|
||||
|
||||
Now follow the remaining steps in the Vagrant section of the setup
|
||||
documentation to complete the process:
|
||||
|
||||
http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
|
||||
|
||||
|
||||
"
|
||||
146
src/documents/actions.py
Normal file
@@ -0,0 +1,146 @@
|
||||
from django.contrib import messages
|
||||
from django.contrib.admin import helpers
|
||||
from django.contrib.admin.utils import model_ngettext
|
||||
from django.core.exceptions import PermissionDenied
|
||||
from django.template.response import TemplateResponse
|
||||
|
||||
from documents.models import Correspondent, Tag
|
||||
|
||||
|
||||
def select_action(
|
||||
modeladmin, request, queryset, title, action, modelclass,
|
||||
success_message="", document_action=None, queryset_action=None):
|
||||
|
||||
opts = modeladmin.model._meta
|
||||
app_label = opts.app_label
|
||||
|
||||
if not modeladmin.has_change_permission(request):
|
||||
raise PermissionDenied
|
||||
|
||||
if request.POST.get('post'):
|
||||
n = queryset.count()
|
||||
selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
|
||||
if n:
|
||||
for document in queryset:
|
||||
if document_action:
|
||||
document_action(document, selected_object)
|
||||
document_display = str(document)
|
||||
modeladmin.log_change(request, document, document_display)
|
||||
if queryset_action:
|
||||
queryset_action(queryset, selected_object)
|
||||
|
||||
modeladmin.message_user(request, success_message % {
|
||||
"selected_object": selected_object.name,
|
||||
"count": n,
|
||||
"items": model_ngettext(modeladmin.opts, n)
|
||||
}, messages.SUCCESS)
|
||||
|
||||
# Return None to display the change list page again.
|
||||
return None
|
||||
|
||||
context = dict(
|
||||
modeladmin.admin_site.each_context(request),
|
||||
title=title,
|
||||
queryset=queryset,
|
||||
opts=opts,
|
||||
action_checkbox_name=helpers.ACTION_CHECKBOX_NAME,
|
||||
media=modeladmin.media,
|
||||
action=action,
|
||||
objects=modelclass.objects.all(),
|
||||
itemname=model_ngettext(modelclass, 1)
|
||||
)
|
||||
|
||||
request.current_app = modeladmin.admin_site.name
|
||||
|
||||
return TemplateResponse(
|
||||
request,
|
||||
"admin/{}/{}/select_object.html".format(app_label, opts.model_name),
|
||||
context
|
||||
)
|
||||
|
||||
|
||||
def simple_action(
|
||||
modeladmin, request, queryset, success_message="",
|
||||
document_action=None, queryset_action=None):
|
||||
|
||||
if not modeladmin.has_change_permission(request):
|
||||
raise PermissionDenied
|
||||
|
||||
n = queryset.count()
|
||||
if n:
|
||||
for document in queryset:
|
||||
if document_action:
|
||||
document_action(document)
|
||||
document_display = str(document)
|
||||
modeladmin.log_change(request, document, document_display)
|
||||
if queryset_action:
|
||||
queryset_action(queryset)
|
||||
modeladmin.message_user(request, success_message % {
|
||||
"count": n, "items": model_ngettext(modeladmin.opts, n)
|
||||
}, messages.SUCCESS)
|
||||
|
||||
# Return None to display the change list page again.
|
||||
return None
|
||||
|
||||
|
||||
def add_tag_to_selected(modeladmin, request, queryset):
|
||||
return select_action(
|
||||
modeladmin=modeladmin,
|
||||
request=request,
|
||||
queryset=queryset,
|
||||
title="Add tag to multiple documents",
|
||||
action="add_tag_to_selected",
|
||||
modelclass=Tag,
|
||||
success_message="Successfully added tag %(selected_object)s to "
|
||||
"%(count)d %(items)s.",
|
||||
document_action=lambda doc, tag: doc.tags.add(tag)
|
||||
)
|
||||
|
||||
|
||||
def remove_tag_from_selected(modeladmin, request, queryset):
|
||||
return select_action(
|
||||
modeladmin=modeladmin,
|
||||
request=request,
|
||||
queryset=queryset,
|
||||
title="Remove tag from multiple documents",
|
||||
action="remove_tag_from_selected",
|
||||
modelclass=Tag,
|
||||
success_message="Successfully removed tag %(selected_object)s from "
|
||||
"%(count)d %(items)s.",
|
||||
document_action=lambda doc, tag: doc.tags.remove(tag)
|
||||
)
|
||||
|
||||
|
||||
def set_correspondent_on_selected(modeladmin, request, queryset):
|
||||
|
||||
return select_action(
|
||||
modeladmin=modeladmin,
|
||||
request=request,
|
||||
queryset=queryset,
|
||||
title="Set correspondent on multiple documents",
|
||||
action="set_correspondent_on_selected",
|
||||
modelclass=Correspondent,
|
||||
success_message="Successfully set correspondent %(selected_object)s "
|
||||
"on %(count)d %(items)s.",
|
||||
queryset_action=lambda qs, corr: qs.update(correspondent=corr)
|
||||
)
|
||||
|
||||
|
||||
def remove_correspondent_from_selected(modeladmin, request, queryset):
|
||||
return simple_action(
|
||||
modeladmin=modeladmin,
|
||||
request=request,
|
||||
queryset=queryset,
|
||||
success_message="Successfully removed correspondent from %(count)d "
|
||||
"%(items)s.",
|
||||
queryset_action=lambda qs: qs.update(correspondent=None)
|
||||
)
|
||||
|
||||
|
||||
add_tag_to_selected.short_description = "Add tag to selected documents"
|
||||
remove_tag_from_selected.short_description = \
|
||||
"Remove tag from selected documents"
|
||||
set_correspondent_on_selected.short_description = \
|
||||
"Set correspondent on selected documents"
|
||||
remove_correspondent_from_selected.short_description = \
|
||||
"Remove correspondent from selected documents"
|
||||
@@ -1,42 +1,26 @@
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.models import User, Group
|
||||
try:
|
||||
from django.core.urlresolvers import reverse
|
||||
except ImportError:
|
||||
from django.urls import reverse
|
||||
from django.contrib import admin, messages
|
||||
from django.contrib.admin.templatetags.admin_urls import add_preserved_filters
|
||||
from django.contrib.auth.models import Group, User
|
||||
from django.db import models
|
||||
from django.http import HttpResponseRedirect
|
||||
from django.templatetags.static import static
|
||||
from django.utils.safestring import mark_safe
|
||||
from django.urls import reverse
|
||||
from django.utils.html import format_html, format_html_join
|
||||
from django.utils.http import urlquote
|
||||
from django.utils.safestring import mark_safe
|
||||
from djangoql.admin import DjangoQLSearchMixin
|
||||
|
||||
from .models import Correspondent, Tag, Document, Log
|
||||
from documents.actions import (
|
||||
add_tag_to_selected,
|
||||
remove_correspondent_from_selected,
|
||||
remove_tag_from_selected,
|
||||
set_correspondent_on_selected
|
||||
)
|
||||
|
||||
|
||||
class MonthListFilter(admin.SimpleListFilter):
|
||||
|
||||
title = "Month"
|
||||
|
||||
# Parameter for the filter that will be used in the URL query.
|
||||
parameter_name = "month"
|
||||
|
||||
def lookups(self, request, model_admin):
|
||||
r = []
|
||||
for document in Document.objects.all():
|
||||
r.append((
|
||||
document.created.strftime("%Y-%m"),
|
||||
document.created.strftime("%B %Y")
|
||||
))
|
||||
return sorted(set(r), key=lambda x: x[0], reverse=True)
|
||||
|
||||
def queryset(self, request, queryset):
|
||||
|
||||
if not self.value():
|
||||
return None
|
||||
|
||||
year, month = self.value().split("-")
|
||||
return queryset.filter(created__year=year, created__month=month)
|
||||
from .models import Correspondent, Document, Log, Tag
|
||||
|
||||
|
||||
class FinancialYearFilter(admin.SimpleListFilter):
|
||||
@@ -78,12 +62,12 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
||||
|
||||
# To keep it simple we use the same string for both
|
||||
# query parameter and the display.
|
||||
return (query, query)
|
||||
return query, query
|
||||
|
||||
else:
|
||||
query = "{0}-{0}".format(date.year)
|
||||
display = "{}".format(date.year)
|
||||
return (query, display)
|
||||
return query, display
|
||||
|
||||
def lookups(self, request, model_admin):
|
||||
if not settings.FY_START or not settings.FY_END:
|
||||
@@ -104,32 +88,85 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
||||
created__lte=self._fy_end(end))
|
||||
|
||||
|
||||
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
|
||||
"""
|
||||
If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
|
||||
correspondents to documents sent our way over the past ``n`` years.
|
||||
"""
|
||||
|
||||
def field_choices(self, field, request, model_admin):
|
||||
|
||||
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
|
||||
correspondents = Correspondent.objects.all()
|
||||
|
||||
if years and years > 0:
|
||||
self.title = "Correspondent (Recent)"
|
||||
days = 365 * years
|
||||
correspondents = correspondents.filter(
|
||||
documents__created__gte=datetime.now() - timedelta(days=days)
|
||||
).distinct()
|
||||
|
||||
return [(c.id, c.name) for c in correspondents]
|
||||
|
||||
|
||||
class CommonAdmin(admin.ModelAdmin):
|
||||
list_per_page = settings.PAPERLESS_LIST_PER_PAGE
|
||||
|
||||
|
||||
class CorrespondentAdmin(CommonAdmin):
|
||||
|
||||
list_display = ("name", "match", "matching_algorithm", "document_count")
|
||||
list_display = (
|
||||
"name",
|
||||
"match",
|
||||
"matching_algorithm",
|
||||
"document_count",
|
||||
"last_correspondence"
|
||||
)
|
||||
list_filter = ("matching_algorithm",)
|
||||
list_editable = ("match", "matching_algorithm")
|
||||
|
||||
readonly_fields = ("slug",)
|
||||
|
||||
def get_queryset(self, request):
|
||||
qs = super(CorrespondentAdmin, self).get_queryset(request)
|
||||
qs = qs.annotate(
|
||||
document_count=models.Count("documents"),
|
||||
last_correspondence=models.Max("documents__created")
|
||||
)
|
||||
return qs
|
||||
|
||||
def document_count(self, obj):
|
||||
return obj.documents.count()
|
||||
return obj.document_count
|
||||
document_count.admin_order_field = "document_count"
|
||||
|
||||
def last_correspondence(self, obj):
|
||||
return obj.last_correspondence
|
||||
last_correspondence.admin_order_field = "last_correspondence"
|
||||
|
||||
|
||||
class TagAdmin(CommonAdmin):
|
||||
|
||||
list_display = ("name", "colour", "match", "matching_algorithm",
|
||||
"document_count")
|
||||
list_display = (
|
||||
"name", "colour", "match", "matching_algorithm", "document_count")
|
||||
list_filter = ("colour", "matching_algorithm")
|
||||
list_editable = ("colour", "match", "matching_algorithm")
|
||||
|
||||
readonly_fields = ("slug",)
|
||||
|
||||
class Media:
|
||||
js = ("js/colours.js",)
|
||||
|
||||
def get_queryset(self, request):
|
||||
qs = super(TagAdmin, self).get_queryset(request)
|
||||
qs = qs.annotate(document_count=models.Count("documents"))
|
||||
return qs
|
||||
|
||||
def document_count(self, obj):
|
||||
return obj.documents.count()
|
||||
return obj.document_count
|
||||
document_count.admin_order_field = "document_count"
|
||||
|
||||
|
||||
class DocumentAdmin(CommonAdmin):
|
||||
class DocumentAdmin(DjangoQLSearchMixin, CommonAdmin):
|
||||
|
||||
class Media:
|
||||
css = {
|
||||
@@ -137,15 +174,32 @@ class DocumentAdmin(CommonAdmin):
|
||||
}
|
||||
|
||||
search_fields = ("correspondent__name", "title", "content", "tags__name")
|
||||
readonly_fields = ("added",)
|
||||
readonly_fields = ("added", "file_type", "storage_type",)
|
||||
list_display = ("title", "created", "added", "thumbnail", "correspondent",
|
||||
"tags_")
|
||||
list_filter = ("tags", "correspondent", FinancialYearFilter,
|
||||
MonthListFilter)
|
||||
list_filter = (
|
||||
"tags",
|
||||
("correspondent", RecentCorrespondentFilter),
|
||||
FinancialYearFilter
|
||||
)
|
||||
|
||||
filter_horizontal = ("tags",)
|
||||
|
||||
ordering = ["-created", "correspondent"]
|
||||
|
||||
actions = [
|
||||
add_tag_to_selected,
|
||||
remove_tag_from_selected,
|
||||
set_correspondent_on_selected,
|
||||
remove_correspondent_from_selected
|
||||
]
|
||||
|
||||
date_hierarchy = "created"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.document_queue = []
|
||||
|
||||
def has_add_permission(self, request):
|
||||
return False
|
||||
|
||||
@@ -153,6 +207,79 @@ class DocumentAdmin(CommonAdmin):
|
||||
return obj.created.date().strftime("%Y-%m-%d")
|
||||
created_.short_description = "Created"
|
||||
|
||||
def changelist_view(self, request, extra_context=None):
|
||||
|
||||
response = super().changelist_view(
|
||||
request,
|
||||
extra_context=extra_context
|
||||
)
|
||||
|
||||
if request.method == "GET":
|
||||
cl = self.get_changelist_instance(request)
|
||||
self.document_queue = [doc.id for doc in cl.queryset]
|
||||
|
||||
return response
|
||||
|
||||
def change_view(self, request, object_id=None, form_url='',
|
||||
extra_context=None):
|
||||
|
||||
extra_context = extra_context or {}
|
||||
|
||||
if self.document_queue and object_id:
|
||||
if int(object_id) in self.document_queue:
|
||||
# There is a queue of documents
|
||||
current_index = self.document_queue.index(int(object_id))
|
||||
if current_index < len(self.document_queue) - 1:
|
||||
# ... and there are still documents in the queue
|
||||
extra_context["next_object"] = self.document_queue[
|
||||
current_index + 1
|
||||
]
|
||||
|
||||
return super(DocumentAdmin, self).change_view(
|
||||
request,
|
||||
object_id,
|
||||
form_url,
|
||||
extra_context=extra_context,
|
||||
)
|
||||
|
||||
def response_change(self, request, obj):
|
||||
|
||||
# This is mostly copied from ModelAdmin.response_change()
|
||||
opts = self.model._meta
|
||||
preserved_filters = self.get_preserved_filters(request)
|
||||
|
||||
msg_dict = {
|
||||
"name": opts.verbose_name,
|
||||
"obj": format_html(
|
||||
'<a href="{}">{}</a>',
|
||||
urlquote(request.path),
|
||||
obj
|
||||
),
|
||||
}
|
||||
if "_saveandeditnext" in request.POST:
|
||||
msg = format_html(
|
||||
'The {name} "{obj}" was changed successfully. '
|
||||
'Editing next object.',
|
||||
**msg_dict
|
||||
)
|
||||
self.message_user(request, msg, messages.SUCCESS)
|
||||
redirect_url = reverse(
|
||||
"admin:{}_{}_change".format(opts.app_label, opts.model_name),
|
||||
args=(request.POST["_next_object"],),
|
||||
current_app=self.admin_site.name
|
||||
)
|
||||
redirect_url = add_preserved_filters(
|
||||
{
|
||||
"preserved_filters": preserved_filters,
|
||||
"opts": opts
|
||||
},
|
||||
redirect_url
|
||||
)
|
||||
return HttpResponseRedirect(redirect_url)
|
||||
|
||||
return super().response_change(request, obj)
|
||||
|
||||
@mark_safe
|
||||
def thumbnail(self, obj):
|
||||
return self._html_tag(
|
||||
"a",
|
||||
@@ -165,8 +292,8 @@ class DocumentAdmin(CommonAdmin):
|
||||
),
|
||||
href=obj.download_url
|
||||
)
|
||||
thumbnail.allow_tags = True
|
||||
|
||||
@mark_safe
|
||||
def tags_(self, obj):
|
||||
r = ""
|
||||
for tag in obj.tags.all():
|
||||
@@ -183,10 +310,11 @@ class DocumentAdmin(CommonAdmin):
|
||||
)
|
||||
}
|
||||
)
|
||||
return mark_safe(r)
|
||||
tags_.allow_tags = True
|
||||
return r
|
||||
|
||||
@mark_safe
|
||||
def document(self, obj):
|
||||
# TODO: is this method even used anymore?
|
||||
return self._html_tag(
|
||||
"a",
|
||||
self._html_tag(
|
||||
@@ -199,7 +327,6 @@ class DocumentAdmin(CommonAdmin):
|
||||
),
|
||||
href=obj.download_url
|
||||
)
|
||||
document.allow_tags = True
|
||||
|
||||
@staticmethod
|
||||
def _html_tag(kind, inside=None, **kwargs):
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from django.db import transaction
|
||||
import datetime
|
||||
import hashlib
|
||||
import logging
|
||||
@@ -111,8 +112,11 @@ class Consumer:
|
||||
if not self.try_consume_file(file):
|
||||
self._ignore.append((file, mtime))
|
||||
|
||||
@transaction.atomic
|
||||
def try_consume_file(self, file):
|
||||
"Return True if file was consumed"
|
||||
"""
|
||||
Return True if file was consumed
|
||||
"""
|
||||
|
||||
if not re.match(FileInfo.REGEXES["title"], file):
|
||||
return False
|
||||
@@ -145,7 +149,7 @@ class Consumer:
|
||||
parsed_document = parser_class(doc)
|
||||
|
||||
try:
|
||||
thumbnail = parsed_document.get_thumbnail()
|
||||
thumbnail = parsed_document.get_optimised_thumbnail()
|
||||
date = parsed_document.get_date()
|
||||
document = self._store(
|
||||
parsed_document.get_text(),
|
||||
|
||||
@@ -1,8 +1,14 @@
|
||||
from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter
|
||||
from django_filters.rest_framework import BooleanFilter, FilterSet
|
||||
|
||||
from .models import Correspondent, Document, Tag
|
||||
|
||||
|
||||
CHAR_KWARGS = (
|
||||
"startswith", "endswith", "contains",
|
||||
"istartswith", "iendswith", "icontains"
|
||||
)
|
||||
|
||||
|
||||
class CorrespondentFilterSet(FilterSet):
|
||||
|
||||
class Meta:
|
||||
@@ -31,34 +37,24 @@ class TagFilterSet(FilterSet):
|
||||
|
||||
class DocumentFilterSet(FilterSet):
|
||||
|
||||
CHAR_KWARGS = {
|
||||
"lookup_expr": (
|
||||
"startswith",
|
||||
"endswith",
|
||||
"contains",
|
||||
"istartswith",
|
||||
"iendswith",
|
||||
"icontains"
|
||||
)
|
||||
}
|
||||
|
||||
correspondent__name = CharFilter(
|
||||
field_name="correspondent__name", **CHAR_KWARGS)
|
||||
correspondent__slug = CharFilter(
|
||||
field_name="correspondent__slug", **CHAR_KWARGS)
|
||||
tags__name = CharFilter(
|
||||
field_name="tags__name", **CHAR_KWARGS)
|
||||
tags__slug = CharFilter(
|
||||
field_name="tags__slug", **CHAR_KWARGS)
|
||||
tags__empty = BooleanFilter(
|
||||
field_name="tags", lookup_expr="isnull", distinct=True)
|
||||
tags_empty = BooleanFilter(
|
||||
label="Is tagged",
|
||||
field_name="tags",
|
||||
lookup_expr="isnull",
|
||||
exclude=True
|
||||
)
|
||||
|
||||
class Meta:
|
||||
model = Document
|
||||
fields = {
|
||||
"title": [
|
||||
"startswith", "endswith", "contains",
|
||||
"istartswith", "iendswith", "icontains"
|
||||
],
|
||||
"content": ["contains", "icontains"],
|
||||
|
||||
"title": CHAR_KWARGS,
|
||||
"content": ("contains", "icontains"),
|
||||
|
||||
"correspondent__name": CHAR_KWARGS,
|
||||
"correspondent__slug": CHAR_KWARGS,
|
||||
|
||||
"tags__name": CHAR_KWARGS,
|
||||
"tags__slug": CHAR_KWARGS,
|
||||
|
||||
}
|
||||
|
||||
@@ -216,7 +216,11 @@ class MailFetcher(Loggable):
|
||||
return r
|
||||
|
||||
def _connect(self):
|
||||
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
|
||||
try:
|
||||
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
|
||||
except OSError as e:
|
||||
msg = "Problem connecting to {}: {}".format(self._host, e.strerror)
|
||||
raise MailFetcherError(msg)
|
||||
|
||||
def _login(self):
|
||||
|
||||
|
||||
@@ -55,7 +55,12 @@ class Command(Renderable, BaseCommand):
|
||||
documents = Document.objects.all()
|
||||
document_map = {d.pk: d for d in documents}
|
||||
manifest = json.loads(serializers.serialize("json", documents))
|
||||
for document_dict in manifest:
|
||||
|
||||
for index, document_dict in enumerate(manifest):
|
||||
|
||||
# Force output to unencrypted as that will be the current state.
|
||||
# The importer will make the decision to encrypt or not.
|
||||
manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED # NOQA: E501
|
||||
|
||||
document = document_map[document_dict["pk"]]
|
||||
|
||||
|
||||
@@ -94,7 +94,7 @@ class Command(Renderable, BaseCommand):
|
||||
document_path = os.path.join(self.source, doc_file)
|
||||
thumbnail_path = os.path.join(self.source, thumb_file)
|
||||
|
||||
if document.storage_type == Document.STORAGE_TYPE_GPG:
|
||||
if settings.PASSPHRASE:
|
||||
|
||||
with open(document_path, "rb") as unencrypted:
|
||||
with open(document.source_path, "wb") as encrypted:
|
||||
@@ -112,3 +112,15 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
shutil.copy(document_path, document.source_path)
|
||||
shutil.copy(thumbnail_path, document.thumbnail_path)
|
||||
|
||||
# Reset the storage type to whatever we've used while importing
|
||||
|
||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
if settings.PASSPHRASE:
|
||||
storage_type = Document.STORAGE_TYPE_GPG
|
||||
|
||||
Document.objects.filter(
|
||||
pk__in=[r["pk"] for r in self.manifest]
|
||||
).update(
|
||||
storage_type=storage_type
|
||||
)
|
||||
|
||||
@@ -158,9 +158,4 @@ class Migration(migrations.Migration):
|
||||
name='modified',
|
||||
field=models.DateTimeField(auto_now=True, db_index=True),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='checksum',
|
||||
field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.', max_length=32, unique=True),
|
||||
),
|
||||
]
|
||||
|
||||
@@ -12,6 +12,11 @@ class Migration(migrations.Migration):
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='checksum',
|
||||
field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.', max_length=32, unique=True),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='correspondent',
|
||||
name='is_insensitive',
|
||||
|
||||
52
src/documents/migrations/0022_auto_20181007_1420.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# Generated by Django 2.0.8 on 2018-10-07 14:20
|
||||
|
||||
from django.db import migrations, models
|
||||
from django.utils.text import slugify
|
||||
|
||||
|
||||
def re_slug_all_the_things(apps, schema_editor):
|
||||
"""
|
||||
Rewrite all slug values to make sure they're actually slugs before we brand
|
||||
them as uneditable.
|
||||
"""
|
||||
|
||||
Tag = apps.get_model("documents", "Tag")
|
||||
Correspondent = apps.get_model("documents", "Correspondent")
|
||||
|
||||
for klass in (Tag, Correspondent):
|
||||
for instance in klass.objects.all():
|
||||
klass.objects.filter(
|
||||
pk=instance.pk
|
||||
).update(
|
||||
slug=slugify(instance.slug)
|
||||
)
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('documents', '0021_document_storage_type'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterModelOptions(
|
||||
name='tag',
|
||||
options={'ordering': ('name',)},
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='correspondent',
|
||||
name='slug',
|
||||
field=models.SlugField(blank=True, editable=False),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='document',
|
||||
name='file_type',
|
||||
field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')], editable=False, max_length=4),
|
||||
),
|
||||
migrations.AlterField(
|
||||
model_name='tag',
|
||||
name='slug',
|
||||
field=models.SlugField(blank=True, editable=False),
|
||||
),
|
||||
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
|
||||
]
|
||||
@@ -11,6 +11,7 @@ from django.conf import settings
|
||||
from django.db import models
|
||||
from django.template.defaultfilters import slugify
|
||||
from django.utils import timezone
|
||||
from django.utils.text import slugify
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
from .managers import LogManager
|
||||
@@ -37,7 +38,7 @@ class MatchingModel(models.Model):
|
||||
)
|
||||
|
||||
name = models.CharField(max_length=128, unique=True)
|
||||
slug = models.SlugField(blank=True)
|
||||
slug = models.SlugField(blank=True, editable=False)
|
||||
|
||||
match = models.CharField(max_length=256, blank=True)
|
||||
matching_algorithm = models.PositiveIntegerField(
|
||||
@@ -147,9 +148,7 @@ class MatchingModel(models.Model):
|
||||
def save(self, *args, **kwargs):
|
||||
|
||||
self.match = self.match.lower()
|
||||
|
||||
if not self.slug:
|
||||
self.slug = slugify(self.name)
|
||||
self.slug = slugify(self.name)
|
||||
|
||||
models.Model.save(self, *args, **kwargs)
|
||||
|
||||
@@ -452,7 +451,7 @@ class FileInfo:
|
||||
r = []
|
||||
for t in tags.split(","):
|
||||
r.append(Tag.objects.get_or_create(
|
||||
slug=t.lower(),
|
||||
slug=slugify(t),
|
||||
defaults={"name": t}
|
||||
)[0])
|
||||
return tuple(r)
|
||||
|
||||
@@ -1,23 +1,31 @@
|
||||
import logging
|
||||
import shutil
|
||||
import tempfile
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
import dateparser
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
# This regular expression will try to find dates in the document at
|
||||
# hand and will match the following formats:
|
||||
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
||||
DATE_REGEX = re.compile(
|
||||
r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
|
||||
r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
|
||||
r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
|
||||
r'\b([^\W\d_]{3,9} [0-9]{4})\b'
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501
|
||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
|
||||
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
|
||||
)
|
||||
|
||||
|
||||
@@ -32,6 +40,9 @@ class DocumentParser:
|
||||
"""
|
||||
|
||||
SCRATCH = settings.SCRATCH_DIR
|
||||
DATE_ORDER = settings.DATE_ORDER
|
||||
FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
|
||||
OPTIPNG = settings.OPTIPNG_BINARY
|
||||
|
||||
def __init__(self, path):
|
||||
self.document_path = path
|
||||
@@ -45,6 +56,19 @@ class DocumentParser:
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def optimise_thumbnail(self, in_path):
|
||||
|
||||
out_path = os.path.join(self.tempdir, "optipng.png")
|
||||
|
||||
args = (self.OPTIPNG, "-o5", in_path, "-out", out_path)
|
||||
if not subprocess.Popen(args).wait() == 0:
|
||||
raise ParseError("Optipng failed at {}".format(args))
|
||||
|
||||
return out_path
|
||||
|
||||
def get_optimised_thumbnail(self):
|
||||
return self.optimise_thumbnail(self.get_thumbnail())
|
||||
|
||||
def get_text(self):
|
||||
"""
|
||||
Returns the text from the document and only the text.
|
||||
@@ -55,7 +79,82 @@ class DocumentParser:
|
||||
"""
|
||||
Returns the date of the document.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def __parser(ds, date_order):
|
||||
"""
|
||||
Call dateparser.parse with a particular date ordering
|
||||
"""
|
||||
return dateparser.parse(
|
||||
ds,
|
||||
settings={
|
||||
"DATE_ORDER": date_order,
|
||||
"PREFER_DAY_OF_MONTH": "first",
|
||||
"RETURN_AS_TIMEZONE_AWARE":
|
||||
True
|
||||
}
|
||||
)
|
||||
|
||||
date = None
|
||||
date_string = None
|
||||
|
||||
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
|
||||
title = os.path.basename(self.document_path)
|
||||
|
||||
# if filename date parsing is enabled, search there first:
|
||||
if self.FILENAME_DATE_ORDER:
|
||||
self.log("info", "Checking document title for date")
|
||||
for m in re.finditer(DATE_REGEX, title):
|
||||
date_string = m.group(0)
|
||||
|
||||
try:
|
||||
date = __parser(date_string, self.FILENAME_DATE_ORDER)
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
if date is not None and next_year > date.year > 1900:
|
||||
self.log(
|
||||
"info",
|
||||
"Detected document date {} based on string {} "
|
||||
"from document title"
|
||||
"".format(date.isoformat(), date_string)
|
||||
)
|
||||
return date
|
||||
|
||||
try:
|
||||
# getting text after checking filename will save time if only
|
||||
# looking at the filename instead of the whole text
|
||||
text = self.get_text()
|
||||
except ParseError:
|
||||
return None
|
||||
|
||||
# Iterate through all regex matches in text and try to parse the date
|
||||
for m in re.finditer(DATE_REGEX, text):
|
||||
date_string = m.group(0)
|
||||
|
||||
try:
|
||||
date = __parser(date_string, self.DATE_ORDER)
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
if date is not None and next_year > date.year > 1900:
|
||||
break
|
||||
else:
|
||||
date = None
|
||||
|
||||
if date is not None:
|
||||
self.log(
|
||||
"info",
|
||||
"Detected document date {} based on string {}".format(
|
||||
date.isoformat(),
|
||||
date_string
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.log("info", "Unable to detect date for document")
|
||||
|
||||
return date
|
||||
|
||||
def log(self, level, message):
|
||||
getattr(self.logger, level)(message, extra={
|
||||
|
||||
@@ -7,7 +7,14 @@ class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
|
||||
|
||||
class Meta:
|
||||
model = Correspondent
|
||||
fields = ("id", "slug", "name")
|
||||
fields = (
|
||||
"id",
|
||||
"slug",
|
||||
"name",
|
||||
"match",
|
||||
"matching_algorithm",
|
||||
"is_insensitive"
|
||||
)
|
||||
|
||||
|
||||
class TagSerializer(serializers.HyperlinkedModelSerializer):
|
||||
@@ -15,7 +22,14 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
|
||||
class Meta:
|
||||
model = Tag
|
||||
fields = (
|
||||
"id", "slug", "name", "colour", "match", "matching_algorithm")
|
||||
"id",
|
||||
"slug",
|
||||
"name",
|
||||
"colour",
|
||||
"match",
|
||||
"matching_algorithm",
|
||||
"is_insensitive"
|
||||
)
|
||||
|
||||
|
||||
class CorrespondentField(serializers.HyperlinkedRelatedField):
|
||||
@@ -46,6 +60,7 @@ class DocumentSerializer(serializers.ModelSerializer):
|
||||
"checksum",
|
||||
"created",
|
||||
"modified",
|
||||
"added",
|
||||
"file_name",
|
||||
"download_url",
|
||||
"thumbnail_url",
|
||||
|
||||
66
src/documents/static/js/colours.js
Normal file
@@ -0,0 +1,66 @@
|
||||
// The following jQuery snippet will add a small square next to the selection
|
||||
// drop-down on the `Add tag` page that will update to show the selected tag
|
||||
// color as the drop-down value is changed.
|
||||
|
||||
django.jQuery(document).ready(function(){
|
||||
|
||||
if (django.jQuery("#id_colour").length) {
|
||||
|
||||
let colour;
|
||||
let colour_num;
|
||||
|
||||
colour_num = django.jQuery("#id_colour").val() - 1;
|
||||
colour = django.jQuery('#id_colour')[0][colour_num].text;
|
||||
django.jQuery('#id_colour').after('<div class="colour_square"></div>');
|
||||
|
||||
django.jQuery('.colour_square').css({
|
||||
'float': 'left',
|
||||
'width': '20px',
|
||||
'height': '20px',
|
||||
'margin': '5px',
|
||||
'border': '1px solid rgba(0, 0, 0, .2)',
|
||||
'background': colour
|
||||
});
|
||||
|
||||
django.jQuery('#id_colour').change(function () {
|
||||
colour_num = django.jQuery("#id_colour").val() - 1;
|
||||
colour = django.jQuery('#id_colour')[0][colour_num].text;
|
||||
django.jQuery('.colour_square').css({'background': colour});
|
||||
});
|
||||
|
||||
} else if (django.jQuery("select[id*='colour']").length) {
|
||||
|
||||
django.jQuery('select[id*="-colour"]').each(function (index, element) {
|
||||
let id;
|
||||
let loop_colour_num;
|
||||
let loop_colour;
|
||||
|
||||
id = "colour_square_" + index;
|
||||
django.jQuery(element).after('<div class="colour_square" id="' + id + '"></div>');
|
||||
|
||||
loop_colour_num = django.jQuery(element).val() - 1;
|
||||
loop_colour = django.jQuery(element)[0][loop_colour_num].text;
|
||||
|
||||
django.jQuery("<style type='text/css'>\
|
||||
.colour_square{ \
|
||||
float: left; \
|
||||
width: 20px; \
|
||||
height: 20px; \
|
||||
margin: 5px; \
|
||||
border: 1px solid rgba(0,0,0,.2); \
|
||||
} </style>").appendTo("head");
|
||||
django.jQuery('#' + id).css({'background': loop_colour});
|
||||
|
||||
console.log(id, loop_colour_num, loop_colour);
|
||||
|
||||
django.jQuery(element).change(function () {
|
||||
loop_colour_num = django.jQuery(element).val() - 1;
|
||||
loop_colour = django.jQuery(element)[0][loop_colour_num].text;
|
||||
django.jQuery('#' + id).css({'background': loop_colour});
|
||||
console.log('#' + id, loop_colour)
|
||||
});
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
@@ -3,10 +3,63 @@
|
||||
{# NOTE: This should probably be extending base.html. See CSS comment below details. #}
|
||||
|
||||
|
||||
{% load static %}
|
||||
{% load custom_css from customisation %}
|
||||
{% load custom_js from customisation %}
|
||||
|
||||
|
||||
{% block extrahead %}
|
||||
<link rel="icon" type="image/x-icon" href="{% url 'favicon' %}" />
|
||||
<style>
|
||||
#header {
|
||||
background-color: #90a9b7;
|
||||
line-height: inherit;
|
||||
height: auto;
|
||||
}
|
||||
#branding h1 {
|
||||
font-weight: inherit;
|
||||
font-size: inherit;
|
||||
}
|
||||
.button,
|
||||
.button:active,
|
||||
.button:focus,
|
||||
.button:hover,
|
||||
a.button,
|
||||
.submit-row input,
|
||||
input[type="submit"],
|
||||
input[type="submit"]:active,
|
||||
input[type="submit"]:focus,
|
||||
input[type="submit"]:hover,
|
||||
input[type="button"],
|
||||
input[type="button"]:active,
|
||||
input[type="button"]:focus,
|
||||
input[type="button"]:hover {
|
||||
background-color: #074f57;
|
||||
}
|
||||
.module h2,
|
||||
.module caption,
|
||||
.inline-group h2 {
|
||||
background-color: #90a9b7;
|
||||
}
|
||||
div.breadcrumbs {
|
||||
background-color: #077187;
|
||||
}
|
||||
.module h2,
|
||||
.module caption,
|
||||
.inline-group h2 {
|
||||
background-color: #077187;
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
|
||||
{% block branding %}
|
||||
<h1 id="site-name">
|
||||
<a href="{% url 'admin:index' %}"><img src="{% static 'paperless/img/logo-light.png' %}" alt="Paperless" /></a>
|
||||
</h1>
|
||||
{% endblock %}
|
||||
|
||||
|
||||
{% block blockbots %}
|
||||
|
||||
{% comment %}
|
||||
|
||||
@@ -1,5 +1,21 @@
|
||||
{% extends 'admin/change_form.html' %}
|
||||
|
||||
{% block content %}
|
||||
|
||||
{{ block.super }}
|
||||
|
||||
{% if next_object %}
|
||||
<script type="text/javascript">//<![CDATA[
|
||||
(function($){
|
||||
$('<input type="submit" value="Save and edit next" name="_saveandeditnext" />')
|
||||
.prependTo('div.submit-row');
|
||||
$('<input type="hidden" value="{{next_object}}" name="_next_object" />')
|
||||
.prependTo('div.submit-row');
|
||||
})(django.jQuery);
|
||||
//]]></script>
|
||||
{% endif %}
|
||||
|
||||
{% endblock content %}
|
||||
|
||||
{% block footer %}
|
||||
|
||||
@@ -10,4 +26,4 @@
|
||||
django.jQuery(".field-created input").first().attr("type", "date")
|
||||
</script>
|
||||
|
||||
{% endblock footer %}
|
||||
{% endblock footer %}
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
}
|
||||
.result .header {
|
||||
padding: 5px;
|
||||
background-color: #79AEC8;
|
||||
background-color: #90a9b7;
|
||||
position: relative;
|
||||
}
|
||||
.result .header .checkbox {
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
{% extends "admin/base_site.html" %}
|
||||
|
||||
|
||||
{% load i18n l10n admin_urls static %}
|
||||
{% load staticfiles %}
|
||||
|
||||
|
||||
{% block extrahead %}
|
||||
{{ block.super }}
|
||||
{{ media }}
|
||||
<script type="text/javascript" src="{% static 'admin/js/cancel.js' %}"></script>
|
||||
{% endblock %}
|
||||
|
||||
|
||||
{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %}
|
||||
|
||||
|
||||
{% block breadcrumbs %}
|
||||
<div class="breadcrumbs">
|
||||
<a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>
|
||||
› <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
|
||||
› <a href="{% url opts|admin_urlname:'changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
|
||||
› {{ title }}
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<p>Please select the {{itemname}}.</p>
|
||||
<form method="post">{% csrf_token %}
|
||||
<div>
|
||||
{% for obj in queryset %}
|
||||
<input type="hidden" name="{{ action_checkbox_name }}" value="{{ obj.pk|unlocalize }}"/>
|
||||
{% endfor %}
|
||||
<p>
|
||||
<select name="obj_id">
|
||||
{% for obj in objects %}
|
||||
<option value="{{ obj.id }}">{{ obj.name }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</p>
|
||||
|
||||
<input type="hidden" name="action" value="{{ action }}"/>
|
||||
<input type="hidden" name="post" value="yes" />
|
||||
<p>
|
||||
<input type="submit" value="{% trans 'Confirm' %}" />
|
||||
<a href="#" class="button cancel-link">{% trans "Go back" %}</a>
|
||||
</p>
|
||||
</div>
|
||||
</form>
|
||||
{% endblock %}
|
||||
@@ -2,6 +2,7 @@ from django.http import HttpResponse, HttpResponseBadRequest
|
||||
from django.views.generic import DetailView, FormView, TemplateView
|
||||
from django_filters.rest_framework import DjangoFilterBackend
|
||||
from django.conf import settings
|
||||
from django.utils import cache
|
||||
|
||||
from paperless.db import GnuPG
|
||||
from paperless.mixins import SessionOrBasicAuthMixin
|
||||
@@ -56,10 +57,12 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
|
||||
}
|
||||
|
||||
if self.kwargs["kind"] == "thumb":
|
||||
return HttpResponse(
|
||||
response = HttpResponse(
|
||||
self._get_raw_data(self.object.thumbnail_file),
|
||||
content_type=content_types[Document.TYPE_PNG]
|
||||
)
|
||||
cache.patch_cache_control(response, max_age=31536000, private=True)
|
||||
return response
|
||||
|
||||
response = HttpResponse(
|
||||
self._get_raw_data(self.object.source_file),
|
||||
@@ -130,7 +133,7 @@ class DocumentViewSet(RetrieveModelMixin,
|
||||
filter_class = DocumentFilterSet
|
||||
search_fields = ("title", "correspondent__name", "content")
|
||||
ordering_fields = (
|
||||
"id", "title", "correspondent__name", "created", "modified")
|
||||
"id", "title", "correspondent__name", "created", "modified", "added")
|
||||
|
||||
|
||||
class LogViewSet(ReadOnlyModelViewSet):
|
||||
|
||||
@@ -76,7 +76,12 @@ def binaries_check(app_configs, **kwargs):
|
||||
error = "Paperless can't find {}. Without it, consumption is impossible."
|
||||
hint = "Either it's not in your ${PATH} or it's not installed."
|
||||
|
||||
binaries = (settings.CONVERT_BINARY, settings.UNPAPER_BINARY, "tesseract")
|
||||
binaries = (
|
||||
settings.CONVERT_BINARY,
|
||||
settings.OPTIPNG_BINARY,
|
||||
settings.UNPAPER_BINARY,
|
||||
"tesseract"
|
||||
)
|
||||
|
||||
check_messages = []
|
||||
for binary in binaries:
|
||||
|
||||
@@ -1,15 +1,20 @@
|
||||
from django.contrib.auth.models import User as DjangoUser
|
||||
|
||||
|
||||
class User:
|
||||
"""
|
||||
This is a dummy django User used with our middleware to disable
|
||||
login authentication if that is configured in paperless.conf
|
||||
This is a dummy django User used with our middleware to disable
|
||||
login authentication if that is configured in paperless.conf
|
||||
"""
|
||||
|
||||
is_superuser = True
|
||||
is_active = True
|
||||
is_staff = True
|
||||
is_authenticated = True
|
||||
|
||||
# Must be -1 to avoid colliding with real user ID's (which start at 1)
|
||||
id = -1
|
||||
@property
|
||||
def id(self):
|
||||
return DjangoUser.objects.order_by("pk").first().pk
|
||||
|
||||
@property
|
||||
def pk(self):
|
||||
@@ -17,9 +22,9 @@ class User:
|
||||
|
||||
|
||||
"""
|
||||
NOTE: These are here as a hack instead of being in the User definition
|
||||
above due to the way pycodestyle handles lamdbdas.
|
||||
See https://github.com/PyCQA/pycodestyle/issues/379 for more.
|
||||
NOTE: These are here as a hack instead of being in the User definition
|
||||
NOTE: above due to the way pycodestyle handles lamdbdas.
|
||||
NOTE: See https://github.com/PyCQA/pycodestyle/issues/379 for more.
|
||||
"""
|
||||
|
||||
User.has_module_perms = lambda *_: True
|
||||
|
||||
@@ -22,12 +22,12 @@ elif os.path.exists("/usr/local/etc/paperless.conf"):
|
||||
load_dotenv("/usr/local/etc/paperless.conf")
|
||||
|
||||
|
||||
def __get_boolean(key):
|
||||
def __get_boolean(key, default="NO"):
|
||||
"""
|
||||
Return a boolean value based on whatever the user has supplied in the
|
||||
environment based on whether the value "looks like" it's True or not.
|
||||
"""
|
||||
return bool(os.getenv(key, "NO").lower() in ("yes", "y", "1", "t", "true"))
|
||||
return bool(os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true"))
|
||||
|
||||
|
||||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
|
||||
@@ -47,7 +47,7 @@ SECRET_KEY = os.getenv(
|
||||
|
||||
|
||||
# SECURITY WARNING: don't run with debug turned on in production!
|
||||
DEBUG = True
|
||||
DEBUG = __get_boolean("PAPERLESS_DEBUG", "YES")
|
||||
|
||||
LOGIN_URL = "admin:login"
|
||||
|
||||
@@ -72,6 +72,7 @@ INSTALLED_APPS = [
|
||||
"corsheaders",
|
||||
"django_extensions",
|
||||
|
||||
"paperless",
|
||||
"documents.apps.DocumentsConfig",
|
||||
"reminders.apps.RemindersConfig",
|
||||
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
||||
@@ -82,6 +83,7 @@ INSTALLED_APPS = [
|
||||
"rest_framework",
|
||||
"crispy_forms",
|
||||
"django_filters",
|
||||
"djangoql",
|
||||
|
||||
]
|
||||
|
||||
@@ -144,13 +146,18 @@ DATABASES = {
|
||||
}
|
||||
}
|
||||
|
||||
if os.getenv("PAPERLESS_DBUSER") and os.getenv("PAPERLESS_DBPASS"):
|
||||
if os.getenv("PAPERLESS_DBUSER"):
|
||||
DATABASES["default"] = {
|
||||
"ENGINE": "django.db.backends.postgresql_psycopg2",
|
||||
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
|
||||
"USER": os.getenv("PAPERLESS_DBUSER"),
|
||||
"PASSWORD": os.getenv("PAPERLESS_DBPASS")
|
||||
}
|
||||
if os.getenv("PAPERLESS_DBPASS"):
|
||||
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
|
||||
if os.getenv("PAPERLESS_DBHOST"):
|
||||
DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
|
||||
if os.getenv("PAPERLESS_DBPORT"):
|
||||
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
|
||||
|
||||
|
||||
# Password validation
|
||||
@@ -198,6 +205,16 @@ STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
|
||||
MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
|
||||
|
||||
|
||||
# Other
|
||||
|
||||
# Disable Django's artificial limit on the number of form fields to submit at
|
||||
# once. This is a protection against overloading the server, but since this is
|
||||
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
|
||||
# of log entries outweight the benefits of such a safeguard.
|
||||
|
||||
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
|
||||
|
||||
|
||||
# Paperless-specific stuff
|
||||
# You shouldn't have to edit any of these values. Rather, you can set these
|
||||
# values in /etc/paperless.conf instead.
|
||||
@@ -246,6 +263,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
|
||||
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
||||
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
|
||||
|
||||
# OptiPNG
|
||||
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
|
||||
|
||||
# Unpaper
|
||||
UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")
|
||||
|
||||
@@ -292,3 +312,10 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
|
||||
|
||||
# Specify the default date order (for autodetected dates)
|
||||
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
||||
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
||||
|
||||
# Specify for how many years a correspondent is considered recent. Recent
|
||||
# correspondents will be shown in a separate "Recent correspondents" filter as
|
||||
# well. Set to 0 to disable this filter.
|
||||
PAPERLESS_RECENT_CORRESPONDENT_YEARS = int(os.getenv(
|
||||
"PAPERLESS_RECENT_CORRESPONDENT_YEARS", 0))
|
||||
|
||||
BIN
src/paperless/static/paperless/img/favicon.ico
Normal file
|
After Width: | Height: | Size: 108 KiB |
BIN
src/paperless/static/paperless/img/logo-dark.png
Normal file
|
After Width: | Height: | Size: 6.2 KiB |
BIN
src/paperless/static/paperless/img/logo-light.png
Normal file
|
After Width: | Height: | Size: 8.6 KiB |
@@ -6,6 +6,7 @@ from django.views.decorators.csrf import csrf_exempt
|
||||
from django.views.generic import RedirectView
|
||||
from rest_framework.routers import DefaultRouter
|
||||
|
||||
from paperless.views import FaviconView
|
||||
from documents.views import (
|
||||
CorrespondentViewSet,
|
||||
DocumentViewSet,
|
||||
@@ -44,6 +45,9 @@ urlpatterns = [
|
||||
# File uploads
|
||||
url(r"^push$", csrf_exempt(PushView.as_view()), name="push"),
|
||||
|
||||
# Favicon
|
||||
url(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
|
||||
|
||||
# The Django admin
|
||||
url(r"admin/", admin.site.urls),
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = (2, 3, 0)
|
||||
__version__ = (2, 6, 1)
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
import os
|
||||
|
||||
from django.http import HttpResponse
|
||||
from django.views.generic import View
|
||||
from rest_framework.pagination import PageNumberPagination
|
||||
|
||||
|
||||
@@ -5,3 +9,17 @@ class StandardPagination(PageNumberPagination):
|
||||
page_size = 25
|
||||
page_size_query_param = "page-size"
|
||||
max_page_size = 100000
|
||||
|
||||
|
||||
class FaviconView(View):
|
||||
|
||||
def get(self, request, *args, **kwargs):
|
||||
favicon = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"static",
|
||||
"paperless",
|
||||
"img",
|
||||
"favicon.ico"
|
||||
)
|
||||
with open(favicon, "rb") as f:
|
||||
return HttpResponse(f, content_type="image/x-icon")
|
||||
|
||||
@@ -4,7 +4,6 @@ import re
|
||||
import subprocess
|
||||
from multiprocessing.pool import Pool
|
||||
|
||||
import dateparser
|
||||
import langdetect
|
||||
import pyocr
|
||||
from django.conf import settings
|
||||
@@ -14,7 +13,7 @@ from pyocr.libtesseract.tesseract_raw import \
|
||||
from pyocr.tesseract import TesseractError
|
||||
|
||||
import pdftotext
|
||||
from documents.parsers import DocumentParser, ParseError, DATE_REGEX
|
||||
from documents.parsers import DocumentParser, ParseError
|
||||
|
||||
from .languages import ISO639
|
||||
|
||||
@@ -33,7 +32,6 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
|
||||
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
||||
UNPAPER = settings.UNPAPER_BINARY
|
||||
DATE_ORDER = settings.DATE_ORDER
|
||||
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
|
||||
OCR_ALWAYS = settings.OCR_ALWAYS
|
||||
|
||||
@@ -46,15 +44,18 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
The thumbnail of a PDF is just a 500px wide image of the first page.
|
||||
"""
|
||||
|
||||
out_path = os.path.join(self.tempdir, "convert.png")
|
||||
|
||||
# Run convert to get a decent thumbnail
|
||||
run_convert(
|
||||
self.CONVERT,
|
||||
"-scale", "500x5000",
|
||||
"-alpha", "remove",
|
||||
"{}[0]".format(self.document_path),
|
||||
os.path.join(self.tempdir, "convert.png")
|
||||
out_path
|
||||
)
|
||||
|
||||
return os.path.join(self.tempdir, "convert.png")
|
||||
return out_path
|
||||
|
||||
def _is_ocred(self):
|
||||
|
||||
@@ -152,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
)
|
||||
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
||||
return raw_text
|
||||
raise OCRError("Language detection failed")
|
||||
error_msg = ("Language detection failed. Set "
|
||||
"PAPERLESS_FORGIVING_OCR in config file to continue "
|
||||
"anyway.")
|
||||
raise OCRError(error_msg)
|
||||
|
||||
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
|
||||
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
||||
@@ -172,8 +176,8 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
||||
return raw_text
|
||||
raise OCRError(
|
||||
"The guessed language is not available in this instance of "
|
||||
"Tesseract."
|
||||
"The guessed language ({}) is not available in this instance "
|
||||
"of Tesseract.".format(guessed_language)
|
||||
)
|
||||
|
||||
def _ocr(self, imgs, lang):
|
||||
@@ -202,40 +206,6 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE)
|
||||
return text
|
||||
|
||||
def get_date(self):
|
||||
date = None
|
||||
datestring = None
|
||||
|
||||
try:
|
||||
text = self.get_text()
|
||||
except ParseError as e:
|
||||
return None
|
||||
|
||||
# Iterate through all regex matches and try to parse the date
|
||||
for m in re.finditer(DATE_REGEX, text):
|
||||
datestring = m.group(0)
|
||||
|
||||
try:
|
||||
date = dateparser.parse(
|
||||
datestring,
|
||||
settings={'DATE_ORDER': self.DATE_ORDER,
|
||||
'PREFER_DAY_OF_MONTH': 'first',
|
||||
'RETURN_AS_TIMEZONE_AWARE': True})
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
if date is not None:
|
||||
break
|
||||
|
||||
if date is not None:
|
||||
self.log("info", "Detected document date " + date.isoformat() +
|
||||
" based on string " + datestring)
|
||||
else:
|
||||
self.log("info", "Unable to detect date for document")
|
||||
|
||||
return date
|
||||
|
||||
|
||||
def run_convert(*args):
|
||||
|
||||
@@ -251,7 +221,8 @@ def run_convert(*args):
|
||||
|
||||
def run_unpaper(args):
|
||||
unpaper, pnm = args
|
||||
command_args = unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm")
|
||||
command_args = (unpaper, "--overwrite", pnm,
|
||||
pnm.replace(".pnm", ".unpaper.pnm"))
|
||||
if not subprocess.Popen(command_args).wait() == 0:
|
||||
raise ParseError("Unpaper failed at {}".format(command_args))
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 136 KiB |
|
Before Width: | Height: | Size: 135 KiB |
|
Before Width: | Height: | Size: 138 KiB |
|
Before Width: | Height: | Size: 138 KiB |
|
Before Width: | Height: | Size: 136 KiB |
|
Before Width: | Height: | Size: 136 KiB |
@@ -8,6 +8,7 @@ from dateutil import tz
|
||||
from django.test import TestCase
|
||||
|
||||
from ..parsers import RasterisedDocumentParser
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class TestDate(TestCase):
|
||||
@@ -15,73 +16,67 @@ class TestDate(TestCase):
|
||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
|
||||
|
||||
MOCK_SCRATCH = "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH" # NOQA: E501
|
||||
|
||||
def setUp(self):
|
||||
os.makedirs(self.SCRATCH, exist_ok=True)
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.SCRATCH)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_1(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = "lorem ipsum 130218 lorem ipsum"
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_2(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = "lorem ipsum 2018 lorem ipsum"
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_3(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = "lorem ipsum 20180213 lorem ipsum"
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_4(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = "lorem ipsum 13.02.2018 lorem ipsum"
|
||||
date = document.get_date()
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
|
||||
date,
|
||||
datetime.datetime(
|
||||
2018, 2, 13, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_5(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = (
|
||||
"lorem ipsum 130218, 2018, 20180213 and 13.02.2018 lorem ipsum")
|
||||
"lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
|
||||
"ipsum"
|
||||
)
|
||||
date = document.get_date()
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
|
||||
date,
|
||||
datetime.datetime(
|
||||
2018, 2, 13, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_6(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@@ -98,10 +93,7 @@ class TestDate(TestCase):
|
||||
)
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_7(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@@ -110,277 +102,83 @@ class TestDate(TestCase):
|
||||
"März 2019\n"
|
||||
"lorem ipsum"
|
||||
)
|
||||
date = document.get_date()
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
date,
|
||||
datetime.datetime(
|
||||
2019, 3, 1, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_8(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = ("lorem ipsum\n"
|
||||
"Wohnort\n"
|
||||
"3100\n"
|
||||
"IBAN\n"
|
||||
"AT87 4534\n"
|
||||
"1234\n"
|
||||
"1234 5678\n"
|
||||
"BIC\n"
|
||||
"lorem ipsum\n"
|
||||
"März 2020")
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
document._text = (
|
||||
"lorem ipsum\n"
|
||||
"Wohnort\n"
|
||||
"3100\n"
|
||||
"IBAN\n"
|
||||
"AT87 4534\n"
|
||||
"1234\n"
|
||||
"1234 5678\n"
|
||||
"BIC\n"
|
||||
"lorem ipsum\n"
|
||||
"März 2020"
|
||||
)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(
|
||||
2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_date_format_9(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document._text = ("lorem ipsum\n"
|
||||
"27. Nullmonth 2020\n"
|
||||
"März 2020\n"
|
||||
"lorem ipsum")
|
||||
self.assertEqual(document.get_date(),
|
||||
datetime.datetime(2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.tzutc()))
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_1_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
document._text = (
|
||||
"lorem ipsum\n"
|
||||
"27. Nullmonth 2020\n"
|
||||
"März 2020\n"
|
||||
"lorem ipsum"
|
||||
)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
datetime.datetime(
|
||||
2020, 3, 1, 0, 0,
|
||||
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||
)
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||
return_value="01-07-0590 00:00:00"
|
||||
)
|
||||
def test_get_text_1_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_crazy_date_past(self, *args):
|
||||
document = RasterisedDocumentParser("/dev/null")
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
self.assertIsNone(document.get_date())
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||
return_value="01-07-2350 00:00:00"
|
||||
)
|
||||
def test_get_text_2_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_crazy_date_future(self, *args):
|
||||
document = RasterisedDocumentParser("/dev/null")
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
self.assertIsNone(document.get_date())
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||
return_value="01-07-0590 00:00:00"
|
||||
)
|
||||
def test_get_text_2_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||
def test_crazy_date_past(self, *args):
|
||||
document = RasterisedDocumentParser("/dev/null")
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_3_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_3_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_4_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_4_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_5_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_5_png(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_pdf_us(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
document.DATE_ORDER = "MDY"
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_png_us(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
document.DATE_ORDER = "MDY"
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_pdf_eu(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_6_png_eu(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), False)
|
||||
self.assertEqual(document.get_date(), None)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_7_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_8_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
|
||||
@mock.patch(
|
||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||
SCRATCH
|
||||
)
|
||||
def test_get_text_9_pdf(self):
|
||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
|
||||
document = RasterisedDocumentParser(input_file)
|
||||
document.get_text()
|
||||
self.assertEqual(document._is_ocred(), True)
|
||||
self.assertEqual(
|
||||
document.get_date(),
|
||||
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
|
||||
)
|
||||
self.assertIsNone(document.get_date())
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
import dateparser
|
||||
from django.conf import settings
|
||||
|
||||
from documents.parsers import DocumentParser, ParseError, DATE_REGEX
|
||||
from documents.parsers import DocumentParser, ParseError
|
||||
|
||||
|
||||
class TextDocumentParser(DocumentParser):
|
||||
@@ -16,7 +14,6 @@ class TextDocumentParser(DocumentParser):
|
||||
CONVERT = settings.CONVERT_BINARY
|
||||
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
||||
UNPAPER = settings.UNPAPER_BINARY
|
||||
DATE_ORDER = settings.DATE_ORDER
|
||||
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
|
||||
OCR_ALWAYS = settings.OCR_ALWAYS
|
||||
|
||||
@@ -26,7 +23,7 @@ class TextDocumentParser(DocumentParser):
|
||||
|
||||
def get_thumbnail(self):
|
||||
"""
|
||||
The thumbnail of a txt is just a 500px wide image of the text
|
||||
The thumbnail of a text file is just a 500px wide image of the text
|
||||
rendered onto a letter-sized page.
|
||||
"""
|
||||
# The below is heavily cribbed from https://askubuntu.com/a/590951
|
||||
@@ -35,7 +32,7 @@ class TextDocumentParser(DocumentParser):
|
||||
text_color = "black" # text color
|
||||
psize = [500, 647] # icon size
|
||||
n_lines = 50 # number of lines to show
|
||||
output_file = os.path.join(self.tempdir, "convert-txt.png")
|
||||
out_path = os.path.join(self.tempdir, "convert.png")
|
||||
|
||||
temp_bg = os.path.join(self.tempdir, "bg.png")
|
||||
temp_txlayer = os.path.join(self.tempdir, "tx.png")
|
||||
@@ -46,9 +43,13 @@ class TextDocumentParser(DocumentParser):
|
||||
work_size = ",".join([str(n - 1) for n in psize])
|
||||
r = str(round(psize[0] / 10))
|
||||
rounded = ",".join([r, r])
|
||||
run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
|
||||
'"fill ', bg_color, ' roundrectangle 0,0,',
|
||||
work_size, ",", rounded, '" ', temp_bg)
|
||||
run_command(
|
||||
self.CONVERT,
|
||||
"-size ", picsize,
|
||||
' xc:none -draw ',
|
||||
'"fill ', bg_color, ' roundrectangle 0,0,', work_size, ",", rounded, '" ', # NOQA: E501
|
||||
temp_bg
|
||||
)
|
||||
|
||||
def read_text():
|
||||
with open(self.document_path, 'r') as src:
|
||||
@@ -57,22 +58,29 @@ class TextDocumentParser(DocumentParser):
|
||||
return text.replace('"', "'")
|
||||
|
||||
def create_txlayer():
|
||||
run_command(self.CONVERT,
|
||||
"-background none",
|
||||
"-fill",
|
||||
text_color,
|
||||
"-pointsize", "12",
|
||||
"-border 4 -bordercolor none",
|
||||
"-size ", txsize,
|
||||
' caption:"', read_text(), '" ',
|
||||
temp_txlayer)
|
||||
run_command(
|
||||
self.CONVERT,
|
||||
"-background none",
|
||||
"-fill",
|
||||
text_color,
|
||||
"-pointsize", "12",
|
||||
"-border 4 -bordercolor none",
|
||||
"-size ", txsize,
|
||||
' caption:"', read_text(), '" ',
|
||||
temp_txlayer
|
||||
)
|
||||
|
||||
create_txlayer()
|
||||
create_bg()
|
||||
run_command(self.CONVERT, temp_bg, temp_txlayer,
|
||||
"-background None -layers merge ", output_file)
|
||||
run_command(
|
||||
self.CONVERT,
|
||||
temp_bg,
|
||||
temp_txlayer,
|
||||
"-background None -layers merge ",
|
||||
out_path
|
||||
)
|
||||
|
||||
return output_file
|
||||
return out_path
|
||||
|
||||
def get_text(self):
|
||||
|
||||
@@ -84,40 +92,6 @@ class TextDocumentParser(DocumentParser):
|
||||
|
||||
return self._text
|
||||
|
||||
def get_date(self):
|
||||
date = None
|
||||
datestring = None
|
||||
|
||||
try:
|
||||
text = self.get_text()
|
||||
except ParseError as e:
|
||||
return None
|
||||
|
||||
# Iterate through all regex matches and try to parse the date
|
||||
for m in re.finditer(DATE_REGEX, text):
|
||||
datestring = m.group(0)
|
||||
|
||||
try:
|
||||
date = dateparser.parse(
|
||||
datestring,
|
||||
settings={'DATE_ORDER': self.DATE_ORDER,
|
||||
'PREFER_DAY_OF_MONTH': 'first',
|
||||
'RETURN_AS_TIMEZONE_AWARE': True})
|
||||
except TypeError:
|
||||
# Skip all matches that do not parse to a proper date
|
||||
continue
|
||||
|
||||
if date is not None:
|
||||
break
|
||||
|
||||
if date is not None:
|
||||
self.log("info", "Detected document date " + date.isoformat() +
|
||||
" based on string " + datestring)
|
||||
else:
|
||||
self.log("info", "Unable to detect date for document")
|
||||
|
||||
return date
|
||||
|
||||
|
||||
def run_command(*args):
|
||||
environment = os.environ.copy()
|
||||
|
||||
19
src/reminders/migrations/0002_auto_20181007_1420.py
Normal file
@@ -0,0 +1,19 @@
|
||||
# Generated by Django 2.0.8 on 2018-10-07 14:20
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('reminders', '0001_initial'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AlterField(
|
||||
model_name='reminder',
|
||||
name='document',
|
||||
field=models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, to='documents.Document'),
|
||||
),
|
||||
]
|
||||
@@ -4,7 +4,6 @@ from django.db import models
|
||||
class Reminder(models.Model):
|
||||
|
||||
document = models.ForeignKey(
|
||||
"documents.Document", on_delete=models.PROTECT
|
||||
)
|
||||
"documents.Document", on_delete=models.PROTECT)
|
||||
date = models.DateTimeField()
|
||||
note = models.TextField(blank=True)
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
[tox]
|
||||
skipsdist = True
|
||||
envlist = py34, py35, py36, pycodestyle, doc
|
||||
envlist = py34, py35, py36, py37, pycodestyle, doc
|
||||
|
||||
[testenv]
|
||||
commands = pytest
|
||||
@@ -17,6 +17,5 @@ deps=pycodestyle
|
||||
|
||||
[testenv:doc]
|
||||
deps =
|
||||
-r{toxinidir}/../requirements.txt
|
||||
sphinx
|
||||
-r {toxinidir}/../requirements.txt
|
||||
commands=sphinx-build -b html ../docs ../docs/_build -W
|
||||
|
||||