Compare commits
116 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
94c2950afe | ||
|
|
9f56bf9992 | ||
|
|
6df35e4cb7 | ||
|
|
b4b7d167d1 | ||
|
|
4936fad542 | ||
|
|
3c78105fd7 | ||
|
|
a58a7ce0f7 | ||
|
|
792aeee11e | ||
|
|
5588e86855 | ||
|
|
97f1e4ab16 | ||
|
|
e4dece8e53 | ||
|
|
c5c204f605 | ||
|
|
611ec6840b | ||
|
|
2cd077d12d | ||
|
|
4efb153e86 | ||
|
|
25e953bbf0 | ||
|
|
0509d5a3d2 | ||
|
|
5e674f17af | ||
|
|
7c7a814096 | ||
|
|
43e71cfcaa | ||
|
|
79868930f1 | ||
|
|
0256dcbe32 | ||
|
|
29db177ce2 | ||
|
|
5c1edf78ce | ||
|
|
60e8990a7b | ||
|
|
75a79ac204 | ||
|
|
0c47907dda | ||
|
|
cea8332038 | ||
|
|
5982cb693a | ||
|
|
73a02d40c4 | ||
|
|
b541765817 | ||
|
|
28ffd1ec6b | ||
|
|
5760aa0894 | ||
|
|
562e5f644d | ||
|
|
5ab2009ebf | ||
|
|
637b0d4cc2 | ||
|
|
4a71c33537 | ||
|
|
cf36c8467e | ||
|
|
dafa6a4c71 | ||
|
|
a3c5ec834d | ||
|
|
be57dbe4c8 | ||
|
|
4d50c7e105 | ||
|
|
27af2603f5 | ||
|
|
ff5b34179a | ||
|
|
0334617287 | ||
|
|
f8b43fa74b | ||
|
|
1ff06d0dd9 | ||
|
|
4ad6813d11 | ||
|
|
cbc5f0603f | ||
|
|
0d21bdeffa | ||
|
|
b1f9b18b8c | ||
|
|
4d13521f36 | ||
|
|
7b4785bdb9 | ||
|
|
baf89cad8e | ||
|
|
3c2a1a8c13 | ||
|
|
1c7047bbb8 | ||
|
|
96dafe8c43 | ||
|
|
d6896daece | ||
|
|
d12f0642f2 | ||
|
|
a19f0ef97e | ||
|
|
ec7125b6bb | ||
|
|
e3a616ebc3 | ||
|
|
f898ec792f | ||
|
|
f45b6762f2 | ||
|
|
d544f269e0 | ||
|
|
650db75c2b | ||
|
|
7dbb77e57b | ||
|
|
f1b3312bcb | ||
|
|
ea05ab2b06 | ||
|
|
4f4c515629 | ||
|
|
c1f926a40c | ||
|
|
c1d18c1e83 | ||
|
|
ba452e0524 | ||
|
|
c5488dcb98 | ||
|
|
d6eefbccee | ||
|
|
a813288aaf | ||
|
|
63e2fbe0c9 | ||
|
|
597a7bb391 | ||
|
|
730daa3d6d | ||
|
|
c225281f95 | ||
|
|
e1d8744c66 | ||
|
|
4409f65840 | ||
|
|
c83dc666a4 | ||
|
|
9ab50ed09d | ||
|
|
e0acb4a40b | ||
|
|
eca6250c1b | ||
|
|
33abec0663 | ||
|
|
d825667c9b | ||
|
|
84511f8418 | ||
|
|
81e488b90d | ||
|
|
bff28113df | ||
|
|
0b377a76d0 | ||
|
|
ec1d5c80ff | ||
|
|
bd95804fbf | ||
|
|
8dc355a66f | ||
|
|
fbb389553c | ||
|
|
f8cfbb44d2 | ||
|
|
818780a191 | ||
|
|
b350ec48b7 | ||
|
|
f948ee11be | ||
|
|
2ef2bf873e | ||
|
|
0bb7d27269 | ||
|
|
ce5e8b2658 | ||
|
|
3f572afb8b | ||
|
|
5c3cb1e4ab | ||
|
|
c7f4bfe4f3 | ||
|
|
65d6599964 | ||
|
|
5d32e89c44 | ||
|
|
750ab5bf85 | ||
|
|
2a3f766b93 | ||
|
|
14bb52b6a4 | ||
|
|
b5176d207e | ||
|
|
e4044d0df9 | ||
|
|
bacdd51fd7 | ||
|
|
8010d72f18 | ||
|
|
9dd76f1b87 |
28
.editorconfig
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# EditorConfig: http://EditorConfig.org
|
||||||
|
|
||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
indent_style = tab
|
||||||
|
indent_size = 2
|
||||||
|
insert_final_newline = true
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
end_of_line = lf
|
||||||
|
charset = utf-8
|
||||||
|
max_line_length = 79
|
||||||
|
|
||||||
|
[{*.html,*.css,*.js}]
|
||||||
|
max_line_length = off
|
||||||
|
|
||||||
|
[*.py]
|
||||||
|
indent_size = 4
|
||||||
|
indent_style = space
|
||||||
|
|
||||||
|
[*.yml]
|
||||||
|
indent_style = space
|
||||||
|
|
||||||
|
# Tests don't get a line width restriction. It's still a good idea to follow
|
||||||
|
# the 79 character rule, but in the interests of clarity, tests often need to
|
||||||
|
# violate it.
|
||||||
|
[**/test_*.py]
|
||||||
|
max_line_length = off
|
||||||
4
.gitignore
vendored
@@ -66,6 +66,7 @@ media/overrides.js
|
|||||||
|
|
||||||
# Sqlite database
|
# Sqlite database
|
||||||
db.sqlite3
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
# PyCharm
|
# PyCharm
|
||||||
.idea
|
.idea
|
||||||
@@ -73,7 +74,6 @@ db.sqlite3
|
|||||||
# Other stuff that doesn't belong
|
# Other stuff that doesn't belong
|
||||||
.virtualenv
|
.virtualenv
|
||||||
virtualenv
|
virtualenv
|
||||||
.vagrant
|
|
||||||
docker-compose.yml
|
docker-compose.yml
|
||||||
docker-compose.env
|
docker-compose.env
|
||||||
|
|
||||||
@@ -82,4 +82,4 @@ scripts/import-for-development
|
|||||||
scripts/nuke
|
scripts/nuke
|
||||||
|
|
||||||
# Static files collected by the collectstatic command
|
# Static files collected by the collectstatic command
|
||||||
static/
|
./static/
|
||||||
|
|||||||
17
.travis.yml
@@ -2,19 +2,22 @@ language: python
|
|||||||
|
|
||||||
before_install:
|
before_install:
|
||||||
- sudo apt-get update -qq
|
- sudo apt-get update -qq
|
||||||
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng
|
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr
|
||||||
|
|
||||||
sudo: false
|
sudo: false
|
||||||
|
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- python: 3.4
|
- python: "3.4"
|
||||||
- python: 3.5
|
- python: "3.5"
|
||||||
- python: 3.6
|
- python: "3.6"
|
||||||
|
- python: "3.7-dev"
|
||||||
|
|
||||||
install:
|
install:
|
||||||
- pip install --requirement requirements.txt
|
- pip install --upgrade pip pipenv sphinx
|
||||||
- pip install sphinx
|
- pipenv lock -r > requirements.txt
|
||||||
|
- pip install -r requirements.txt
|
||||||
|
|
||||||
script:
|
script:
|
||||||
- cd src/
|
- cd src/
|
||||||
- pytest --cov
|
- pytest --cov
|
||||||
@@ -22,4 +25,4 @@ script:
|
|||||||
- sphinx-build -b html ../docs ../docs/_build -W
|
- sphinx-build -b html ../docs ../docs/_build -W
|
||||||
|
|
||||||
after_success:
|
after_success:
|
||||||
- coveralls
|
- coveralls
|
||||||
|
|||||||
20
Dockerfile
@@ -1,28 +1,28 @@
|
|||||||
FROM alpine:3.7
|
FROM alpine:3.8
|
||||||
|
|
||||||
LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless" \
|
LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless" \
|
||||||
contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
|
contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
|
||||||
Sven Fischer <git-dev@linux4tw.de>"
|
Sven Fischer <git-dev@linux4tw.de>"
|
||||||
|
|
||||||
# Copy requirements file and init script
|
# Copy Pipfiles and init script
|
||||||
COPY requirements.txt /usr/src/paperless/
|
COPY Pipfile* /usr/src/paperless/
|
||||||
COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
|
COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh
|
||||||
|
|
||||||
# Set export and consumption directories
|
# Set export and consumption directories
|
||||||
ENV PAPERLESS_EXPORT_DIR=/export \
|
ENV PAPERLESS_EXPORT_DIR=/export \
|
||||||
PAPERLESS_CONSUMPTION_DIR=/consume
|
PAPERLESS_CONSUMPTION_DIR=/consume
|
||||||
|
|
||||||
# Install dependencies
|
|
||||||
RUN apk --no-cache --update add \
|
RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow curl \
|
||||||
python3 gnupg libmagic bash shadow curl \
|
sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
|
||||||
sudo poppler tesseract-ocr imagemagick ghostscript unpaper && \
|
apk add --virtual .build-dependencies \
|
||||||
apk --no-cache add --virtual .build-dependencies \
|
python3-dev poppler-dev postgresql-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
||||||
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
|
||||||
# Install python dependencies
|
# Install python dependencies
|
||||||
python3 -m ensurepip && \
|
python3 -m ensurepip && \
|
||||||
rm -r /usr/lib/python*/ensurepip && \
|
rm -r /usr/lib/python*/ensurepip && \
|
||||||
cd /usr/src/paperless && \
|
cd /usr/src/paperless && \
|
||||||
pip3 install --no-cache-dir -r requirements.txt && \
|
pip3 install --upgrade pip pipenv && \
|
||||||
|
pipenv install --system --deploy && \
|
||||||
# Remove build dependencies
|
# Remove build dependencies
|
||||||
apk del .build-dependencies && \
|
apk del .build-dependencies && \
|
||||||
# Create the consumption directory
|
# Create the consumption directory
|
||||||
|
|||||||
7
Pipfile
@@ -25,6 +25,8 @@ python-dateutil = "*"
|
|||||||
python-dotenv = "*"
|
python-dotenv = "*"
|
||||||
python-gnupg = "*"
|
python-gnupg = "*"
|
||||||
pytz = "*"
|
pytz = "*"
|
||||||
|
sphinx = "*"
|
||||||
|
tox = "*"
|
||||||
pycodestyle = "*"
|
pycodestyle = "*"
|
||||||
pytest = "*"
|
pytest = "*"
|
||||||
pytest-cov = "*"
|
pytest-cov = "*"
|
||||||
@@ -32,9 +34,8 @@ pytest-django = "*"
|
|||||||
pytest-sugar = "*"
|
pytest-sugar = "*"
|
||||||
pytest-env = "*"
|
pytest-env = "*"
|
||||||
pytest-xdist = "*"
|
pytest-xdist = "*"
|
||||||
|
psycopg2 = "*"
|
||||||
|
djangoql = "*"
|
||||||
|
|
||||||
[dev-packages]
|
[dev-packages]
|
||||||
ipython = "*"
|
ipython = "*"
|
||||||
sphinx = "*"
|
|
||||||
tox = "*"
|
|
||||||
|
|
||||||
|
|||||||
719
Pipfile.lock
generated
@@ -1,7 +1,6 @@
|
|||||||
*[English](README.md)*<br/>
|
[ [en](README.md) | de | [el](README-el.md) ]
|
||||||
*[Greek](README-el.md)*
|
|
||||||
|
|
||||||
# Paperless
|

|
||||||
|
|
||||||
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
*[English](README.md)*<br/>
|
[ [en](README.md) | [de](README-de.md) | el ]
|
||||||
*[German](README-de.md)*
|
|
||||||
|
|
||||||
# Paperless
|

|
||||||
|
|
||||||
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
*[German](README-de.md)*<br/>
|
[ en | [de](README-de.md) | [el](README-el.md) ]
|
||||||
*[Greek](README-el.md)*
|
|
||||||
|
|
||||||
# Paperless
|

|
||||||
|
|
||||||
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
[](https://paperless.readthedocs.org/) [](https://gitter.im/danielquinn/paperless) [](https://travis-ci.org/danielquinn/paperless) [](https://coveralls.io/github/danielquinn/paperless?branch=master) [](https://github.com/danielquinn/paperless/blob/master/THANKS.md)
|
||||||
|
|
||||||
|
|||||||
20
Vagrantfile
vendored
@@ -1,20 +0,0 @@
|
|||||||
# -*- mode: ruby -*-
|
|
||||||
# vi: set ft=ruby :
|
|
||||||
|
|
||||||
VAGRANT_API_VERSION = "2"
|
|
||||||
Vagrant.configure(VAGRANT_API_VERSION) do |config|
|
|
||||||
config.vm.box = "ubuntu/trusty64"
|
|
||||||
|
|
||||||
# Provision using shell
|
|
||||||
config.vm.host_name = "dev.paperless"
|
|
||||||
config.vm.synced_folder ".", "/opt/paperless"
|
|
||||||
config.vm.provision "shell", path: "scripts/vagrant-provision"
|
|
||||||
|
|
||||||
# Networking details
|
|
||||||
config.vm.network "private_network", ip: "172.28.128.4"
|
|
||||||
|
|
||||||
config.vm.provider "virtualbox" do |vb|
|
|
||||||
# Customize the amount of memory on the VM:
|
|
||||||
vb.memory = "1024"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@@ -17,6 +17,9 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- data:/usr/src/paperless/data
|
- data:/usr/src/paperless/data
|
||||||
- media:/usr/src/paperless/media
|
- media:/usr/src/paperless/media
|
||||||
|
# You have to adapt the local path you want the consumption
|
||||||
|
# directory to mount to by modifying the part before the ':'.
|
||||||
|
- ./consume:/consume
|
||||||
env_file: docker-compose.env
|
env_file: docker-compose.env
|
||||||
# The reason the line is here is so that the webserver that doesn't do
|
# The reason the line is here is so that the webserver that doesn't do
|
||||||
# any text recognition and doesn't have to install unnecessary
|
# any text recognition and doesn't have to install unnecessary
|
||||||
@@ -36,8 +39,8 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- data:/usr/src/paperless/data
|
- data:/usr/src/paperless/data
|
||||||
- media:/usr/src/paperless/media
|
- media:/usr/src/paperless/media
|
||||||
# You have to adapt the local path you want the consumption
|
# This should be set to the same value as the consume directory
|
||||||
# directory to mount to by modifying the part before the ':'.
|
# in the webserver service above.
|
||||||
- ./consume:/consume
|
- ./consume:/consume
|
||||||
# Likewise, you can add a local path to mount a directory for
|
# Likewise, you can add a local path to mount a directory for
|
||||||
# exporting. This is not strictly needed for paperless to
|
# exporting. This is not strictly needed for paperless to
|
||||||
|
|||||||
@@ -1,6 +1,100 @@
|
|||||||
Changelog
|
Changelog
|
||||||
#########
|
#########
|
||||||
|
|
||||||
|
2.7.0
|
||||||
|
=====
|
||||||
|
|
||||||
|
* `syntonym`_ submitted a pull request to catch IMAP connection errors `#475`_.
|
||||||
|
* `Stéphane Brunner`_ added ``psycopg2`` to the Pipfile `#489`_. He also fixed
|
||||||
|
a syntax error in ``docker-compose.yml.example`` `#488`_ and added `DjangoQL <https://github.com/ivelum/djangoql>`_,
|
||||||
|
which allows a litany of handy search functionality `#492`_.
|
||||||
|
* `CkuT`_ and `JOKer`_ hacked out a simple, but super-helpful optimisation to
|
||||||
|
how the thumbnails are served up, improving performance considerably `#481`_.
|
||||||
|
* `tsia`_ added a few fields to the tags REST API. `#483`_.
|
||||||
|
* `Brian Cribbs`_ improved the documentation to help people using Paperless
|
||||||
|
over NFS `#484`_.
|
||||||
|
* `Brendan M. Sleight`_ updated the documentation to include a note for setting the
|
||||||
|
``DEBUG`` value. The ``paperless.conf.example`` file was also updated to
|
||||||
|
mirror the project defaults.
|
||||||
|
|
||||||
|
|
||||||
|
2.6.1
|
||||||
|
=====
|
||||||
|
|
||||||
|
* We now have a logo, complete with a favicon :-)
|
||||||
|
* Removed some problematic tests.
|
||||||
|
* Fix the docker-compose example config to include a shared consume volume so
|
||||||
|
that using the push API will work for users of the Docker install. Thanks to
|
||||||
|
`Colin Frei`_ for fixing this in `#466`_.
|
||||||
|
* `khrise`_ submitted a pull request to include the ``added`` property to the
|
||||||
|
REST API `#471`_.
|
||||||
|
|
||||||
|
|
||||||
|
2.6.0
|
||||||
|
=====
|
||||||
|
|
||||||
|
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
|
||||||
|
the problem in `#433`_.
|
||||||
|
* Fix the ``RecentCorrespondentsFilter`` correspondents filter that was added
|
||||||
|
in 2.4 to play nice with the defaults. Thanks to `tsia`_ and `Sblop`_ who
|
||||||
|
pointed this out. `#423`_.
|
||||||
|
* Updated dependencies to include (among other things) a security patch to
|
||||||
|
requests.
|
||||||
|
* Fix text in sample data for tests so that the language guesser stops thinking
|
||||||
|
that everything is in Catalan because we had *Lorem ipsum* in there.
|
||||||
|
* Tweaked the gunicorn sample command to use filesystem paths instead of Python
|
||||||
|
paths. `#441`_
|
||||||
|
* Added pretty colour boxes next to the hex values in the Tags section, thanks
|
||||||
|
to a pull request from `Joshua Taillon`_ `#442`_.
|
||||||
|
* Added a ``.editorconfig`` file to better specify coding style.
|
||||||
|
* `Joshua Taillon`_ also added some logic to tie Paperless' date guessing logic
|
||||||
|
into how it parses file names on import. `#440`_
|
||||||
|
|
||||||
|
|
||||||
|
2.5.0
|
||||||
|
=====
|
||||||
|
|
||||||
|
* **New dependency**: Paperless now optimises thumbnail generation with
|
||||||
|
`optipng`_, so you'll need to install that somewhere in your PATH or declare
|
||||||
|
its location in ``PAPERLESS_OPTIPNG_BINARY``. The Docker image has already
|
||||||
|
been updated on the Docker Hub, so you just need to pull the latest one from
|
||||||
|
there if you're a Docker user.
|
||||||
|
|
||||||
|
* "Login free" instances of Paperless were breaking whenever you tried to edit
|
||||||
|
objects in the admin: adding/deleting tags or correspondents, or even fixing
|
||||||
|
spelling. This was due to the "user hack" we were applying to sessions that
|
||||||
|
weren't using a login, as that hack user didn't have a valid id. The fix was
|
||||||
|
to attribute the first user id in the system to this hack user. `#394`_
|
||||||
|
|
||||||
|
* A problem in how we handle slug values on Tags and Correspondents required a
|
||||||
|
few changes to how we handle this field `#393`_:
|
||||||
|
|
||||||
|
1. Slugs are no longer editable. They're derived from the name of the tag or
|
||||||
|
correspondent at save time, so if you wanna change the slug, you have to
|
||||||
|
change the name, and even then you're restricted to the rules of the
|
||||||
|
``slugify()`` function. The slug value is still visible in the admin
|
||||||
|
though.
|
||||||
|
2. I've added a migration to go over all existing tags & correspondents and
|
||||||
|
rewrite the ``.slug`` values to ones conforming to the ``slugify()``
|
||||||
|
rules.
|
||||||
|
3. The consumption process now uses the same rules as ``.save()`` in
|
||||||
|
determining a slug and using that to check for an existing
|
||||||
|
tag/correspondent.
|
||||||
|
|
||||||
|
* An annoying bug in the date capture code was causing some bogus dates to be
|
||||||
|
attached to documents, which in turn busted the UI. Thanks to `Andrew Peng`_
|
||||||
|
for reporting this. `#414`_.
|
||||||
|
|
||||||
|
* A bug in the Dockerfile meant that Tesseract language files weren't being
|
||||||
|
installed correctly. `euri10`_ was quick to provide a fix: `#406`_, `#413`_.
|
||||||
|
|
||||||
|
* Document consumption is now wrapped in a transaction as per an old ticket
|
||||||
|
`#262`_.
|
||||||
|
|
||||||
|
* The ``get_date()`` functionality of the parsers has been consolidated onto
|
||||||
|
the ``DocumentParser`` class since much of that code was redundant anyway.
|
||||||
|
|
||||||
|
|
||||||
2.4.0
|
2.4.0
|
||||||
=====
|
=====
|
||||||
|
|
||||||
@@ -12,13 +106,13 @@ Changelog
|
|||||||
It's now in the import step that we decide the storage type. This allows you
|
It's now in the import step that we decide the storage type. This allows you
|
||||||
to export from an encrypted system and import into an unencrypted one, or
|
to export from an encrypted system and import into an unencrypted one, or
|
||||||
vice-versa.
|
vice-versa.
|
||||||
* The migration history has been slightly modified to accomodate PostgreSQL
|
* The migration history has been slightly modified to accommodate PostgreSQL
|
||||||
users. Additionally, you can now tell paperless to use PostgreSQL simply by
|
users. Additionally, you can now tell paperless to use PostgreSQL simply by
|
||||||
declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to
|
declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to
|
||||||
connect to your Postgres database without a password unless you also set
|
connect to your Postgres database without a password unless you also set
|
||||||
``PAPERLESS_DBPASS``.
|
``PAPERLESS_DBPASS``.
|
||||||
* A bug was found in the REST API filter system that was the result of an
|
* A bug was found in the REST API filter system that was the result of an
|
||||||
update of django-filter some time ago. This has now been patched `#412`_.
|
update of django-filter some time ago. This has now been patched in `#412`_.
|
||||||
Thanks to `thepill`_ for spotting it!
|
Thanks to `thepill`_ for spotting it!
|
||||||
|
|
||||||
|
|
||||||
@@ -525,6 +619,17 @@ bulk of the work on this big change.
|
|||||||
.. _ahyear: https://github.com/ahyear
|
.. _ahyear: https://github.com/ahyear
|
||||||
.. _jonaswinkler: https://github.com/jonaswinkler
|
.. _jonaswinkler: https://github.com/jonaswinkler
|
||||||
.. _thepill: https://github.com/thepill
|
.. _thepill: https://github.com/thepill
|
||||||
|
.. _Andrew Peng: https://github.com/pengc99
|
||||||
|
.. _euri10: https://github.com/euri10
|
||||||
|
.. _Ulli: https://github.com/Ulli2k
|
||||||
|
.. _tsia: https://github.com/tsia
|
||||||
|
.. _Sblop: https://github.com/Sblop
|
||||||
|
.. _Colin Frei: https://github.com/colinfrei
|
||||||
|
.. _khrise: https://github.com/khrise
|
||||||
|
.. _syntonym: https://github.com/syntonym
|
||||||
|
.. _JOKer: https://github.com/JOKer
|
||||||
|
.. _Brian Cribbs: https://github.com/cribbstechnolog
|
||||||
|
.. _Brendan M. Sleight: https://github.com/bmsleight
|
||||||
|
|
||||||
.. _#20: https://github.com/danielquinn/paperless/issues/20
|
.. _#20: https://github.com/danielquinn/paperless/issues/20
|
||||||
.. _#44: https://github.com/danielquinn/paperless/issues/44
|
.. _#44: https://github.com/danielquinn/paperless/issues/44
|
||||||
@@ -590,6 +695,7 @@ bulk of the work on this big change.
|
|||||||
.. _#322: https://github.com/danielquinn/paperless/pull/322
|
.. _#322: https://github.com/danielquinn/paperless/pull/322
|
||||||
.. _#328: https://github.com/danielquinn/paperless/pull/328
|
.. _#328: https://github.com/danielquinn/paperless/pull/328
|
||||||
.. _#253: https://github.com/danielquinn/paperless/issues/253
|
.. _#253: https://github.com/danielquinn/paperless/issues/253
|
||||||
|
.. _#262: https://github.com/danielquinn/paperless/issues/262
|
||||||
.. _#323: https://github.com/danielquinn/paperless/issues/323
|
.. _#323: https://github.com/danielquinn/paperless/issues/323
|
||||||
.. _#344: https://github.com/danielquinn/paperless/pull/344
|
.. _#344: https://github.com/danielquinn/paperless/pull/344
|
||||||
.. _#351: https://github.com/danielquinn/paperless/pull/351
|
.. _#351: https://github.com/danielquinn/paperless/pull/351
|
||||||
@@ -606,13 +712,33 @@ bulk of the work on this big change.
|
|||||||
.. _#391: https://github.com/danielquinn/paperless/pull/391
|
.. _#391: https://github.com/danielquinn/paperless/pull/391
|
||||||
.. _#390: https://github.com/danielquinn/paperless/pull/390
|
.. _#390: https://github.com/danielquinn/paperless/pull/390
|
||||||
.. _#392: https://github.com/danielquinn/paperless/issues/392
|
.. _#392: https://github.com/danielquinn/paperless/issues/392
|
||||||
|
.. _#393: https://github.com/danielquinn/paperless/issues/393
|
||||||
.. _#395: https://github.com/danielquinn/paperless/pull/395
|
.. _#395: https://github.com/danielquinn/paperless/pull/395
|
||||||
|
.. _#394: https://github.com/danielquinn/paperless/issues/394
|
||||||
.. _#396: https://github.com/danielquinn/paperless/pull/396
|
.. _#396: https://github.com/danielquinn/paperless/pull/396
|
||||||
.. _#399: https://github.com/danielquinn/paperless/pull/399
|
.. _#399: https://github.com/danielquinn/paperless/pull/399
|
||||||
.. _#400: https://github.com/danielquinn/paperless/pull/400
|
.. _#400: https://github.com/danielquinn/paperless/pull/400
|
||||||
.. _#401: https://github.com/danielquinn/paperless/pull/401
|
.. _#401: https://github.com/danielquinn/paperless/pull/401
|
||||||
.. _#405: https://github.com/danielquinn/paperless/pull/405
|
.. _#405: https://github.com/danielquinn/paperless/pull/405
|
||||||
|
.. _#406: https://github.com/danielquinn/paperless/issues/406
|
||||||
.. _#412: https://github.com/danielquinn/paperless/issues/412
|
.. _#412: https://github.com/danielquinn/paperless/issues/412
|
||||||
|
.. _#413: https://github.com/danielquinn/paperless/pull/413
|
||||||
|
.. _#414: https://github.com/danielquinn/paperless/issues/414
|
||||||
|
.. _#423: https://github.com/danielquinn/paperless/issues/423
|
||||||
|
.. _#433: https://github.com/danielquinn/paperless/issues/433
|
||||||
|
.. _#440: https://github.com/danielquinn/paperless/pull/440
|
||||||
|
.. _#441: https://github.com/danielquinn/paperless/pull/441
|
||||||
|
.. _#442: https://github.com/danielquinn/paperless/pull/442
|
||||||
|
.. _#466: https://github.com/danielquinn/paperless/pull/466
|
||||||
|
.. _#471: https://github.com/danielquinn/paperless/pull/471
|
||||||
|
.. _#475: https://github.com/danielquinn/paperless/pull/475
|
||||||
|
.. _#481: https://github.com/danielquinn/paperless/pull/481
|
||||||
|
.. _#483: https://github.com/danielquinn/paperless/pull/483
|
||||||
|
.. _#484: https://github.com/danielquinn/paperless/pull/484
|
||||||
|
.. _#488: https://github.com/danielquinn/paperless/pull/488
|
||||||
|
.. _#489: https://github.com/danielquinn/paperless/pull/489
|
||||||
|
.. _#492: https://github.com/danielquinn/paperless/pull/492
|
||||||
|
|
||||||
.. _pipenv: https://docs.pipenv.org/
|
.. _pipenv: https://docs.pipenv.org/
|
||||||
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
||||||
|
.. _optipng: http://optipng.sourceforge.net/
|
||||||
|
|||||||
@@ -43,6 +43,16 @@ These however wouldn't work:
|
|||||||
* ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
|
* ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
|
||||||
* ``Another Company- Letter of Reference.jpg``
|
* ``Another Company- Letter of Reference.jpg``
|
||||||
|
|
||||||
|
Do I have to be so strict about naming?
|
||||||
|
---------------------------------------
|
||||||
|
Rather than using the strict document naming rules, one can also set the option
|
||||||
|
``PAPERLESS_FILENAME_DATE_ORDER`` in ``paperless.conf`` to any date order
|
||||||
|
that is accepted by dateparser_. Doing so will cause ``paperless`` to default
|
||||||
|
to any date format that is found in the title, instead of a date pulled from
|
||||||
|
the document's text, without requiring the strict formatting of the document
|
||||||
|
filename as described above.
|
||||||
|
|
||||||
|
.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
|
||||||
|
|
||||||
.. _guesswork-content:
|
.. _guesswork-content:
|
||||||
|
|
||||||
@@ -82,11 +92,11 @@ text and matching algorithm. From the help info there:
|
|||||||
uses a regex to match the PDF. If you don't know what a regex is, you
|
uses a regex to match the PDF. If you don't know what a regex is, you
|
||||||
probably don't want this option.
|
probably don't want this option.
|
||||||
|
|
||||||
When using the "any" or "all" matching algorithms, you can search for terms that
|
When using the "any" or "all" matching algorithms, you can search for terms
|
||||||
consist of multiple words by enclosing them in double quotes. For example, defining
|
that consist of multiple words by enclosing them in double quotes. For example,
|
||||||
a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match
|
defining a match text of ``"Bank of America" BofA`` using the "any" algorithm,
|
||||||
documents that contain either "Bank of America" or "BofA", but will not match
|
will match documents that contain either "Bank of America" or "BofA", but will
|
||||||
documents containing "Bank of South America".
|
not match documents containing "Bank of South America".
|
||||||
|
|
||||||
Then just save your tag/correspondent and run another document through the
|
Then just save your tag/correspondent and run another document through the
|
||||||
consumer. Once complete, you should see the newly-created document,
|
consumer. Once complete, you should see the newly-created document,
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ rolled in as part of the update:
|
|||||||
|
|
||||||
$ cd /path/to/project
|
$ cd /path/to/project
|
||||||
$ git pull
|
$ git pull
|
||||||
|
$ pip install -r requirements.txt
|
||||||
$ cd src
|
$ cd src
|
||||||
$ ./manage.py migrate
|
$ ./manage.py migrate
|
||||||
|
|
||||||
@@ -101,7 +102,7 @@ is similar:
|
|||||||
$ cd /path/to/project
|
$ cd /path/to/project
|
||||||
$ git pull
|
$ git pull
|
||||||
$ docker build -t paperless .
|
$ docker build -t paperless .
|
||||||
$ docker-compose run --rm comsumer migrate
|
$ docker-compose run --rm consumer migrate
|
||||||
$ docker-compose up -d
|
$ docker-compose up -d
|
||||||
|
|
||||||
If ``git pull`` doesn't report any changes, there is no need to continue with
|
If ``git pull`` doesn't report any changes, there is no need to continue with
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ should work) that has the following software installed:
|
|||||||
* `Imagemagick`_ version 6.7.5 or higher
|
* `Imagemagick`_ version 6.7.5 or higher
|
||||||
* `unpaper`_
|
* `unpaper`_
|
||||||
* `libpoppler-cpp-dev`_ PDF rendering library
|
* `libpoppler-cpp-dev`_ PDF rendering library
|
||||||
|
* `optipng`_
|
||||||
|
|
||||||
.. _Python3: https://python.org/
|
.. _Python3: https://python.org/
|
||||||
.. _GNU Privacy Guard: https://gnupg.org
|
.. _GNU Privacy Guard: https://gnupg.org
|
||||||
@@ -19,6 +20,7 @@ should work) that has the following software installed:
|
|||||||
.. _Imagemagick: http://imagemagick.org/
|
.. _Imagemagick: http://imagemagick.org/
|
||||||
.. _unpaper: https://www.flameeyes.eu/projects/unpaper
|
.. _unpaper: https://www.flameeyes.eu/projects/unpaper
|
||||||
.. _libpoppler-cpp-dev: https://poppler.freedesktop.org/
|
.. _libpoppler-cpp-dev: https://poppler.freedesktop.org/
|
||||||
|
.. _optipng: http://optipng.sourceforge.net/
|
||||||
|
|
||||||
Notably, you should confirm how you access your Python3 installation. Many
|
Notably, you should confirm how you access your Python3 installation. Many
|
||||||
Linux distributions will install Python3 in parallel to Python2, using the
|
Linux distributions will install Python3 in parallel to Python2, using the
|
||||||
@@ -33,7 +35,7 @@ In addition to the above, there are a number of Python requirements, all of
|
|||||||
which are listed in a file called ``requirements.txt`` in the project root
|
which are listed in a file called ``requirements.txt`` in the project root
|
||||||
directory.
|
directory.
|
||||||
|
|
||||||
If you're not working on a virtual environment (like Vagrant or Docker), you
|
If you're not working on a virtual environment (like Docker), you
|
||||||
should probably be using a virtualenv, but that's your call. The reasons why
|
should probably be using a virtualenv, but that's your call. The reasons why
|
||||||
you might choose a virtualenv or not aren't really within the scope of this
|
you might choose a virtualenv or not aren't really within the scope of this
|
||||||
document. Needless to say if you don't know what a virtualenv is, you should
|
document. Needless to say if you don't know what a virtualenv is, you should
|
||||||
|
|||||||
@@ -42,18 +42,14 @@ Installation & Configuration
|
|||||||
You can go multiple routes with setting up and running Paperless:
|
You can go multiple routes with setting up and running Paperless:
|
||||||
|
|
||||||
* The `bare metal route`_
|
* The `bare metal route`_
|
||||||
* The `vagrant route`_
|
|
||||||
* The `docker route`_
|
* The `docker route`_
|
||||||
|
|
||||||
|
|
||||||
The `Vagrant route`_ is quick & easy, but means you're running a VM which comes
|
The `docker route`_ is quick & easy.
|
||||||
with memory consumption, cpu overhead etc. The `docker route`_ offers the same
|
|
||||||
simplicity as Vagrant with lower resource consumption.
|
|
||||||
|
|
||||||
The `bare metal route`_ is a bit more complicated to setup but makes it easier
|
The `bare metal route`_ is a bit more complicated to setup but makes it easier
|
||||||
should you want to contribute some code back.
|
should you want to contribute some code back.
|
||||||
|
|
||||||
.. _Vagrant route: setup-installation-vagrant_
|
|
||||||
.. _docker route: setup-installation-docker_
|
.. _docker route: setup-installation-docker_
|
||||||
.. _bare metal route: setup-installation-bare-metal_
|
.. _bare metal route: setup-installation-bare-metal_
|
||||||
.. _Docker Machine: https://docs.docker.com/machine/
|
.. _Docker Machine: https://docs.docker.com/machine/
|
||||||
@@ -81,12 +77,16 @@ Standard (Bare Metal)
|
|||||||
encrypt/decrypt the original documents. Don't worry about defining this
|
encrypt/decrypt the original documents. Don't worry about defining this
|
||||||
if you don't want to use encryption (the default).
|
if you don't want to use encryption (the default).
|
||||||
|
|
||||||
|
Note also that if you're using the ``runserver`` as mentioned below, you
|
||||||
|
should make sure that PAPERLESS_DEBUG="true" or is just commented out as
|
||||||
|
this is the default.
|
||||||
|
|
||||||
4. Initialise the SQLite database with ``./manage.py migrate``.
|
4. Initialise the SQLite database with ``./manage.py migrate``.
|
||||||
5. Create a user for your Paperless instance with
|
5. Create a user for your Paperless instance with
|
||||||
``./manage.py createsuperuser``. Follow the prompts to create your user.
|
``./manage.py createsuperuser``. Follow the prompts to create your user.
|
||||||
6. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
|
6. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
|
||||||
If no specifc IP or port are given, the default is ``127.0.0.1:8000``
|
If no specific IP or port is given, the default is ``127.0.0.1:8000`` also
|
||||||
also known as http://localhost:8000/.
|
known as http://localhost:8000/.
|
||||||
You should now be able to visit your (empty) installation at
|
You should now be able to visit your (empty) installation at
|
||||||
`Paperless webserver`_ or whatever you chose before. You can log in with the
|
`Paperless webserver`_ or whatever you chose before. You can log in with the
|
||||||
user/pass you created in #5.
|
user/pass you created in #5.
|
||||||
@@ -147,6 +147,15 @@ Docker Method
|
|||||||
instructions in comments in the file. The only change that is a hard
|
instructions in comments in the file. The only change that is a hard
|
||||||
requirement is to specify where the consumption directory should
|
requirement is to specify where the consumption directory should
|
||||||
mount.[#dockercomposeyml]_
|
mount.[#dockercomposeyml]_
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
If you are using NFS mounts for the consume directory you also need to
|
||||||
|
change the command to turn off inotify as it doesn't work with NFS
|
||||||
|
|
||||||
|
``command: ["document_consumer", "--no-inotify"]``
|
||||||
|
|
||||||
|
|
||||||
5. Modify ``docker-compose.env`` and adapt the following environment variables:
|
5. Modify ``docker-compose.env`` and adapt the following environment variables:
|
||||||
|
|
||||||
``PAPERLESS_PASSPHRASE``
|
``PAPERLESS_PASSPHRASE``
|
||||||
@@ -267,54 +276,6 @@ Docker Method
|
|||||||
newer ``docker-compose.yml.example`` file
|
newer ``docker-compose.yml.example`` file
|
||||||
|
|
||||||
|
|
||||||
.. _setup-installation-vagrant:
|
|
||||||
|
|
||||||
Vagrant Method
|
|
||||||
++++++++++++++
|
|
||||||
|
|
||||||
1. Install `Vagrant`_. How you do that is really between you and your OS.
|
|
||||||
2. Run ``vagrant up``. An instance will start up for you. When it's ready and
|
|
||||||
provisioned...
|
|
||||||
3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
|
|
||||||
``/etc/paperless.conf`` and set the values for:
|
|
||||||
|
|
||||||
* ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be
|
|
||||||
dumped to be consumed by Paperless.
|
|
||||||
* ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to
|
|
||||||
encrypt/decrypt the original document. It's only required if you want
|
|
||||||
your original files to be encrypted, otherwise, just leave it unset.
|
|
||||||
* ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming
|
|
||||||
documents from mail or via the API. If you don't use either, leaving it
|
|
||||||
blank is just fine.
|
|
||||||
|
|
||||||
4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again. This
|
|
||||||
updates the environment to make use of the changes you made to the config
|
|
||||||
file.
|
|
||||||
5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
|
|
||||||
6. Still inside your vagrant box, create a user for your Paperless instance
|
|
||||||
with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
|
|
||||||
create your user.
|
|
||||||
7. Start the webserver with
|
|
||||||
``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
|
|
||||||
able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
|
|
||||||
You can login with the user/pass you created in #6.
|
|
||||||
8. In a separate window, run ``vagrant ssh`` again, but this time once inside
|
|
||||||
your vagrant instance, you should start the consumer script with
|
|
||||||
``/opt/paperless/src/manage.py document_consumer``.
|
|
||||||
9. Scan something. Put it in the ``CONSUMPTION_DIR``.
|
|
||||||
10. Wait a few minutes
|
|
||||||
11. Visit the document list on your webserver, and it should be there, indexed
|
|
||||||
and downloadable.
|
|
||||||
|
|
||||||
.. caution::
|
|
||||||
|
|
||||||
This installation is not secure. Once everything is working head up to
|
|
||||||
`Making things more permanent`_
|
|
||||||
|
|
||||||
.. _Vagrant: https://vagrantup.com/
|
|
||||||
.. _Paperless server: http://172.28.128.4:8000
|
|
||||||
|
|
||||||
|
|
||||||
.. _setup-permanent:
|
.. _setup-permanent:
|
||||||
|
|
||||||
Making Things a Little more Permanent
|
Making Things a Little more Permanent
|
||||||
@@ -398,7 +359,7 @@ instance listening on localhost port 8000.
|
|||||||
location /static {
|
location /static {
|
||||||
|
|
||||||
autoindex on;
|
autoindex on;
|
||||||
alias <path-to-paperless-static-directory>
|
alias <path-to-paperless-static-directory>;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -409,7 +370,7 @@ instance listening on localhost port 8000.
|
|||||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
proxy_set_header X-Forwarded-Proto $scheme;
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
|
||||||
proxy_pass http://127.0.0.1:8000
|
proxy_pass http://127.0.0.1:8000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -418,7 +379,7 @@ The gunicorn server can be started with the command:
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
$ <path-to-paperless-virtual-environment>/bin/gunicorn <path-to-paperless>/src/paperless.wsgi -w 2
|
$ <path-to-paperless-virtual-environment>/bin/gunicorn --pythonpath=<path-to-paperless>/src paperless.wsgi -w 2
|
||||||
|
|
||||||
|
|
||||||
.. _setup-permanent-standard-systemd:
|
.. _setup-permanent-standard-systemd:
|
||||||
@@ -475,7 +436,7 @@ after restarting your system:
|
|||||||
respawn limit 10 5
|
respawn limit 10 5
|
||||||
|
|
||||||
script
|
script
|
||||||
exec <path to paperless virtual environment>/bin/gunicorn <path to paperless>/src/paperless.wsgi -w 2
|
exec <path to paperless virtual environment>/bin/gunicorn --pythonpath=<path to paperless>/src paperless.wsgi -w 2
|
||||||
end script
|
end script
|
||||||
|
|
||||||
Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
|
Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
|
||||||
@@ -513,13 +474,6 @@ second period.
|
|||||||
.. _Upstart: http://upstart.ubuntu.com/
|
.. _Upstart: http://upstart.ubuntu.com/
|
||||||
|
|
||||||
|
|
||||||
Vagrant
|
|
||||||
~~~~~~~
|
|
||||||
|
|
||||||
You may use the Ubuntu explanation above. Replace
|
|
||||||
``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
|
|
||||||
|
|
||||||
|
|
||||||
.. _setup-permanent-docker:
|
.. _setup-permanent-docker:
|
||||||
|
|
||||||
Docker
|
Docker
|
||||||
|
|||||||
@@ -14,9 +14,8 @@ FORGIVING_OCR is enabled``, then you might need to install the
|
|||||||
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
|
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
|
||||||
matching your document's languages.
|
matching your document's languages.
|
||||||
|
|
||||||
As an example, if you are running Paperless from the Vagrant setup provided
|
As an example, if you are running Paperless from any Ubuntu or Debian
|
||||||
(or from any Ubuntu or Debian box), and your documents are written in Spanish
|
box, and your documents are written in Spanish you may need to run::
|
||||||
you may need to run::
|
|
||||||
|
|
||||||
apt-get install -y tesseract-ocr-spa
|
apt-get install -y tesseract-ocr-spa
|
||||||
|
|
||||||
|
|||||||
@@ -214,5 +214,5 @@ This too is done via the ``manage.py`` script:
|
|||||||
|
|
||||||
That's it. It'll loop over all of the documents in your database and attempt
|
That's it. It'll loop over all of the documents in your database and attempt
|
||||||
to match all of your tags to them. If one matches, it'll be applied. And
|
to match all of your tags to them. If one matches, it'll be applied. And
|
||||||
don't worry, you can run this as often as you like, it' won't double-tag
|
don't worry, you can run this as often as you like, it won't double-tag
|
||||||
a document.
|
a document.
|
||||||
|
|||||||
11
overrides/README.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Customizing Paperless
|
||||||
|
|
||||||
|
*See customization
|
||||||
|
[documentation](https://paperless.readthedocs.io/en/latest/customising.html)
|
||||||
|
for more detail!*
|
||||||
|
|
||||||
|
The example `.css` and `.js` snippets in this folder can be placed into
|
||||||
|
one of two files in your ``PAPERLESS_MEDIADIR`` folder: `overrides.js` or
|
||||||
|
`overrides.css`. Please feel free to submit pull requests to the main
|
||||||
|
repository with other examples of customizations that you think others may
|
||||||
|
find useful.
|
||||||
@@ -61,7 +61,7 @@ PAPERLESS_EMAIL_SECRET=""
|
|||||||
|
|
||||||
# Controls whether django's debug mode is enabled. Disable this on production
|
# Controls whether django's debug mode is enabled. Disable this on production
|
||||||
# systems. Debug mode is enabled by default.
|
# systems. Debug mode is enabled by default.
|
||||||
PAPERLESS_DEBUG="false"
|
#PAPERLESS_DEBUG="true"
|
||||||
|
|
||||||
|
|
||||||
# Paperless can be instructed to attempt to encrypt your PDF files with GPG
|
# Paperless can be instructed to attempt to encrypt your PDF files with GPG
|
||||||
@@ -127,6 +127,14 @@ PAPERLESS_DEBUG="false"
|
|||||||
# "true", the document will instead be opened in the browser, if possible.
|
# "true", the document will instead be opened in the browser, if possible.
|
||||||
#PAPERLESS_INLINE_DOC="false"
|
#PAPERLESS_INLINE_DOC="false"
|
||||||
|
|
||||||
|
# By default, paperless will check the document text for document date information.
|
||||||
|
# Uncomment the line below to enable checking the document filename for date
|
||||||
|
# information. The date order can be set to any option as specified in
|
||||||
|
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
|
||||||
|
# checked first, and if nothing is found, the document text will be checked
|
||||||
|
# as normal.
|
||||||
|
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
|
||||||
|
|
||||||
#
|
#
|
||||||
# The following values use sensible defaults for modern systems, but if you're
|
# The following values use sensible defaults for modern systems, but if you're
|
||||||
# running Paperless on a low-resource device (like a Raspberry Pi), modifying
|
# running Paperless on a low-resource device (like a Raspberry Pi), modifying
|
||||||
@@ -188,6 +196,17 @@ PAPERLESS_DEBUG="false"
|
|||||||
#PAPERLESS_CONSUMER_LOOP_TIME=10
|
#PAPERLESS_CONSUMER_LOOP_TIME=10
|
||||||
|
|
||||||
|
|
||||||
|
# By default Paperless stops consuming a document if no language can be
|
||||||
|
# detected. Set to true to consume documents even if the language detection
|
||||||
|
# fails.
|
||||||
|
#PAPERLESS_FORGIVING_OCR="false"
|
||||||
|
|
||||||
|
|
||||||
|
# By default Paperless does not OCR a document if the text can be retrieved from
|
||||||
|
# the document directly. Set to true to always OCR documents.
|
||||||
|
#PAPERLESS_OCR_ALWAYS="false"
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
#### Interface ####
|
#### Interface ####
|
||||||
###############################################################################
|
###############################################################################
|
||||||
@@ -213,3 +232,23 @@ PAPERLESS_DEBUG="false"
|
|||||||
# The number of years for which a correspondent will be included in the recent
|
# The number of years for which a correspondent will be included in the recent
|
||||||
# correspondents filter.
|
# correspondents filter.
|
||||||
#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
|
#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#### Third-Party Binaries ####
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# There are a few external software packages that Paperless expects to find on
|
||||||
|
# your system when it starts up. Unless you've done something creative with
|
||||||
|
# their installation, you probably won't need to edit any of these. However,
|
||||||
|
# if you've installed these programs somewhere where simply typing the name of
|
||||||
|
# the program doesn't automatically execute it (i.e. the program isn't in your
|
||||||
|
# $PATH), then you'll need to specify the literal path for that program here.
|
||||||
|
|
||||||
|
# Convert (part of the ImageMagick suite)
|
||||||
|
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
|
||||||
|
|
||||||
|
# Unpaper
|
||||||
|
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
|
||||||
|
|
||||||
|
# Optipng (for optimising thumbnail sizes)
|
||||||
|
#PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng
|
||||||
|
|||||||
@@ -1,51 +1,70 @@
|
|||||||
-i https://pypi.python.org/simple
|
-i https://pypi.python.org/simple
|
||||||
apipkg==1.5; python_version != '3.3.*'
|
alabaster==0.7.12
|
||||||
atomicwrites==1.2.1; python_version != '3.3.*'
|
apipkg==1.5
|
||||||
|
atomicwrites==1.2.1
|
||||||
attrs==18.2.0
|
attrs==18.2.0
|
||||||
certifi==2018.8.24
|
babel==2.6.0
|
||||||
|
certifi==2018.11.29
|
||||||
chardet==3.0.4
|
chardet==3.0.4
|
||||||
coverage==4.5.1; python_version < '4'
|
coverage==4.5.2
|
||||||
coveralls==1.5.0
|
coveralls==1.5.1
|
||||||
dateparser==0.7.0
|
dateparser==0.7.0
|
||||||
django-cors-headers==2.4.0
|
django-cors-headers==2.4.0
|
||||||
django-crispy-forms==1.7.2
|
django-crispy-forms==1.7.2
|
||||||
django-extensions==2.1.2
|
django-extensions==2.1.4
|
||||||
django-filter==2.0.0
|
django-filter==2.1.0
|
||||||
django==2.0.8
|
django==2.0.10
|
||||||
djangorestframework==3.8.2
|
djangoql==0.12.3
|
||||||
|
djangorestframework==3.9.1
|
||||||
docopt==0.6.2
|
docopt==0.6.2
|
||||||
execnet==1.5.0; python_version != '3.3.*'
|
docutils==0.14
|
||||||
|
execnet==1.5.0
|
||||||
factory-boy==2.11.1
|
factory-boy==2.11.1
|
||||||
faker==0.9.0; python_version >= '2.7'
|
faker==1.0.2
|
||||||
|
filelock==3.0.10
|
||||||
filemagic==1.6
|
filemagic==1.6
|
||||||
fuzzywuzzy==0.15.0
|
fuzzywuzzy[speedup]==0.15.0
|
||||||
gunicorn==19.9.0
|
gunicorn==19.9.0
|
||||||
idna==2.7
|
idna==2.8
|
||||||
|
imagesize==1.1.0
|
||||||
inotify-simple==1.1.8
|
inotify-simple==1.1.8
|
||||||
|
jinja2==2.10
|
||||||
langdetect==1.0.7
|
langdetect==1.0.7
|
||||||
more-itertools==4.3.0
|
markupsafe==1.1.0
|
||||||
pdftotext==2.1.0
|
more-itertools==5.0.0
|
||||||
pillow==5.2.0
|
packaging==19.0
|
||||||
pluggy==0.7.1; python_version != '3.3.*'
|
pdftotext==2.1.1
|
||||||
py==1.6.0; python_version != '3.3.*'
|
pillow==5.4.1
|
||||||
|
pluggy==0.8.1
|
||||||
|
ply==3.11
|
||||||
|
psycopg2==2.7.7
|
||||||
|
py==1.7.0
|
||||||
pycodestyle==2.4.0
|
pycodestyle==2.4.0
|
||||||
|
pygments==2.3.1
|
||||||
pyocr==0.5.3
|
pyocr==0.5.3
|
||||||
pytest-cov==2.6.0
|
pyparsing==2.3.1
|
||||||
pytest-django==3.4.2
|
pytest-cov==2.6.1
|
||||||
|
pytest-django==3.4.5
|
||||||
pytest-env==0.6.2
|
pytest-env==0.6.2
|
||||||
pytest-forked==0.2; python_version != '3.3.*'
|
pytest-forked==1.0.1
|
||||||
pytest-sugar==0.9.1
|
pytest-sugar==0.9.2
|
||||||
pytest-xdist==1.23.0
|
pytest-xdist==1.26.0
|
||||||
pytest==3.8.0
|
pytest==4.1.1
|
||||||
python-dateutil==2.7.3
|
python-dateutil==2.7.5
|
||||||
python-dotenv==0.9.1
|
python-dotenv==0.10.1
|
||||||
python-gnupg==0.4.3
|
python-gnupg==0.4.4
|
||||||
python-levenshtein==0.12.0
|
python-levenshtein==0.12.0
|
||||||
pytz==2018.5
|
pytz==2018.9
|
||||||
regex==2018.8.29
|
regex==2019.1.24
|
||||||
requests==2.19.1
|
requests==2.21.0
|
||||||
six==1.11.0
|
six==1.12.0
|
||||||
|
snowballstemmer==1.2.1
|
||||||
|
sphinx==1.8.3
|
||||||
|
sphinxcontrib-websupport==1.1.0
|
||||||
termcolor==1.1.0
|
termcolor==1.1.0
|
||||||
text-unidecode==1.2
|
text-unidecode==1.2
|
||||||
|
toml==0.10.0
|
||||||
|
tox==3.7.0
|
||||||
tzlocal==1.5.1
|
tzlocal==1.5.1
|
||||||
urllib3==1.23; python_version != '3.3.*'
|
urllib3==1.24.1
|
||||||
|
virtualenv==16.3.0
|
||||||
|
|||||||
1086
resources/logo/print/eps/Black logo - no background.eps
Normal file
1090
resources/logo/print/eps/Color logo - no background.eps
Normal file
1099
resources/logo/print/eps/Color logo with background.eps
Normal file
1090
resources/logo/print/eps/White logo - no background.eps
Normal file
BIN
resources/logo/print/pdf/Black logo - no background.pdf
Normal file
BIN
resources/logo/print/pdf/Color logo - no background.pdf
Normal file
BIN
resources/logo/print/pdf/Color logo with background.pdf
Normal file
BIN
resources/logo/print/pdf/White logo - no background.pdf
Normal file
BIN
resources/logo/web/png/Black logo - no background.png
Normal file
|
After Width: | Height: | Size: 91 KiB |
BIN
resources/logo/web/png/Color logo - no background.png
Normal file
|
After Width: | Height: | Size: 111 KiB |
BIN
resources/logo/web/png/Color logo with background.png
Normal file
|
After Width: | Height: | Size: 116 KiB |
BIN
resources/logo/web/png/White logo - no background.png
Normal file
|
After Width: | Height: | Size: 94 KiB |
8
resources/logo/web/svg/Black logo - no background.svg
Normal file
|
After Width: | Height: | Size: 7.4 KiB |
8
resources/logo/web/svg/Color logo - no background.svg
Normal file
|
After Width: | Height: | Size: 7.5 KiB |
8
resources/logo/web/svg/Color logo with background.svg
Normal file
|
After Width: | Height: | Size: 7.5 KiB |
8
resources/logo/web/svg/White logo - no background.svg
Normal file
|
After Width: | Height: | Size: 7.4 KiB |
82
resources/logo/web/svg/square.svg
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:cc="http://creativecommons.org/ns#"
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||||
|
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||||
|
version="1.1"
|
||||||
|
width="900"
|
||||||
|
height="900"
|
||||||
|
id="svg3923"
|
||||||
|
sodipodi:docname="square.svg"
|
||||||
|
inkscape:export-filename="/tmp/test.png"
|
||||||
|
inkscape:export-xdpi="96"
|
||||||
|
inkscape:export-ydpi="96"
|
||||||
|
inkscape:version="0.92.2 2405546, 2018-03-11">
|
||||||
|
<metadata
|
||||||
|
id="metadata3929">
|
||||||
|
<rdf:RDF>
|
||||||
|
<cc:Work
|
||||||
|
rdf:about="">
|
||||||
|
<dc:format>image/svg+xml</dc:format>
|
||||||
|
<dc:type
|
||||||
|
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||||
|
<dc:title></dc:title>
|
||||||
|
</cc:Work>
|
||||||
|
</rdf:RDF>
|
||||||
|
</metadata>
|
||||||
|
<defs
|
||||||
|
id="defs3927" />
|
||||||
|
<sodipodi:namedview
|
||||||
|
pagecolor="#ffffff"
|
||||||
|
bordercolor="#666666"
|
||||||
|
borderopacity="1"
|
||||||
|
objecttolerance="10"
|
||||||
|
gridtolerance="10"
|
||||||
|
guidetolerance="10"
|
||||||
|
inkscape:pageopacity="0"
|
||||||
|
inkscape:pageshadow="2"
|
||||||
|
inkscape:window-width="3840"
|
||||||
|
inkscape:window-height="2096"
|
||||||
|
id="namedview3925"
|
||||||
|
showgrid="false"
|
||||||
|
inkscape:zoom="1.1360927"
|
||||||
|
inkscape:cx="635.07139"
|
||||||
|
inkscape:cy="606.383"
|
||||||
|
inkscape:window-x="0"
|
||||||
|
inkscape:window-y="27"
|
||||||
|
inkscape:window-maximized="1"
|
||||||
|
inkscape:current-layer="g3921" />
|
||||||
|
<g
|
||||||
|
transform="matrix(10.638298,0,0,10.638298,106.38298,-206.38301)"
|
||||||
|
id="g3921">
|
||||||
|
<defs
|
||||||
|
id="SvgjsDefs1018" />
|
||||||
|
<g
|
||||||
|
id="SvgjsG1019"
|
||||||
|
featureKey="root"
|
||||||
|
style="fill:#ffffff" />
|
||||||
|
<g
|
||||||
|
id="SvgjsG1020"
|
||||||
|
featureKey="symbol1"
|
||||||
|
transform="matrix(0.10341565,0,0,0.10341565,-11.43874,18.048418)"
|
||||||
|
inkscape:export-filename="/tmp/test.png"
|
||||||
|
inkscape:export-xdpi="116.02285"
|
||||||
|
inkscape:export-ydpi="116.02285"
|
||||||
|
style="fill:#17541f">
|
||||||
|
<defs
|
||||||
|
id="defs3911" />
|
||||||
|
<g
|
||||||
|
id="g3915">
|
||||||
|
<path
|
||||||
|
d="M 231,798 C 227,779 219,741 218,741 49,640 69,465 125,365 c 12,126 235,213 105,367 -1,2 6,26 12,48 26,-44 65,-97 63,-102 C 145,288 645,258 749,16 c 47,234 -24,596 -426,688 -2,1 -73,126 -76,127 0,-2 -30,-1 -26,-11 2,-6 6,-14 10,-22 z M 330,625 C 267,476 452,312 544,271 356,439 324,564 330,625 Z m -104,79 c 51,-59 -9,-160 -45,-193 61,105 57,166 45,193 z"
|
||||||
|
style="fill:#17541f"
|
||||||
|
id="path3913"
|
||||||
|
inkscape:connector-curvature="0" />
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 2.6 KiB |
@@ -75,7 +75,7 @@ install_languages() {
|
|||||||
pkg="tesseract-ocr-data-$lang"
|
pkg="tesseract-ocr-data-$lang"
|
||||||
|
|
||||||
# English is installed by default
|
# English is installed by default
|
||||||
if [ "$lang" == "eng" ]; then
|
if [[ "$lang" == "eng" ]]; then
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -95,7 +95,7 @@ if [[ "$1" != "/"* ]]; then
|
|||||||
initialize
|
initialize
|
||||||
|
|
||||||
# Install additional languages if specified
|
# Install additional languages if specified
|
||||||
if [ ! -z "$PAPERLESS_OCR_LANGUAGES" ]; then
|
if [[ ! -z "$PAPERLESS_OCR_LANGUAGES" ]]; then
|
||||||
install_languages "$PAPERLESS_OCR_LANGUAGES"
|
install_languages "$PAPERLESS_OCR_LANGUAGES"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ Description=Paperless webserver
|
|||||||
[Service]
|
[Service]
|
||||||
User=paperless
|
User=paperless
|
||||||
Group=paperless
|
Group=paperless
|
||||||
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn /home/paperless/project/src/paperless.wsgi -w 2
|
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|||||||
@@ -1,31 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Install packages
|
|
||||||
apt-get update
|
|
||||||
apt-get build-dep -y python-imaging
|
|
||||||
apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
|
|
||||||
apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
|
|
||||||
apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
|
|
||||||
|
|
||||||
# Python dependencies
|
|
||||||
pip3 install -r /opt/paperless/requirements.txt
|
|
||||||
|
|
||||||
# Create the environment file
|
|
||||||
cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
|
|
||||||
chmod 0640 /etc/paperless.conf
|
|
||||||
chown root:vagrant /etc/paperless.conf
|
|
||||||
|
|
||||||
# Create the consumption directory
|
|
||||||
mkdir /home/vagrant/consumption
|
|
||||||
chown vagrant:vagrant /home/vagrant/consumption
|
|
||||||
|
|
||||||
echo "
|
|
||||||
|
|
||||||
|
|
||||||
Now follow the remaining steps in the Vagrant section of the setup
|
|
||||||
documentation to complete the process:
|
|
||||||
|
|
||||||
http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
|
|
||||||
|
|
||||||
|
|
||||||
"
|
|
||||||
@@ -11,6 +11,7 @@ from django.urls import reverse
|
|||||||
from django.utils.html import format_html, format_html_join
|
from django.utils.html import format_html, format_html_join
|
||||||
from django.utils.http import urlquote
|
from django.utils.http import urlquote
|
||||||
from django.utils.safestring import mark_safe
|
from django.utils.safestring import mark_safe
|
||||||
|
from djangoql.admin import DjangoQLSearchMixin
|
||||||
|
|
||||||
from documents.actions import (
|
from documents.actions import (
|
||||||
add_tag_to_selected,
|
add_tag_to_selected,
|
||||||
@@ -61,12 +62,12 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
|||||||
|
|
||||||
# To keep it simple we use the same string for both
|
# To keep it simple we use the same string for both
|
||||||
# query parameter and the display.
|
# query parameter and the display.
|
||||||
return (query, query)
|
return query, query
|
||||||
|
|
||||||
else:
|
else:
|
||||||
query = "{0}-{0}".format(date.year)
|
query = "{0}-{0}".format(date.year)
|
||||||
display = "{}".format(date.year)
|
display = "{}".format(date.year)
|
||||||
return (query, display)
|
return query, display
|
||||||
|
|
||||||
def lookups(self, request, model_admin):
|
def lookups(self, request, model_admin):
|
||||||
if not settings.FY_START or not settings.FY_END:
|
if not settings.FY_START or not settings.FY_END:
|
||||||
@@ -88,25 +89,24 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
|||||||
|
|
||||||
|
|
||||||
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
|
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
|
||||||
|
"""
|
||||||
def __init__(self, *args, **kwargs):
|
If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
|
||||||
super().__init__(*args, **kwargs)
|
correspondents to documents sent our way over the past ``n`` years.
|
||||||
self.title = "correspondent (recent)"
|
"""
|
||||||
|
|
||||||
def field_choices(self, field, request, model_admin):
|
def field_choices(self, field, request, model_admin):
|
||||||
|
|
||||||
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
|
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
|
||||||
days = 365 * years
|
correspondents = Correspondent.objects.all()
|
||||||
|
|
||||||
lookups = []
|
|
||||||
if years and years > 0:
|
if years and years > 0:
|
||||||
correspondents = Correspondent.objects.filter(
|
self.title = "Correspondent (Recent)"
|
||||||
|
days = 365 * years
|
||||||
|
correspondents = correspondents.filter(
|
||||||
documents__created__gte=datetime.now() - timedelta(days=days)
|
documents__created__gte=datetime.now() - timedelta(days=days)
|
||||||
).distinct()
|
).distinct()
|
||||||
for c in correspondents:
|
|
||||||
lookups.append((c.id, c.name))
|
|
||||||
|
|
||||||
return lookups
|
return [(c.id, c.name) for c in correspondents]
|
||||||
|
|
||||||
|
|
||||||
class CommonAdmin(admin.ModelAdmin):
|
class CommonAdmin(admin.ModelAdmin):
|
||||||
@@ -125,6 +125,8 @@ class CorrespondentAdmin(CommonAdmin):
|
|||||||
list_filter = ("matching_algorithm",)
|
list_filter = ("matching_algorithm",)
|
||||||
list_editable = ("match", "matching_algorithm")
|
list_editable = ("match", "matching_algorithm")
|
||||||
|
|
||||||
|
readonly_fields = ("slug",)
|
||||||
|
|
||||||
def get_queryset(self, request):
|
def get_queryset(self, request):
|
||||||
qs = super(CorrespondentAdmin, self).get_queryset(request)
|
qs = super(CorrespondentAdmin, self).get_queryset(request)
|
||||||
qs = qs.annotate(
|
qs = qs.annotate(
|
||||||
@@ -144,11 +146,16 @@ class CorrespondentAdmin(CommonAdmin):
|
|||||||
|
|
||||||
class TagAdmin(CommonAdmin):
|
class TagAdmin(CommonAdmin):
|
||||||
|
|
||||||
list_display = ("name", "colour", "match", "matching_algorithm",
|
list_display = (
|
||||||
"document_count")
|
"name", "colour", "match", "matching_algorithm", "document_count")
|
||||||
list_filter = ("colour", "matching_algorithm")
|
list_filter = ("colour", "matching_algorithm")
|
||||||
list_editable = ("colour", "match", "matching_algorithm")
|
list_editable = ("colour", "match", "matching_algorithm")
|
||||||
|
|
||||||
|
readonly_fields = ("slug",)
|
||||||
|
|
||||||
|
class Media:
|
||||||
|
js = ("js/colours.js",)
|
||||||
|
|
||||||
def get_queryset(self, request):
|
def get_queryset(self, request):
|
||||||
qs = super(TagAdmin, self).get_queryset(request)
|
qs = super(TagAdmin, self).get_queryset(request)
|
||||||
qs = qs.annotate(document_count=models.Count("documents"))
|
qs = qs.annotate(document_count=models.Count("documents"))
|
||||||
@@ -159,7 +166,7 @@ class TagAdmin(CommonAdmin):
|
|||||||
document_count.admin_order_field = "document_count"
|
document_count.admin_order_field = "document_count"
|
||||||
|
|
||||||
|
|
||||||
class DocumentAdmin(CommonAdmin):
|
class DocumentAdmin(DjangoQLSearchMixin, CommonAdmin):
|
||||||
|
|
||||||
class Media:
|
class Media:
|
||||||
css = {
|
css = {
|
||||||
@@ -167,13 +174,12 @@ class DocumentAdmin(CommonAdmin):
|
|||||||
}
|
}
|
||||||
|
|
||||||
search_fields = ("correspondent__name", "title", "content", "tags__name")
|
search_fields = ("correspondent__name", "title", "content", "tags__name")
|
||||||
readonly_fields = ("added",)
|
readonly_fields = ("added", "file_type", "storage_type",)
|
||||||
list_display = ("title", "created", "added", "thumbnail", "correspondent",
|
list_display = ("title", "created", "added", "thumbnail", "correspondent",
|
||||||
"tags_")
|
"tags_")
|
||||||
list_filter = (
|
list_filter = (
|
||||||
"tags",
|
"tags",
|
||||||
("correspondent", RecentCorrespondentFilter),
|
("correspondent", RecentCorrespondentFilter),
|
||||||
"correspondent",
|
|
||||||
FinancialYearFilter
|
FinancialYearFilter
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
from django.db import transaction
|
||||||
import datetime
|
import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
@@ -111,8 +112,11 @@ class Consumer:
|
|||||||
if not self.try_consume_file(file):
|
if not self.try_consume_file(file):
|
||||||
self._ignore.append((file, mtime))
|
self._ignore.append((file, mtime))
|
||||||
|
|
||||||
|
@transaction.atomic
|
||||||
def try_consume_file(self, file):
|
def try_consume_file(self, file):
|
||||||
"Return True if file was consumed"
|
"""
|
||||||
|
Return True if file was consumed
|
||||||
|
"""
|
||||||
|
|
||||||
if not re.match(FileInfo.REGEXES["title"], file):
|
if not re.match(FileInfo.REGEXES["title"], file):
|
||||||
return False
|
return False
|
||||||
@@ -145,7 +149,7 @@ class Consumer:
|
|||||||
parsed_document = parser_class(doc)
|
parsed_document = parser_class(doc)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
thumbnail = parsed_document.get_thumbnail()
|
thumbnail = parsed_document.get_optimised_thumbnail()
|
||||||
date = parsed_document.get_date()
|
date = parsed_document.get_date()
|
||||||
document = self._store(
|
document = self._store(
|
||||||
parsed_document.get_text(),
|
parsed_document.get_text(),
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter, ModelChoiceFilter
|
from django_filters.rest_framework import BooleanFilter, FilterSet
|
||||||
|
|
||||||
from .models import Correspondent, Document, Tag
|
from .models import Correspondent, Document, Tag
|
||||||
|
|
||||||
|
|||||||
@@ -216,7 +216,11 @@ class MailFetcher(Loggable):
|
|||||||
return r
|
return r
|
||||||
|
|
||||||
def _connect(self):
|
def _connect(self):
|
||||||
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
|
try:
|
||||||
|
self._connection = imaplib.IMAP4_SSL(self._host, self._port)
|
||||||
|
except OSError as e:
|
||||||
|
msg = "Problem connecting to {}: {}".format(self._host, e.strerror)
|
||||||
|
raise MailFetcherError(msg)
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
|
|
||||||
|
|||||||
52
src/documents/migrations/0022_auto_20181007_1420.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# Generated by Django 2.0.8 on 2018-10-07 14:20
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
from django.utils.text import slugify
|
||||||
|
|
||||||
|
|
||||||
|
def re_slug_all_the_things(apps, schema_editor):
|
||||||
|
"""
|
||||||
|
Rewrite all slug values to make sure they're actually slugs before we brand
|
||||||
|
them as uneditable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
Tag = apps.get_model("documents", "Tag")
|
||||||
|
Correspondent = apps.get_model("documents", "Correspondent")
|
||||||
|
|
||||||
|
for klass in (Tag, Correspondent):
|
||||||
|
for instance in klass.objects.all():
|
||||||
|
klass.objects.filter(
|
||||||
|
pk=instance.pk
|
||||||
|
).update(
|
||||||
|
slug=slugify(instance.slug)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('documents', '0021_document_storage_type'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterModelOptions(
|
||||||
|
name='tag',
|
||||||
|
options={'ordering': ('name',)},
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='correspondent',
|
||||||
|
name='slug',
|
||||||
|
field=models.SlugField(blank=True, editable=False),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='document',
|
||||||
|
name='file_type',
|
||||||
|
field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')], editable=False, max_length=4),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='tag',
|
||||||
|
name='slug',
|
||||||
|
field=models.SlugField(blank=True, editable=False),
|
||||||
|
),
|
||||||
|
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
|
||||||
|
]
|
||||||
@@ -11,6 +11,7 @@ from django.conf import settings
|
|||||||
from django.db import models
|
from django.db import models
|
||||||
from django.template.defaultfilters import slugify
|
from django.template.defaultfilters import slugify
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
from django.utils.text import slugify
|
||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
|
|
||||||
from .managers import LogManager
|
from .managers import LogManager
|
||||||
@@ -37,7 +38,7 @@ class MatchingModel(models.Model):
|
|||||||
)
|
)
|
||||||
|
|
||||||
name = models.CharField(max_length=128, unique=True)
|
name = models.CharField(max_length=128, unique=True)
|
||||||
slug = models.SlugField(blank=True)
|
slug = models.SlugField(blank=True, editable=False)
|
||||||
|
|
||||||
match = models.CharField(max_length=256, blank=True)
|
match = models.CharField(max_length=256, blank=True)
|
||||||
matching_algorithm = models.PositiveIntegerField(
|
matching_algorithm = models.PositiveIntegerField(
|
||||||
@@ -147,9 +148,7 @@ class MatchingModel(models.Model):
|
|||||||
def save(self, *args, **kwargs):
|
def save(self, *args, **kwargs):
|
||||||
|
|
||||||
self.match = self.match.lower()
|
self.match = self.match.lower()
|
||||||
|
self.slug = slugify(self.name)
|
||||||
if not self.slug:
|
|
||||||
self.slug = slugify(self.name)
|
|
||||||
|
|
||||||
models.Model.save(self, *args, **kwargs)
|
models.Model.save(self, *args, **kwargs)
|
||||||
|
|
||||||
@@ -452,7 +451,7 @@ class FileInfo:
|
|||||||
r = []
|
r = []
|
||||||
for t in tags.split(","):
|
for t in tags.split(","):
|
||||||
r.append(Tag.objects.get_or_create(
|
r.append(Tag.objects.get_or_create(
|
||||||
slug=t.lower(),
|
slug=slugify(t),
|
||||||
defaults={"name": t}
|
defaults={"name": t}
|
||||||
)[0])
|
)[0])
|
||||||
return tuple(r)
|
return tuple(r)
|
||||||
|
|||||||
@@ -1,23 +1,31 @@
|
|||||||
import logging
|
import logging
|
||||||
import shutil
|
import os
|
||||||
import tempfile
|
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
import dateparser
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
# This regular expression will try to find dates in the document at
|
# This regular expression will try to find dates in the document at
|
||||||
# hand and will match the following formats:
|
# hand and will match the following formats:
|
||||||
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
||||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
||||||
DATE_REGEX = re.compile(
|
DATE_REGEX = re.compile(
|
||||||
r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
|
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||||
r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
|
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||||
r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
|
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501
|
||||||
r'\b([^\W\d_]{3,9} [0-9]{4})\b'
|
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
|
||||||
|
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -32,6 +40,9 @@ class DocumentParser:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
SCRATCH = settings.SCRATCH_DIR
|
SCRATCH = settings.SCRATCH_DIR
|
||||||
|
DATE_ORDER = settings.DATE_ORDER
|
||||||
|
FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
|
||||||
|
OPTIPNG = settings.OPTIPNG_BINARY
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
self.document_path = path
|
self.document_path = path
|
||||||
@@ -45,6 +56,19 @@ class DocumentParser:
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
def optimise_thumbnail(self, in_path):
|
||||||
|
|
||||||
|
out_path = os.path.join(self.tempdir, "optipng.png")
|
||||||
|
|
||||||
|
args = (self.OPTIPNG, "-o5", in_path, "-out", out_path)
|
||||||
|
if not subprocess.Popen(args).wait() == 0:
|
||||||
|
raise ParseError("Optipng failed at {}".format(args))
|
||||||
|
|
||||||
|
return out_path
|
||||||
|
|
||||||
|
def get_optimised_thumbnail(self):
|
||||||
|
return self.optimise_thumbnail(self.get_thumbnail())
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
"""
|
"""
|
||||||
Returns the text from the document and only the text.
|
Returns the text from the document and only the text.
|
||||||
@@ -55,7 +79,82 @@ class DocumentParser:
|
|||||||
"""
|
"""
|
||||||
Returns the date of the document.
|
Returns the date of the document.
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
|
||||||
|
def __parser(ds, date_order):
|
||||||
|
"""
|
||||||
|
Call dateparser.parse with a particular date ordering
|
||||||
|
"""
|
||||||
|
return dateparser.parse(
|
||||||
|
ds,
|
||||||
|
settings={
|
||||||
|
"DATE_ORDER": date_order,
|
||||||
|
"PREFER_DAY_OF_MONTH": "first",
|
||||||
|
"RETURN_AS_TIMEZONE_AWARE":
|
||||||
|
True
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
date = None
|
||||||
|
date_string = None
|
||||||
|
|
||||||
|
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
|
||||||
|
title = os.path.basename(self.document_path)
|
||||||
|
|
||||||
|
# if filename date parsing is enabled, search there first:
|
||||||
|
if self.FILENAME_DATE_ORDER:
|
||||||
|
self.log("info", "Checking document title for date")
|
||||||
|
for m in re.finditer(DATE_REGEX, title):
|
||||||
|
date_string = m.group(0)
|
||||||
|
|
||||||
|
try:
|
||||||
|
date = __parser(date_string, self.FILENAME_DATE_ORDER)
|
||||||
|
except TypeError:
|
||||||
|
# Skip all matches that do not parse to a proper date
|
||||||
|
continue
|
||||||
|
|
||||||
|
if date is not None and next_year > date.year > 1900:
|
||||||
|
self.log(
|
||||||
|
"info",
|
||||||
|
"Detected document date {} based on string {} "
|
||||||
|
"from document title"
|
||||||
|
"".format(date.isoformat(), date_string)
|
||||||
|
)
|
||||||
|
return date
|
||||||
|
|
||||||
|
try:
|
||||||
|
# getting text after checking filename will save time if only
|
||||||
|
# looking at the filename instead of the whole text
|
||||||
|
text = self.get_text()
|
||||||
|
except ParseError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Iterate through all regex matches in text and try to parse the date
|
||||||
|
for m in re.finditer(DATE_REGEX, text):
|
||||||
|
date_string = m.group(0)
|
||||||
|
|
||||||
|
try:
|
||||||
|
date = __parser(date_string, self.DATE_ORDER)
|
||||||
|
except TypeError:
|
||||||
|
# Skip all matches that do not parse to a proper date
|
||||||
|
continue
|
||||||
|
|
||||||
|
if date is not None and next_year > date.year > 1900:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
date = None
|
||||||
|
|
||||||
|
if date is not None:
|
||||||
|
self.log(
|
||||||
|
"info",
|
||||||
|
"Detected document date {} based on string {}".format(
|
||||||
|
date.isoformat(),
|
||||||
|
date_string
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.log("info", "Unable to detect date for document")
|
||||||
|
|
||||||
|
return date
|
||||||
|
|
||||||
def log(self, level, message):
|
def log(self, level, message):
|
||||||
getattr(self.logger, level)(message, extra={
|
getattr(self.logger, level)(message, extra={
|
||||||
|
|||||||
@@ -7,7 +7,14 @@ class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
|
|||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Correspondent
|
model = Correspondent
|
||||||
fields = ("id", "slug", "name")
|
fields = (
|
||||||
|
"id",
|
||||||
|
"slug",
|
||||||
|
"name",
|
||||||
|
"match",
|
||||||
|
"matching_algorithm",
|
||||||
|
"is_insensitive"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TagSerializer(serializers.HyperlinkedModelSerializer):
|
class TagSerializer(serializers.HyperlinkedModelSerializer):
|
||||||
@@ -15,7 +22,14 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
|
|||||||
class Meta:
|
class Meta:
|
||||||
model = Tag
|
model = Tag
|
||||||
fields = (
|
fields = (
|
||||||
"id", "slug", "name", "colour", "match", "matching_algorithm")
|
"id",
|
||||||
|
"slug",
|
||||||
|
"name",
|
||||||
|
"colour",
|
||||||
|
"match",
|
||||||
|
"matching_algorithm",
|
||||||
|
"is_insensitive"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CorrespondentField(serializers.HyperlinkedRelatedField):
|
class CorrespondentField(serializers.HyperlinkedRelatedField):
|
||||||
@@ -46,6 +60,7 @@ class DocumentSerializer(serializers.ModelSerializer):
|
|||||||
"checksum",
|
"checksum",
|
||||||
"created",
|
"created",
|
||||||
"modified",
|
"modified",
|
||||||
|
"added",
|
||||||
"file_name",
|
"file_name",
|
||||||
"download_url",
|
"download_url",
|
||||||
"thumbnail_url",
|
"thumbnail_url",
|
||||||
|
|||||||
66
src/documents/static/js/colours.js
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
// The following jQuery snippet will add a small square next to the selection
|
||||||
|
// drop-down on the `Add tag` page that will update to show the selected tag
|
||||||
|
// color as the drop-down value is changed.
|
||||||
|
|
||||||
|
django.jQuery(document).ready(function(){
|
||||||
|
|
||||||
|
if (django.jQuery("#id_colour").length) {
|
||||||
|
|
||||||
|
let colour;
|
||||||
|
let colour_num;
|
||||||
|
|
||||||
|
colour_num = django.jQuery("#id_colour").val() - 1;
|
||||||
|
colour = django.jQuery('#id_colour')[0][colour_num].text;
|
||||||
|
django.jQuery('#id_colour').after('<div class="colour_square"></div>');
|
||||||
|
|
||||||
|
django.jQuery('.colour_square').css({
|
||||||
|
'float': 'left',
|
||||||
|
'width': '20px',
|
||||||
|
'height': '20px',
|
||||||
|
'margin': '5px',
|
||||||
|
'border': '1px solid rgba(0, 0, 0, .2)',
|
||||||
|
'background': colour
|
||||||
|
});
|
||||||
|
|
||||||
|
django.jQuery('#id_colour').change(function () {
|
||||||
|
colour_num = django.jQuery("#id_colour").val() - 1;
|
||||||
|
colour = django.jQuery('#id_colour')[0][colour_num].text;
|
||||||
|
django.jQuery('.colour_square').css({'background': colour});
|
||||||
|
});
|
||||||
|
|
||||||
|
} else if (django.jQuery("select[id*='colour']").length) {
|
||||||
|
|
||||||
|
django.jQuery('select[id*="-colour"]').each(function (index, element) {
|
||||||
|
let id;
|
||||||
|
let loop_colour_num;
|
||||||
|
let loop_colour;
|
||||||
|
|
||||||
|
id = "colour_square_" + index;
|
||||||
|
django.jQuery(element).after('<div class="colour_square" id="' + id + '"></div>');
|
||||||
|
|
||||||
|
loop_colour_num = django.jQuery(element).val() - 1;
|
||||||
|
loop_colour = django.jQuery(element)[0][loop_colour_num].text;
|
||||||
|
|
||||||
|
django.jQuery("<style type='text/css'>\
|
||||||
|
.colour_square{ \
|
||||||
|
float: left; \
|
||||||
|
width: 20px; \
|
||||||
|
height: 20px; \
|
||||||
|
margin: 5px; \
|
||||||
|
border: 1px solid rgba(0,0,0,.2); \
|
||||||
|
} </style>").appendTo("head");
|
||||||
|
django.jQuery('#' + id).css({'background': loop_colour});
|
||||||
|
|
||||||
|
console.log(id, loop_colour_num, loop_colour);
|
||||||
|
|
||||||
|
django.jQuery(element).change(function () {
|
||||||
|
loop_colour_num = django.jQuery(element).val() - 1;
|
||||||
|
loop_colour = django.jQuery(element)[0][loop_colour_num].text;
|
||||||
|
django.jQuery('#' + id).css({'background': loop_colour});
|
||||||
|
console.log('#' + id, loop_colour)
|
||||||
|
});
|
||||||
|
})
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
@@ -3,10 +3,63 @@
|
|||||||
{# NOTE: This should probably be extending base.html. See CSS comment below details. #}
|
{# NOTE: This should probably be extending base.html. See CSS comment below details. #}
|
||||||
|
|
||||||
|
|
||||||
|
{% load static %}
|
||||||
{% load custom_css from customisation %}
|
{% load custom_css from customisation %}
|
||||||
{% load custom_js from customisation %}
|
{% load custom_js from customisation %}
|
||||||
|
|
||||||
|
|
||||||
|
{% block extrahead %}
|
||||||
|
<link rel="icon" type="image/x-icon" href="{% url 'favicon' %}" />
|
||||||
|
<style>
|
||||||
|
#header {
|
||||||
|
background-color: #90a9b7;
|
||||||
|
line-height: inherit;
|
||||||
|
height: auto;
|
||||||
|
}
|
||||||
|
#branding h1 {
|
||||||
|
font-weight: inherit;
|
||||||
|
font-size: inherit;
|
||||||
|
}
|
||||||
|
.button,
|
||||||
|
.button:active,
|
||||||
|
.button:focus,
|
||||||
|
.button:hover,
|
||||||
|
a.button,
|
||||||
|
.submit-row input,
|
||||||
|
input[type="submit"],
|
||||||
|
input[type="submit"]:active,
|
||||||
|
input[type="submit"]:focus,
|
||||||
|
input[type="submit"]:hover,
|
||||||
|
input[type="button"],
|
||||||
|
input[type="button"]:active,
|
||||||
|
input[type="button"]:focus,
|
||||||
|
input[type="button"]:hover {
|
||||||
|
background-color: #074f57;
|
||||||
|
}
|
||||||
|
.module h2,
|
||||||
|
.module caption,
|
||||||
|
.inline-group h2 {
|
||||||
|
background-color: #90a9b7;
|
||||||
|
}
|
||||||
|
div.breadcrumbs {
|
||||||
|
background-color: #077187;
|
||||||
|
}
|
||||||
|
.module h2,
|
||||||
|
.module caption,
|
||||||
|
.inline-group h2 {
|
||||||
|
background-color: #077187;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
|
||||||
|
{% block branding %}
|
||||||
|
<h1 id="site-name">
|
||||||
|
<a href="{% url 'admin:index' %}"><img src="{% static 'paperless/img/logo-light.png' %}" alt="Paperless" /></a>
|
||||||
|
</h1>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
|
||||||
{% block blockbots %}
|
{% block blockbots %}
|
||||||
|
|
||||||
{% comment %}
|
{% comment %}
|
||||||
|
|||||||
@@ -28,7 +28,7 @@
|
|||||||
}
|
}
|
||||||
.result .header {
|
.result .header {
|
||||||
padding: 5px;
|
padding: 5px;
|
||||||
background-color: #79AEC8;
|
background-color: #90a9b7;
|
||||||
position: relative;
|
position: relative;
|
||||||
}
|
}
|
||||||
.result .header .checkbox {
|
.result .header .checkbox {
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from django.http import HttpResponse, HttpResponseBadRequest
|
|||||||
from django.views.generic import DetailView, FormView, TemplateView
|
from django.views.generic import DetailView, FormView, TemplateView
|
||||||
from django_filters.rest_framework import DjangoFilterBackend
|
from django_filters.rest_framework import DjangoFilterBackend
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from django.utils import cache
|
||||||
|
|
||||||
from paperless.db import GnuPG
|
from paperless.db import GnuPG
|
||||||
from paperless.mixins import SessionOrBasicAuthMixin
|
from paperless.mixins import SessionOrBasicAuthMixin
|
||||||
@@ -56,10 +57,12 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
|
|||||||
}
|
}
|
||||||
|
|
||||||
if self.kwargs["kind"] == "thumb":
|
if self.kwargs["kind"] == "thumb":
|
||||||
return HttpResponse(
|
response = HttpResponse(
|
||||||
self._get_raw_data(self.object.thumbnail_file),
|
self._get_raw_data(self.object.thumbnail_file),
|
||||||
content_type=content_types[Document.TYPE_PNG]
|
content_type=content_types[Document.TYPE_PNG]
|
||||||
)
|
)
|
||||||
|
cache.patch_cache_control(response, max_age=31536000, private=True)
|
||||||
|
return response
|
||||||
|
|
||||||
response = HttpResponse(
|
response = HttpResponse(
|
||||||
self._get_raw_data(self.object.source_file),
|
self._get_raw_data(self.object.source_file),
|
||||||
@@ -130,7 +133,7 @@ class DocumentViewSet(RetrieveModelMixin,
|
|||||||
filter_class = DocumentFilterSet
|
filter_class = DocumentFilterSet
|
||||||
search_fields = ("title", "correspondent__name", "content")
|
search_fields = ("title", "correspondent__name", "content")
|
||||||
ordering_fields = (
|
ordering_fields = (
|
||||||
"id", "title", "correspondent__name", "created", "modified")
|
"id", "title", "correspondent__name", "created", "modified", "added")
|
||||||
|
|
||||||
|
|
||||||
class LogViewSet(ReadOnlyModelViewSet):
|
class LogViewSet(ReadOnlyModelViewSet):
|
||||||
|
|||||||
@@ -76,7 +76,12 @@ def binaries_check(app_configs, **kwargs):
|
|||||||
error = "Paperless can't find {}. Without it, consumption is impossible."
|
error = "Paperless can't find {}. Without it, consumption is impossible."
|
||||||
hint = "Either it's not in your ${PATH} or it's not installed."
|
hint = "Either it's not in your ${PATH} or it's not installed."
|
||||||
|
|
||||||
binaries = (settings.CONVERT_BINARY, settings.UNPAPER_BINARY, "tesseract")
|
binaries = (
|
||||||
|
settings.CONVERT_BINARY,
|
||||||
|
settings.OPTIPNG_BINARY,
|
||||||
|
settings.UNPAPER_BINARY,
|
||||||
|
"tesseract"
|
||||||
|
)
|
||||||
|
|
||||||
check_messages = []
|
check_messages = []
|
||||||
for binary in binaries:
|
for binary in binaries:
|
||||||
|
|||||||
@@ -1,15 +1,20 @@
|
|||||||
|
from django.contrib.auth.models import User as DjangoUser
|
||||||
|
|
||||||
|
|
||||||
class User:
|
class User:
|
||||||
"""
|
"""
|
||||||
This is a dummy django User used with our middleware to disable
|
This is a dummy django User used with our middleware to disable
|
||||||
login authentication if that is configured in paperless.conf
|
login authentication if that is configured in paperless.conf
|
||||||
"""
|
"""
|
||||||
|
|
||||||
is_superuser = True
|
is_superuser = True
|
||||||
is_active = True
|
is_active = True
|
||||||
is_staff = True
|
is_staff = True
|
||||||
is_authenticated = True
|
is_authenticated = True
|
||||||
|
|
||||||
# Must be -1 to avoid colliding with real user ID's (which start at 1)
|
@property
|
||||||
id = -1
|
def id(self):
|
||||||
|
return DjangoUser.objects.order_by("pk").first().pk
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def pk(self):
|
def pk(self):
|
||||||
@@ -17,9 +22,9 @@ class User:
|
|||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
NOTE: These are here as a hack instead of being in the User definition
|
NOTE: These are here as a hack instead of being in the User definition
|
||||||
above due to the way pycodestyle handles lamdbdas.
|
NOTE: above due to the way pycodestyle handles lamdbdas.
|
||||||
See https://github.com/PyCQA/pycodestyle/issues/379 for more.
|
NOTE: See https://github.com/PyCQA/pycodestyle/issues/379 for more.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
User.has_module_perms = lambda *_: True
|
User.has_module_perms = lambda *_: True
|
||||||
|
|||||||
@@ -72,6 +72,7 @@ INSTALLED_APPS = [
|
|||||||
"corsheaders",
|
"corsheaders",
|
||||||
"django_extensions",
|
"django_extensions",
|
||||||
|
|
||||||
|
"paperless",
|
||||||
"documents.apps.DocumentsConfig",
|
"documents.apps.DocumentsConfig",
|
||||||
"reminders.apps.RemindersConfig",
|
"reminders.apps.RemindersConfig",
|
||||||
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
"paperless_tesseract.apps.PaperlessTesseractConfig",
|
||||||
@@ -82,6 +83,7 @@ INSTALLED_APPS = [
|
|||||||
"rest_framework",
|
"rest_framework",
|
||||||
"crispy_forms",
|
"crispy_forms",
|
||||||
"django_filters",
|
"django_filters",
|
||||||
|
"djangoql",
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -152,6 +154,10 @@ if os.getenv("PAPERLESS_DBUSER"):
|
|||||||
}
|
}
|
||||||
if os.getenv("PAPERLESS_DBPASS"):
|
if os.getenv("PAPERLESS_DBPASS"):
|
||||||
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
|
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
|
||||||
|
if os.getenv("PAPERLESS_DBHOST"):
|
||||||
|
DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
|
||||||
|
if os.getenv("PAPERLESS_DBPORT"):
|
||||||
|
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
|
||||||
|
|
||||||
|
|
||||||
# Password validation
|
# Password validation
|
||||||
@@ -199,6 +205,16 @@ STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
|
|||||||
MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
|
MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
|
||||||
|
|
||||||
|
|
||||||
|
# Other
|
||||||
|
|
||||||
|
# Disable Django's artificial limit on the number of form fields to submit at
|
||||||
|
# once. This is a protection against overloading the server, but since this is
|
||||||
|
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
|
||||||
|
# of log entries outweight the benefits of such a safeguard.
|
||||||
|
|
||||||
|
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
|
||||||
|
|
||||||
|
|
||||||
# Paperless-specific stuff
|
# Paperless-specific stuff
|
||||||
# You shouldn't have to edit any of these values. Rather, you can set these
|
# You shouldn't have to edit any of these values. Rather, you can set these
|
||||||
# values in /etc/paperless.conf instead.
|
# values in /etc/paperless.conf instead.
|
||||||
@@ -247,6 +263,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
|
|||||||
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
|
||||||
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
|
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
|
||||||
|
|
||||||
|
# OptiPNG
|
||||||
|
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
|
||||||
|
|
||||||
# Unpaper
|
# Unpaper
|
||||||
UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")
|
UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")
|
||||||
|
|
||||||
@@ -293,6 +312,7 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
|
|||||||
|
|
||||||
# Specify the default date order (for autodetected dates)
|
# Specify the default date order (for autodetected dates)
|
||||||
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
||||||
|
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
||||||
|
|
||||||
# Specify for how many years a correspondent is considered recent. Recent
|
# Specify for how many years a correspondent is considered recent. Recent
|
||||||
# correspondents will be shown in a separate "Recent correspondents" filter as
|
# correspondents will be shown in a separate "Recent correspondents" filter as
|
||||||
|
|||||||
BIN
src/paperless/static/paperless/img/favicon.ico
Normal file
|
After Width: | Height: | Size: 108 KiB |
BIN
src/paperless/static/paperless/img/logo-dark.png
Normal file
|
After Width: | Height: | Size: 6.2 KiB |
BIN
src/paperless/static/paperless/img/logo-light.png
Normal file
|
After Width: | Height: | Size: 8.6 KiB |
@@ -6,6 +6,7 @@ from django.views.decorators.csrf import csrf_exempt
|
|||||||
from django.views.generic import RedirectView
|
from django.views.generic import RedirectView
|
||||||
from rest_framework.routers import DefaultRouter
|
from rest_framework.routers import DefaultRouter
|
||||||
|
|
||||||
|
from paperless.views import FaviconView
|
||||||
from documents.views import (
|
from documents.views import (
|
||||||
CorrespondentViewSet,
|
CorrespondentViewSet,
|
||||||
DocumentViewSet,
|
DocumentViewSet,
|
||||||
@@ -44,6 +45,9 @@ urlpatterns = [
|
|||||||
# File uploads
|
# File uploads
|
||||||
url(r"^push$", csrf_exempt(PushView.as_view()), name="push"),
|
url(r"^push$", csrf_exempt(PushView.as_view()), name="push"),
|
||||||
|
|
||||||
|
# Favicon
|
||||||
|
url(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
|
||||||
|
|
||||||
# The Django admin
|
# The Django admin
|
||||||
url(r"admin/", admin.site.urls),
|
url(r"admin/", admin.site.urls),
|
||||||
|
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
__version__ = (2, 3, 0)
|
__version__ = (2, 6, 1)
|
||||||
|
|||||||
@@ -1,3 +1,7 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from django.http import HttpResponse
|
||||||
|
from django.views.generic import View
|
||||||
from rest_framework.pagination import PageNumberPagination
|
from rest_framework.pagination import PageNumberPagination
|
||||||
|
|
||||||
|
|
||||||
@@ -5,3 +9,17 @@ class StandardPagination(PageNumberPagination):
|
|||||||
page_size = 25
|
page_size = 25
|
||||||
page_size_query_param = "page-size"
|
page_size_query_param = "page-size"
|
||||||
max_page_size = 100000
|
max_page_size = 100000
|
||||||
|
|
||||||
|
|
||||||
|
class FaviconView(View):
|
||||||
|
|
||||||
|
def get(self, request, *args, **kwargs):
|
||||||
|
favicon = os.path.join(
|
||||||
|
os.path.dirname(__file__),
|
||||||
|
"static",
|
||||||
|
"paperless",
|
||||||
|
"img",
|
||||||
|
"favicon.ico"
|
||||||
|
)
|
||||||
|
with open(favicon, "rb") as f:
|
||||||
|
return HttpResponse(f, content_type="image/x-icon")
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import re
|
|||||||
import subprocess
|
import subprocess
|
||||||
from multiprocessing.pool import Pool
|
from multiprocessing.pool import Pool
|
||||||
|
|
||||||
import dateparser
|
|
||||||
import langdetect
|
import langdetect
|
||||||
import pyocr
|
import pyocr
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@@ -14,7 +13,7 @@ from pyocr.libtesseract.tesseract_raw import \
|
|||||||
from pyocr.tesseract import TesseractError
|
from pyocr.tesseract import TesseractError
|
||||||
|
|
||||||
import pdftotext
|
import pdftotext
|
||||||
from documents.parsers import DocumentParser, ParseError, DATE_REGEX
|
from documents.parsers import DocumentParser, ParseError
|
||||||
|
|
||||||
from .languages import ISO639
|
from .languages import ISO639
|
||||||
|
|
||||||
@@ -33,7 +32,6 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
|
DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
|
||||||
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
||||||
UNPAPER = settings.UNPAPER_BINARY
|
UNPAPER = settings.UNPAPER_BINARY
|
||||||
DATE_ORDER = settings.DATE_ORDER
|
|
||||||
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
|
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
|
||||||
OCR_ALWAYS = settings.OCR_ALWAYS
|
OCR_ALWAYS = settings.OCR_ALWAYS
|
||||||
|
|
||||||
@@ -46,15 +44,18 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
The thumbnail of a PDF is just a 500px wide image of the first page.
|
The thumbnail of a PDF is just a 500px wide image of the first page.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
out_path = os.path.join(self.tempdir, "convert.png")
|
||||||
|
|
||||||
|
# Run convert to get a decent thumbnail
|
||||||
run_convert(
|
run_convert(
|
||||||
self.CONVERT,
|
self.CONVERT,
|
||||||
"-scale", "500x5000",
|
"-scale", "500x5000",
|
||||||
"-alpha", "remove",
|
"-alpha", "remove",
|
||||||
"{}[0]".format(self.document_path),
|
"{}[0]".format(self.document_path),
|
||||||
os.path.join(self.tempdir, "convert.png")
|
out_path
|
||||||
)
|
)
|
||||||
|
|
||||||
return os.path.join(self.tempdir, "convert.png")
|
return out_path
|
||||||
|
|
||||||
def _is_ocred(self):
|
def _is_ocred(self):
|
||||||
|
|
||||||
@@ -152,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
)
|
)
|
||||||
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
||||||
return raw_text
|
return raw_text
|
||||||
raise OCRError("Language detection failed")
|
error_msg = ("Language detection failed. Set "
|
||||||
|
"PAPERLESS_FORGIVING_OCR in config file to continue "
|
||||||
|
"anyway.")
|
||||||
|
raise OCRError(error_msg)
|
||||||
|
|
||||||
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
|
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
|
||||||
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
||||||
@@ -202,40 +206,6 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE)
|
text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def get_date(self):
|
|
||||||
date = None
|
|
||||||
datestring = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
text = self.get_text()
|
|
||||||
except ParseError as e:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Iterate through all regex matches and try to parse the date
|
|
||||||
for m in re.finditer(DATE_REGEX, text):
|
|
||||||
datestring = m.group(0)
|
|
||||||
|
|
||||||
try:
|
|
||||||
date = dateparser.parse(
|
|
||||||
datestring,
|
|
||||||
settings={'DATE_ORDER': self.DATE_ORDER,
|
|
||||||
'PREFER_DAY_OF_MONTH': 'first',
|
|
||||||
'RETURN_AS_TIMEZONE_AWARE': True})
|
|
||||||
except TypeError:
|
|
||||||
# Skip all matches that do not parse to a proper date
|
|
||||||
continue
|
|
||||||
|
|
||||||
if date is not None:
|
|
||||||
break
|
|
||||||
|
|
||||||
if date is not None:
|
|
||||||
self.log("info", "Detected document date " + date.isoformat() +
|
|
||||||
" based on string " + datestring)
|
|
||||||
else:
|
|
||||||
self.log("info", "Unable to detect date for document")
|
|
||||||
|
|
||||||
return date
|
|
||||||
|
|
||||||
|
|
||||||
def run_convert(*args):
|
def run_convert(*args):
|
||||||
|
|
||||||
@@ -251,7 +221,8 @@ def run_convert(*args):
|
|||||||
|
|
||||||
def run_unpaper(args):
|
def run_unpaper(args):
|
||||||
unpaper, pnm = args
|
unpaper, pnm = args
|
||||||
command_args = unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm")
|
command_args = (unpaper, "--overwrite", pnm,
|
||||||
|
pnm.replace(".pnm", ".unpaper.pnm"))
|
||||||
if not subprocess.Popen(command_args).wait() == 0:
|
if not subprocess.Popen(command_args).wait() == 0:
|
||||||
raise ParseError("Unpaper failed at {}".format(command_args))
|
raise ParseError("Unpaper failed at {}".format(command_args))
|
||||||
|
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 136 KiB |
|
Before Width: | Height: | Size: 135 KiB |
|
Before Width: | Height: | Size: 138 KiB |
|
Before Width: | Height: | Size: 138 KiB |
|
Before Width: | Height: | Size: 136 KiB |
|
Before Width: | Height: | Size: 136 KiB |
@@ -8,6 +8,7 @@ from dateutil import tz
|
|||||||
from django.test import TestCase
|
from django.test import TestCase
|
||||||
|
|
||||||
from ..parsers import RasterisedDocumentParser
|
from ..parsers import RasterisedDocumentParser
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
class TestDate(TestCase):
|
class TestDate(TestCase):
|
||||||
@@ -15,73 +16,67 @@ class TestDate(TestCase):
|
|||||||
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
|
||||||
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
|
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])
|
||||||
|
|
||||||
|
MOCK_SCRATCH = "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH" # NOQA: E501
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
os.makedirs(self.SCRATCH, exist_ok=True)
|
os.makedirs(self.SCRATCH, exist_ok=True)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
shutil.rmtree(self.SCRATCH)
|
shutil.rmtree(self.SCRATCH)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_date_format_1(self):
|
def test_date_format_1(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = "lorem ipsum 130218 lorem ipsum"
|
document._text = "lorem ipsum 130218 lorem ipsum"
|
||||||
self.assertEqual(document.get_date(), None)
|
self.assertEqual(document.get_date(), None)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_date_format_2(self):
|
def test_date_format_2(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = "lorem ipsum 2018 lorem ipsum"
|
document._text = "lorem ipsum 2018 lorem ipsum"
|
||||||
self.assertEqual(document.get_date(), None)
|
self.assertEqual(document.get_date(), None)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_date_format_3(self):
|
def test_date_format_3(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = "lorem ipsum 20180213 lorem ipsum"
|
document._text = "lorem ipsum 20180213 lorem ipsum"
|
||||||
self.assertEqual(document.get_date(), None)
|
self.assertEqual(document.get_date(), None)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_date_format_4(self):
|
def test_date_format_4(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = "lorem ipsum 13.02.2018 lorem ipsum"
|
document._text = "lorem ipsum 13.02.2018 lorem ipsum"
|
||||||
|
date = document.get_date()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
date,
|
||||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 2, 13, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_date_format_5(self):
|
def test_date_format_5(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = (
|
document._text = (
|
||||||
"lorem ipsum 130218, 2018, 20180213 and 13.02.2018 lorem ipsum")
|
"lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
|
||||||
|
"ipsum"
|
||||||
|
)
|
||||||
|
date = document.get_date()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
date,
|
||||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 2, 13, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_date_format_6(self):
|
def test_date_format_6(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
@@ -98,10 +93,7 @@ class TestDate(TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(document.get_date(), None)
|
self.assertEqual(document.get_date(), None)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_date_format_7(self):
|
def test_date_format_7(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
@@ -110,277 +102,83 @@ class TestDate(TestCase):
|
|||||||
"März 2019\n"
|
"März 2019\n"
|
||||||
"lorem ipsum"
|
"lorem ipsum"
|
||||||
)
|
)
|
||||||
|
date = document.get_date()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
date,
|
||||||
datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2019, 3, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_date_format_8(self):
|
def test_date_format_8(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = ("lorem ipsum\n"
|
document._text = (
|
||||||
"Wohnort\n"
|
"lorem ipsum\n"
|
||||||
"3100\n"
|
"Wohnort\n"
|
||||||
"IBAN\n"
|
"3100\n"
|
||||||
"AT87 4534\n"
|
"IBAN\n"
|
||||||
"1234\n"
|
"AT87 4534\n"
|
||||||
"1234 5678\n"
|
"1234\n"
|
||||||
"BIC\n"
|
"1234 5678\n"
|
||||||
"lorem ipsum\n"
|
"BIC\n"
|
||||||
"März 2020")
|
"lorem ipsum\n"
|
||||||
self.assertEqual(document.get_date(),
|
"März 2020"
|
||||||
datetime.datetime(2020, 3, 1, 0, 0,
|
)
|
||||||
tzinfo=tz.tzutc()))
|
self.assertEqual(
|
||||||
|
document.get_date(),
|
||||||
|
datetime.datetime(
|
||||||
|
2020, 3, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_date_format_9(self):
|
def test_date_format_9(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = ("lorem ipsum\n"
|
document._text = (
|
||||||
"27. Nullmonth 2020\n"
|
"lorem ipsum\n"
|
||||||
"März 2020\n"
|
"27. Nullmonth 2020\n"
|
||||||
"lorem ipsum")
|
"März 2020\n"
|
||||||
self.assertEqual(document.get_date(),
|
"lorem ipsum"
|
||||||
datetime.datetime(2020, 3, 1, 0, 0,
|
)
|
||||||
tzinfo=tz.tzutc()))
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_1_pdf(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), True)
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2020, 3, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||||
SCRATCH
|
return_value="01-07-0590 00:00:00"
|
||||||
)
|
)
|
||||||
def test_get_text_1_png(self):
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
|
def test_crazy_date_past(self, *args):
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser("/dev/null")
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), False)
|
self.assertIsNone(document.get_date())
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||||
SCRATCH
|
return_value="01-07-2350 00:00:00"
|
||||||
)
|
)
|
||||||
def test_get_text_2_pdf(self):
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
|
def test_crazy_date_future(self, *args):
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser("/dev/null")
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertIsNone(document.get_date())
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
"paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
|
||||||
SCRATCH
|
return_value="01-07-0590 00:00:00"
|
||||||
)
|
)
|
||||||
def test_get_text_2_png(self):
|
@mock.patch(MOCK_SCRATCH, SCRATCH)
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
|
def test_crazy_date_past(self, *args):
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser("/dev/null")
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), False)
|
self.assertIsNone(document.get_date())
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_3_pdf(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), True)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_3_png(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), False)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_4_pdf(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), True)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_4_png(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), False)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_5_pdf(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), True)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_5_png(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), False)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_6_pdf_us(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
document.DATE_ORDER = "MDY"
|
|
||||||
self.assertEqual(document._is_ocred(), True)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_6_png_us(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
document.DATE_ORDER = "MDY"
|
|
||||||
self.assertEqual(document._is_ocred(), False)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_6_pdf_eu(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), True)
|
|
||||||
self.assertEqual(document.get_date(), None)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_6_png_eu(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), False)
|
|
||||||
self.assertEqual(document.get_date(), None)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_7_pdf(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), True)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_8_pdf(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), True)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch(
|
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
|
||||||
SCRATCH
|
|
||||||
)
|
|
||||||
def test_get_text_9_pdf(self):
|
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
|
|
||||||
document = RasterisedDocumentParser(input_file)
|
|
||||||
document.get_text()
|
|
||||||
self.assertEqual(document._is_ocred(), True)
|
|
||||||
self.assertEqual(
|
|
||||||
document.get_date(),
|
|
||||||
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -1,11 +1,9 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
import dateparser
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
from documents.parsers import DocumentParser, ParseError, DATE_REGEX
|
from documents.parsers import DocumentParser, ParseError
|
||||||
|
|
||||||
|
|
||||||
class TextDocumentParser(DocumentParser):
|
class TextDocumentParser(DocumentParser):
|
||||||
@@ -16,7 +14,6 @@ class TextDocumentParser(DocumentParser):
|
|||||||
CONVERT = settings.CONVERT_BINARY
|
CONVERT = settings.CONVERT_BINARY
|
||||||
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
|
||||||
UNPAPER = settings.UNPAPER_BINARY
|
UNPAPER = settings.UNPAPER_BINARY
|
||||||
DATE_ORDER = settings.DATE_ORDER
|
|
||||||
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
|
DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
|
||||||
OCR_ALWAYS = settings.OCR_ALWAYS
|
OCR_ALWAYS = settings.OCR_ALWAYS
|
||||||
|
|
||||||
@@ -26,7 +23,7 @@ class TextDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
def get_thumbnail(self):
|
def get_thumbnail(self):
|
||||||
"""
|
"""
|
||||||
The thumbnail of a txt is just a 500px wide image of the text
|
The thumbnail of a text file is just a 500px wide image of the text
|
||||||
rendered onto a letter-sized page.
|
rendered onto a letter-sized page.
|
||||||
"""
|
"""
|
||||||
# The below is heavily cribbed from https://askubuntu.com/a/590951
|
# The below is heavily cribbed from https://askubuntu.com/a/590951
|
||||||
@@ -35,7 +32,7 @@ class TextDocumentParser(DocumentParser):
|
|||||||
text_color = "black" # text color
|
text_color = "black" # text color
|
||||||
psize = [500, 647] # icon size
|
psize = [500, 647] # icon size
|
||||||
n_lines = 50 # number of lines to show
|
n_lines = 50 # number of lines to show
|
||||||
output_file = os.path.join(self.tempdir, "convert-txt.png")
|
out_path = os.path.join(self.tempdir, "convert.png")
|
||||||
|
|
||||||
temp_bg = os.path.join(self.tempdir, "bg.png")
|
temp_bg = os.path.join(self.tempdir, "bg.png")
|
||||||
temp_txlayer = os.path.join(self.tempdir, "tx.png")
|
temp_txlayer = os.path.join(self.tempdir, "tx.png")
|
||||||
@@ -46,9 +43,13 @@ class TextDocumentParser(DocumentParser):
|
|||||||
work_size = ",".join([str(n - 1) for n in psize])
|
work_size = ",".join([str(n - 1) for n in psize])
|
||||||
r = str(round(psize[0] / 10))
|
r = str(round(psize[0] / 10))
|
||||||
rounded = ",".join([r, r])
|
rounded = ",".join([r, r])
|
||||||
run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
|
run_command(
|
||||||
'"fill ', bg_color, ' roundrectangle 0,0,',
|
self.CONVERT,
|
||||||
work_size, ",", rounded, '" ', temp_bg)
|
"-size ", picsize,
|
||||||
|
' xc:none -draw ',
|
||||||
|
'"fill ', bg_color, ' roundrectangle 0,0,', work_size, ",", rounded, '" ', # NOQA: E501
|
||||||
|
temp_bg
|
||||||
|
)
|
||||||
|
|
||||||
def read_text():
|
def read_text():
|
||||||
with open(self.document_path, 'r') as src:
|
with open(self.document_path, 'r') as src:
|
||||||
@@ -57,22 +58,29 @@ class TextDocumentParser(DocumentParser):
|
|||||||
return text.replace('"', "'")
|
return text.replace('"', "'")
|
||||||
|
|
||||||
def create_txlayer():
|
def create_txlayer():
|
||||||
run_command(self.CONVERT,
|
run_command(
|
||||||
"-background none",
|
self.CONVERT,
|
||||||
"-fill",
|
"-background none",
|
||||||
text_color,
|
"-fill",
|
||||||
"-pointsize", "12",
|
text_color,
|
||||||
"-border 4 -bordercolor none",
|
"-pointsize", "12",
|
||||||
"-size ", txsize,
|
"-border 4 -bordercolor none",
|
||||||
' caption:"', read_text(), '" ',
|
"-size ", txsize,
|
||||||
temp_txlayer)
|
' caption:"', read_text(), '" ',
|
||||||
|
temp_txlayer
|
||||||
|
)
|
||||||
|
|
||||||
create_txlayer()
|
create_txlayer()
|
||||||
create_bg()
|
create_bg()
|
||||||
run_command(self.CONVERT, temp_bg, temp_txlayer,
|
run_command(
|
||||||
"-background None -layers merge ", output_file)
|
self.CONVERT,
|
||||||
|
temp_bg,
|
||||||
|
temp_txlayer,
|
||||||
|
"-background None -layers merge ",
|
||||||
|
out_path
|
||||||
|
)
|
||||||
|
|
||||||
return output_file
|
return out_path
|
||||||
|
|
||||||
def get_text(self):
|
def get_text(self):
|
||||||
|
|
||||||
@@ -84,40 +92,6 @@ class TextDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
return self._text
|
return self._text
|
||||||
|
|
||||||
def get_date(self):
|
|
||||||
date = None
|
|
||||||
datestring = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
text = self.get_text()
|
|
||||||
except ParseError as e:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Iterate through all regex matches and try to parse the date
|
|
||||||
for m in re.finditer(DATE_REGEX, text):
|
|
||||||
datestring = m.group(0)
|
|
||||||
|
|
||||||
try:
|
|
||||||
date = dateparser.parse(
|
|
||||||
datestring,
|
|
||||||
settings={'DATE_ORDER': self.DATE_ORDER,
|
|
||||||
'PREFER_DAY_OF_MONTH': 'first',
|
|
||||||
'RETURN_AS_TIMEZONE_AWARE': True})
|
|
||||||
except TypeError:
|
|
||||||
# Skip all matches that do not parse to a proper date
|
|
||||||
continue
|
|
||||||
|
|
||||||
if date is not None:
|
|
||||||
break
|
|
||||||
|
|
||||||
if date is not None:
|
|
||||||
self.log("info", "Detected document date " + date.isoformat() +
|
|
||||||
" based on string " + datestring)
|
|
||||||
else:
|
|
||||||
self.log("info", "Unable to detect date for document")
|
|
||||||
|
|
||||||
return date
|
|
||||||
|
|
||||||
|
|
||||||
def run_command(*args):
|
def run_command(*args):
|
||||||
environment = os.environ.copy()
|
environment = os.environ.copy()
|
||||||
|
|||||||
19
src/reminders/migrations/0002_auto_20181007_1420.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# Generated by Django 2.0.8 on 2018-10-07 14:20
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.db.models.deletion
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('reminders', '0001_initial'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='reminder',
|
||||||
|
name='document',
|
||||||
|
field=models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, to='documents.Document'),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -4,7 +4,6 @@ from django.db import models
|
|||||||
class Reminder(models.Model):
|
class Reminder(models.Model):
|
||||||
|
|
||||||
document = models.ForeignKey(
|
document = models.ForeignKey(
|
||||||
"documents.Document", on_delete=models.PROTECT
|
"documents.Document", on_delete=models.PROTECT)
|
||||||
)
|
|
||||||
date = models.DateTimeField()
|
date = models.DateTimeField()
|
||||||
note = models.TextField(blank=True)
|
note = models.TextField(blank=True)
|
||||||
|
|||||||
@@ -17,6 +17,5 @@ deps=pycodestyle
|
|||||||
|
|
||||||
[testenv:doc]
|
[testenv:doc]
|
||||||
deps =
|
deps =
|
||||||
-r{toxinidir}/../requirements.txt
|
-r {toxinidir}/../requirements.txt
|
||||||
sphinx
|
|
||||||
commands=sphinx-build -b html ../docs ../docs/_build -W
|
commands=sphinx-build -b html ../docs ../docs/_build -W
|
||||||
|
|||||||