Ignore sqlite3-journal files too

Fix missing links
Merge branch 'sbrunner-dql'
2025-12-14 23:21:18 +00:00 · 2019-01-27 13:48:05 +00:00 · 2019-01-27 13:47:40 +00:00 · 2019-01-27 13:42:58 +00:00 · 2019-01-27 13:39:56 +00:00 · 2019-01-27 13:06:46 +00:00
94 changed files with 6378 additions and 1085 deletions
--- a/.editorconfig
+++ b/.editorconfig
@@ -0,0 +1,28 @@
+# EditorConfig: http://EditorConfig.org
+
+root = true
+
+[*]
+indent_style = tab
+indent_size = 2
+insert_final_newline = true
+trim_trailing_whitespace = true
+end_of_line = lf
+charset = utf-8
+max_line_length = 79
+
+[{*.html,*.css,*.js}]
+max_line_length = off
+
+[*.py]
+indent_size = 4
+indent_style = space
+
+[*.yml]
+indent_style = space
+
+# Tests don't get a line width restriction.  It's still a good idea to follow
+# the 79 character rule, but in the interests of clarity, tests often need to
+# violate it.
+[**/test_*.py]
+max_line_length = off
--- a/.gitignore
+++ b/.gitignore
@@ -66,6 +66,7 @@ media/overrides.js

 # Sqlite database
 db.sqlite3
+db.sqlite3-journal

 # PyCharm
 .idea
@@ -73,7 +74,6 @@ db.sqlite3
 # Other stuff that doesn't belong
 .virtualenv
 virtualenv
-.vagrant
 docker-compose.yml
 docker-compose.env

@@ -81,3 +81,5 @@ docker-compose.env
 scripts/import-for-development
 scripts/nuke

+# Static files collected by the collectstatic command
+/static/
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,19 +2,22 @@ language: python

 before_install:
 - sudo apt-get update -qq
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr tesseract-ocr-eng
+- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr

 sudo: false

 matrix:
    include:
-        - python: 3.4
-        - python: 3.5
-        - python: 3.6
+        - python: "3.4"
+        - python: "3.5"
+        - python: "3.6"
+        - python: "3.7-dev"

 install:
-    - pip install --requirement requirements.txt
-    - pip install sphinx
+    - pip install --upgrade pip pipenv sphinx
+    - pipenv lock -r > requirements.txt
+    - pip install -r requirements.txt
+
 script:
    - cd src/
    - pytest --cov
@@ -22,4 +25,4 @@ script:
    - sphinx-build -b html ../docs ../docs/_build -W

 after_success:
-    - coveralls
+  - coveralls
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,28 +1,28 @@
-FROM alpine:3.7
+FROM alpine:3.8

 LABEL maintainer="The Paperless Project https://github.com/danielquinn/paperless" \
      contributors="Guy Addadi <addadi@gmail.com>, Pit Kleyersburg <pitkley@googlemail.com>, \
        Sven Fischer <git-dev@linux4tw.de>"

-# Copy requirements file and init script
-COPY requirements.txt /usr/src/paperless/
+# Copy Pipfile, Pipfile.lock and init script
+COPY Pipfile* /usr/src/paperless/
 COPY scripts/docker-entrypoint.sh /sbin/docker-entrypoint.sh

 # Set export and consumption directories
 ENV PAPERLESS_EXPORT_DIR=/export \
    PAPERLESS_CONSUMPTION_DIR=/consume

-# Install dependencies
-RUN apk --no-cache --update add \
-        python3 gnupg libmagic bash shadow curl \
-        sudo poppler tesseract-ocr imagemagick ghostscript unpaper && \
-    apk --no-cache add --virtual .build-dependencies \
-        python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
+
+RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow curl \
+        sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
+    apk add --virtual .build-dependencies \
+        python3-dev poppler-dev postgresql-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
 # Install python dependencies
    python3 -m ensurepip && \
    rm -r /usr/lib/python*/ensurepip && \
    cd /usr/src/paperless && \
-    pip3 install --no-cache-dir -r requirements.txt && \
+    pip3 install --upgrade pip pipenv && \
+    pipenv install --system --deploy && \
 # Remove build dependencies
    apk del .build-dependencies && \
 # Create the consumption directory
--- a/Pipfile
+++ b/Pipfile
@@ -25,6 +25,8 @@ python-dateutil = "*"
 python-dotenv = "*"
 python-gnupg = "*"
 pytz = "*"
+sphinx = "*"
+tox = "*"
 pycodestyle = "*"
 pytest = "*"
 pytest-cov = "*"
@@ -32,9 +34,8 @@ pytest-django = "*"
 pytest-sugar = "*"
 pytest-env = "*"
 pytest-xdist = "*"
+psycopg2 = "*"
+djangoql = "*"

 [dev-packages]
 ipython = "*"
-sphinx = "*"
-tox = "*"
-
--- a/Pipfile.lock
+++ b/Pipfile.lock
--- a/README-de.md
+++ b/README-de.md
@@ -1,7 +1,6 @@
-*[English](README.md)*<br/>
-*[Greek](README-el.md)*
+[ [en](README.md) | de | [el](README-el.md) ]

-# Paperless
+![Paperless](https://raw.githubusercontent.com/danielquinn/paperless/master/src/paperless/static/paperless/img/logo-dark.png)

 [![Dokumentation](https://readthedocs.org/projects/paperless/badge/?version=latest)](https://paperless.readthedocs.org/) [![Chat](https://badges.gitter.im/danielquinn/paperless.svg)](https://gitter.im/danielquinn/paperless) [![Travis](https://travis-ci.org/danielquinn/paperless.svg?branch=master)](https://travis-ci.org/danielquinn/paperless) [![Coverage Status](https://coveralls.io/repos/github/danielquinn/paperless/badge.svg?branch=master)](https://coveralls.io/github/danielquinn/paperless?branch=master) [![Danke](https://img.shields.io/badge/THANKS-md-ff69b4.svg)](https://github.com/danielquinn/paperless/blob/master/THANKS.md)

--- a/README-el.md
+++ b/README-el.md
@@ -1,7 +1,6 @@
-*[English](README.md)*<br/>
-*[German](README-de.md)*
+[ [en](README.md) | [de](README-de.md) | el ]

-# Paperless
+![Paperless](https://raw.githubusercontent.com/danielquinn/paperless/master/src/paperless/static/paperless/img/logo-dark.png)

 [![Documentation](https://readthedocs.org/projects/paperless/badge/?version=latest)](https://paperless.readthedocs.org/) [![Chat](https://badges.gitter.im/danielquinn/paperless.svg)](https://gitter.im/danielquinn/paperless) [![Travis](https://travis-ci.org/danielquinn/paperless.svg?branch=master)](https://travis-ci.org/danielquinn/paperless) [![Coverage Status](https://coveralls.io/repos/github/danielquinn/paperless/badge.svg?branch=master)](https://coveralls.io/github/danielquinn/paperless?branch=master) [![Thanks](https://img.shields.io/badge/THANKS-md-ff69b4.svg)](https://github.com/danielquinn/paperless/blob/master/THANKS.md)

--- a/README.md
+++ b/README.md
@@ -1,7 +1,6 @@
-*[German](README-de.md)*<br/>
-*[Greek](README-el.md)*
+[ en | [de](README-de.md) | [el](README-el.md) ]

-# Paperless
+![Paperless](https://raw.githubusercontent.com/danielquinn/paperless/master/src/paperless/static/paperless/img/logo-dark.png)

 [![Documentation](https://readthedocs.org/projects/paperless/badge/?version=latest)](https://paperless.readthedocs.org/) [![Chat](https://badges.gitter.im/danielquinn/paperless.svg)](https://gitter.im/danielquinn/paperless) [![Travis](https://travis-ci.org/danielquinn/paperless.svg?branch=master)](https://travis-ci.org/danielquinn/paperless) [![Coverage Status](https://coveralls.io/repos/github/danielquinn/paperless/badge.svg?branch=master)](https://coveralls.io/github/danielquinn/paperless?branch=master) [![Thanks](https://img.shields.io/badge/THANKS-md-ff69b4.svg)](https://github.com/danielquinn/paperless/blob/master/THANKS.md)

--- a/Vagrantfile
+++ b/Vagrantfile
@@ -1,20 +0,0 @@
-# -*- mode: ruby -*-
-# vi: set ft=ruby :
-
-VAGRANT_API_VERSION = "2"
-Vagrant.configure(VAGRANT_API_VERSION) do |config|
-  config.vm.box = "ubuntu/trusty64"
-
-  # Provision using shell
-  config.vm.host_name = "dev.paperless"
-  config.vm.synced_folder ".", "/opt/paperless"
-  config.vm.provision "shell", path: "scripts/vagrant-provision"
-
-  # Networking details
-  config.vm.network "private_network", ip: "172.28.128.4"
-
-  config.vm.provider "virtualbox" do |vb|
-    # Customize the amount of memory on the VM:
-    vb.memory = "1024"
-  end
-end
--- a/docker-compose.env.example
+++ b/docker-compose.env.example
@@ -1,38 +1,22 @@
 # Environment variables to set for Paperless
-# Commented out variables will be replaced by a default within Paperless.
+# Commented out variables will be replaced with a default within Paperless.
+#
+# In addition to what you see here, you can also define any values you find in
+# paperless.conf.example here.  Values like:
+#
+# * PAPERLESS_PASSPHRASE
+# * PAPERLESS_CONSUMPTION_DIR
+# * PAPERLESS_CONSUME_MAIL_HOST
+#
+# ...are all explained in that file but can be defined here, since the Docker
+# installation doesn't make use of paperless.conf.

-# Passphrase Paperless uses to encrypt and decrypt your documents, if you want
-# encryption at all.
-# PAPERLESS_PASSPHRASE=CHANGE_ME

-# The amount of threads to use for text recognition
-# PAPERLESS_OCR_THREADS=4
-
-# Additional languages to install for text recognition
+# Additional languages to install for text recognition.  Note that this is
+# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the
+# default language used when guessing the language from the OCR output.
 # PAPERLESS_OCR_LANGUAGES=deu ita

 # You can change the default user and group id to a custom one
 # USERMAP_UID=1000
 # USERMAP_GID=1000
-
-###############################################################################
-####                         Mail Consumption                              ####
-###############################################################################
-
-# These values are required if you want paperless to check a particular email
-# box every 10 minutes and attempt to consume documents from there.  If you
-# don't define a HOST, mail checking will just be disabled.
-# Don't use quotes after = or it will crash your docker
-# PAPERLESS_CONSUME_MAIL_HOST=
-# PAPERLESS_CONSUME_MAIL_PORT=
-# PAPERLESS_CONSUME_MAIL_USER=
-# PAPERLESS_CONSUME_MAIL_PASS=
-
-# Override the default IMAP inbox here. If it's not set, Paperless defaults to
-# INBOX.
-# PAPERLESS_CONSUME_MAIL_INBOX=INBOX
-
-# Any email sent to the target account that does not contain this text will be
-# ignored.  Mail checking won't work without this.
-# PAPERLESS_EMAIL_SECRET=
-
--- a/docker-compose.yml.example
+++ b/docker-compose.yml.example
@@ -17,6 +17,9 @@ services:
        volumes:
            - data:/usr/src/paperless/data
            - media:/usr/src/paperless/media
+            # You have to adapt the local path you want the consumption
+            # directory to mount to by modifying the part before the ':'.
+            - ./consume:/consume
        env_file: docker-compose.env
        # The reason the line is here is so that the webserver that doesn't do
        # any text recognition and doesn't have to install unnecessary
@@ -36,8 +39,8 @@ services:
        volumes:
            - data:/usr/src/paperless/data
            - media:/usr/src/paperless/media
-            # You have to adapt the local path you want the consumption
-            # directory to mount to by modifying the part before the ':'.
+            # This should be set to the same value as the consume directory
+            # in the webserver service above.
            - ./consume:/consume
            # Likewise, you can add a local path to mount a directory for
            # exporting. This is not strictly needed for paperless to
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,121 @@
 Changelog
 #########

+2.7.0
+=====
+
+* `syntonym`_ submitted a pull request to catch IMAP connection errors `#475`_.
+* `Stéphane Brunner`_ added ``psycopg2`` to the Pipfile `#489`_.  He also fixed
+  a syntax error in ``docker-compose.yml.example`` `#488`_ and added `DjangoQL <https://github.com/ivelum/djangoql>`_,
+  which allows a litany of handy search functionality `#492`_.
+* `CkuT`_ and `JOKer`_ hacked out a simple, but super-helpful optimisation to
+  how the thumbnails are served up, improving performance considerably `#481`_.
+* `tsia`_ added a few fields to the tags REST API. `#483`_.
+* `Brian Cribbs`_ improved the documentation to help people using Paperless
+  over NFS `#484`_.
+* `Brendan M. Sleight`_ updated the documentation to include a note for setting the
+  ``DEBUG`` value.  The ``paperless.conf.example`` file was also updated to
+  mirror the project defaults.
+
+
+2.6.1
+=====
+
+* We now have a logo, complete with a favicon :-)
+* Removed some problematic tests.
+* Fix the docker-compose example config to include a shared consume volume so
+  that using the push API will work for users of the Docker install.  Thanks to
+  `Colin Frei`_ for fixing this in `#466`_.
+* `khrise`_ submitted a pull request to include the ``added`` property to the
+  REST API `#471`_.
+
+
+2.6.0
+=====
+
+* Allow an infinite number of logs to be deleted.  Thanks to `Ulli`_ for noting
+  the problem in `#433`_.
+* Fix the ``RecentCorrespondentsFilter`` correspondents filter that was added
+  in 2.4 to play nice with the defaults.  Thanks to `tsia`_ and `Sblop`_ who
+  pointed this out. `#423`_.
+* Updated dependencies to include (among other things) a security patch to
+  requests.
+* Fix text in sample data for tests so that the language guesser stops thinking
+  that everything is in Catalan because we had *Lorem ipsum* in there.
+* Tweaked the gunicorn sample command to use filesystem paths instead of Python
+  paths. `#441`_
+* Added pretty colour boxes next to the hex values in the Tags section, thanks
+  to a pull request from `Joshua Taillon`_ `#442`_.
+* Added a ``.editorconfig`` file to better specify coding style.
+* `Joshua Taillon`_ also added some logic to tie Paperless' date guessing logic
+  into how it parses file names on import. `#440`_
+
+
+2.5.0
+=====
+
+* **New dependency**: Paperless now optimises thumbnail generation with
+  `optipng`_, so you'll need to install that somewhere in your PATH or declare
+  its location in ``PAPERLESS_OPTIPNG_BINARY``.  The Docker image has already
+  been updated on the Docker Hub, so you just need to pull the latest one from
+  there if you're a Docker user.
+
+* "Login free" instances of Paperless were breaking whenever you tried to edit
+  objects in the admin: adding/deleting tags or correspondents, or even fixing
+  spelling.  This was due to the "user hack" we were applying to sessions that
+  weren't using a login, as that hack user didn't have a valid id.  The fix was
+  to attribute the first user id in the system to this hack user.  `#394`_
+
+* A problem in how we handle slug values on Tags and Correspondents required a
+  few changes to how we handle this field `#393`_:
+
+  1. Slugs are no longer editable.  They're derived from the name of the tag or
+     correspondent at save time, so if you wanna change the slug, you have to
+     change the name, and even then you're restricted to the rules of the
+     ``slugify()`` function.  The slug value is still visible in the admin
+     though.
+  2. I've added a migration to go over all existing tags & correspondents and
+     rewrite the ``.slug`` values to ones conforming to the ``slugify()``
+     rules.
+  3. The consumption process now uses the same rules as ``.save()`` in
+     determining a slug and using that to check for an existing
+     tag/correspondent.
+
+* An annoying bug in the date capture code was causing some bogus dates to be
+  attached to documents, which in turn busted the UI.  Thanks to `Andrew Peng`_
+  for reporting this. `#414`_.
+
+* A bug in the Dockerfile meant that Tesseract language files weren't being
+  installed correctly.  `euri10`_ was quick to provide a fix: `#406`_, `#413`_.
+
+* Document consumption is now wrapped in a transaction as per an old ticket
+  `#262`_.
+
+* The ``get_date()`` functionality of the parsers has been consolidated onto
+  the ``DocumentParser`` class since much of that code was redundant anyway.
+
+
+2.4.0
+=====
+
+* A new set of actions are now available thanks to `jonaswinkler`_'s very first
+  pull request!  You can now do nifty things like tag documents in bulk, or set
+  correspondents in bulk.  `#405`_
+* The import/export system is now a little smarter.  By default, documents are
+  tagged as ``unencrypted``, since exports are by their nature unencrypted.
+  It's now in the import step that we decide the storage type.  This allows you
+  to export from an encrypted system and import into an unencrypted one, or
+  vice-versa.
+* The migration history has been slightly modified to accommodate PostgreSQL
+  users.  Additionally, you can now tell paperless to use PostgreSQL simply by
+  declaring ``PAPERLESS_DBUSER`` in your environment.  This will attempt to
+  connect to your Postgres database without a password unless you also set
+  ``PAPERLESS_DBPASS``.
+* A bug was found in the REST API filter system that was the result of an
+  update of django-filter some time ago.  This has now been patched in `#412`_.
+  Thanks to `thepill`_ for spotting it!
+
+
 2.3.0
 =====

@@ -15,7 +130,8 @@ Changelog
 * As his last bit of effort on this release, Joshua also added some code to
  allow you to view the documents inline rather than download them as an
  attachment. `#400`_
-* Finally, `ahyear`_ found a slip in the Docker documentation and patched it. `#401`_
+* Finally, `ahyear`_ found a slip in the Docker documentation and patched it.
+  `#401`_


 2.2.1
@@ -32,14 +148,14 @@ Changelog
  version of Paperless that supports Django 2.0!  As a result of their hard
  work, you can now also run Paperless on Python 3.7 as well: `#386`_ &
  `#390`_.
-* `Stéphane Brunner`_ added a few lines of code that made tagging interface a lot
-  easier on those of us with lots of different tags: `#391`_.
+* `Stéphane Brunner`_ added a few lines of code that made tagging interface a
+  lot easier on those of us with lots of different tags: `#391`_.
 * `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create
  tags, so that's fixed now too: `#384`_.
 * `erikarvstedt`_ tweaked the behaviour of the test suite to be better behaved
  for packaging environments: `#383`_.
-* `Lukasz Soluch`_ added CORS support to make building a new Javascript-based front-end
-  cleaner & easier: `#387`_.
+* `Lukasz Soluch`_ added CORS support to make building a new Javascript-based
+  front-end cleaner & easier: `#387`_.


 2.1.0
@@ -499,8 +615,21 @@ bulk of the work on this big change.
 .. _Kilian Koeltzsch: https://github.com/kiliankoe
 .. _Lukasz Soluch: https://github.com/LukaszSolo
 .. _Joshua Taillon: https://github.com/jat255
-.. _dubit0:  https://github.com/dubit0
-.. _ahyear:  https://github.com/ahyear
+.. _dubit0: https://github.com/dubit0
+.. _ahyear: https://github.com/ahyear
+.. _jonaswinkler: https://github.com/jonaswinkler
+.. _thepill: https://github.com/thepill
+.. _Andrew Peng: https://github.com/pengc99
+.. _euri10: https://github.com/euri10
+.. _Ulli: https://github.com/Ulli2k
+.. _tsia: https://github.com/tsia
+.. _Sblop: https://github.com/Sblop
+.. _Colin Frei: https://github.com/colinfrei
+.. _khrise: https://github.com/khrise
+.. _syntonym: https://github.com/syntonym
+.. _JOKer: https://github.com/JOKer
+.. _Brian Cribbs: https://github.com/cribbstechnolog
+.. _Brendan M. Sleight: https://github.com/bmsleight

 .. _#20: https://github.com/danielquinn/paperless/issues/20
 .. _#44: https://github.com/danielquinn/paperless/issues/44
@@ -566,6 +695,7 @@ bulk of the work on this big change.
 .. _#322: https://github.com/danielquinn/paperless/pull/322
 .. _#328: https://github.com/danielquinn/paperless/pull/328
 .. _#253: https://github.com/danielquinn/paperless/issues/253
+.. _#262: https://github.com/danielquinn/paperless/issues/262
 .. _#323: https://github.com/danielquinn/paperless/issues/323
 .. _#344: https://github.com/danielquinn/paperless/pull/344
 .. _#351: https://github.com/danielquinn/paperless/pull/351
@@ -582,11 +712,33 @@ bulk of the work on this big change.
 .. _#391: https://github.com/danielquinn/paperless/pull/391
 .. _#390: https://github.com/danielquinn/paperless/pull/390
 .. _#392: https://github.com/danielquinn/paperless/issues/392
+.. _#393: https://github.com/danielquinn/paperless/issues/393
 .. _#395: https://github.com/danielquinn/paperless/pull/395
+.. _#394: https://github.com/danielquinn/paperless/issues/394
 .. _#396: https://github.com/danielquinn/paperless/pull/396
 .. _#399: https://github.com/danielquinn/paperless/pull/399
 .. _#400: https://github.com/danielquinn/paperless/pull/400
 .. _#401: https://github.com/danielquinn/paperless/pull/401
+.. _#405: https://github.com/danielquinn/paperless/pull/405
+.. _#406: https://github.com/danielquinn/paperless/issues/406
+.. _#412: https://github.com/danielquinn/paperless/issues/412
+.. _#413: https://github.com/danielquinn/paperless/pull/413
+.. _#414: https://github.com/danielquinn/paperless/issues/414
+.. _#423: https://github.com/danielquinn/paperless/issues/423
+.. _#433: https://github.com/danielquinn/paperless/issues/433
+.. _#440: https://github.com/danielquinn/paperless/pull/440
+.. _#441: https://github.com/danielquinn/paperless/pull/441
+.. _#442: https://github.com/danielquinn/paperless/pull/442
+.. _#466: https://github.com/danielquinn/paperless/pull/466
+.. _#471: https://github.com/danielquinn/paperless/pull/471
+.. _#475: https://github.com/danielquinn/paperless/pull/475
+.. _#481: https://github.com/danielquinn/paperless/pull/481
+.. _#483: https://github.com/danielquinn/paperless/pull/483
+.. _#484: https://github.com/danielquinn/paperless/pull/484
+.. _#488: https://github.com/danielquinn/paperless/pull/488
+.. _#489: https://github.com/danielquinn/paperless/pull/489
+.. _#492: https://github.com/danielquinn/paperless/pull/492

 .. _pipenv: https://docs.pipenv.org/
 .. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
+.. _optipng: http://optipng.sourceforge.net/
--- a/docs/consumption.rst
+++ b/docs/consumption.rst
@@ -76,6 +76,31 @@ Pre-consumption script

 * Document file name

+A simple but common example for this would be creating a simple script like
+this:
+
+``/usr/local/bin/ocr-pdf``
+
+.. code:: bash
+
+    #!/usr/bin/env bash
+    pdf2pdfocr.py -i ${1}
+
+``/etc/paperless.conf``
+
+.. code:: bash
+
+    ...
+    PAPERLESS_PRE_CONSUME_SCRIPT="/usr/local/bin/ocr-pdf"
+    ...
+
+This will pass the path to the document about to be consumed to ``/usr/local/bin/ocr-pdf``,
+which will in turn call `pdf2pdfocr.py`_ on your document, which will then
+overwrite the file with an OCR'd version of the file and exit, at which point
+the consumption process will begin with the newly modified file.
+
+.. _pdf2pdfocr.py: https://github.com/LeoFCardoso/pdf2pdfocr
+

 .. _consumption-director-hook-variables-post:

--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -0,0 +1,141 @@
+.. _contributing:
+
+Contributing to Paperless
+#########################
+
+Maybe you've been using Paperless for a while and want to add a feature or two,
+or maybe you've come across a bug that you have some ideas how to solve.  The
+beauty of Free software is that you can see what's wrong and help to get it
+fixed for everyone!
+
+
+How to Get Your Changes Rolled Into Paperless
+=============================================
+
+If you've found a bug, but don't know how to fix it, you can always post an
+issue on `GitHub`_ in the hopes that someone will have the time to fix it for
+you.  If however you're the one with the time, pull requests are always
+welcome, you just have to make sure that your code conforms to a few standards:
+
+Pep8
+----
+
+It's the standard for all Python development, so it's `very well documented`_.
+The short version is:
+
+* Lines should wrap at 79 characters
+* Use ``snake_case`` for variables, ``CamelCase`` for classes, and ``ALL_CAPS``
+  for constants.
+* Space out your operators: ``stuff + 7`` instead of ``stuff+7``
+* Two empty lines between classes, and functions, but 1 empty line between
+  class methods.
+
+There's more to it than that, but if you follow those, you'll probably be
+alright.  When you submit your pull request, there's a pep8 checker that'll
+look at your code to see if anything is off.  If it finds anything, it'll
+complain at you until you fix it.
+
+
+Additional Style Guides
+-----------------------
+
+Where pep8 is ambiguous, I've tried to be a little more specific.  These rules
+aren't hard-and-fast, but if you can conform to them, I'll appreciate it and
+spend less time trying to conform your PR before merging:
+
+
+Function calls
+..............
+
+If you're calling a function and that necessitates more than one line of code,
+please format it like this:
+
+.. code:: python
+
+    my_function(
+        argument1,
+        kwarg1="x",
+        kwarg2="y",
+        another_really_long_kwarg="some big value",
+        a_kwarg_calling_another_long_function=another_function(
+            another_arg,
+            another_kwarg="kwarg!"
+        )
+    )
+
+This is all in the interest of code uniformity rather than anything else.  If
+we stick to a style, everything is understandable in the same way.
+
+
+Quoting Strings
+...............
+
+pep8 is a little too open-minded on this for my liking.  Python strings should
+be quoted with double quotes (``"``) except in cases where the resulting string
+would require too much escaping of a double quote, in which case, a single
+quoted, or triple-quoted string will do:
+
+.. code:: python
+
+    my_string = "This is my string"
+    problematic_string = 'This is a "string" with "quotes" in it'
+
+In HTML templates, please use double-quotes for tag attributes, and single
+quotes for arguments passed to Django template tags:
+
+.. code:: html
+
+    <div class="stuff">
+        <a href="{% url 'some-url-name' pk='w00t' %}">link this</a>
+    </div>
+
+This is to keep linters happy when they look at an HTML file and see an
+attribute closing the ``"`` before it should have been.
+
+----
+
+That's all there is in terms of guidelines, so I hope it's not too daunting.
+
+
+Indentation & Spacing
+.....................
+
+When it comes to indentation:
+
+* For Python, the rule is: follow pep8 and use 4 spaces.
+* For Javascript, CSS, and HTML, please use 1 tab.
+
+Additionally, Django templates making use of block elements like ``{% if %}``,
+``{% for %}``, and ``{% block %}`` etc. should be indented:
+
+Good:
+
+.. code:: html
+
+    {% block stuff %}
+    	<h1>This is the stuff</h1>
+    {% endblock %}
+
+Bad:
+
+.. code:: html
+
+    {% block stuff %}
+    <h1>This is the stuff</h1>
+    {% endblock %}
+
+
+The Code of Conduct
+===================
+
+Paperless has a `code of conduct`_.  It's a lot like the other ones you see out
+there, with a few small changes, but basically it boils down to:
+
+    Don't be an ass, or you might get banned.
+
+I'm proud to say that the CoC has never had to be enforced because everyone has
+been awesome, friendly, and professional.
+
+.. _GitHub: https://github.com/danielquinn/paperless/issues
+.. _very well documented: https://www.python.org/dev/peps/pep-0008/
+.. _code of conduct: https://github.com/danielquinn/paperless/blob/master/CODE_OF_CONDUCT.md
--- a/docs/guesswork.rst
+++ b/docs/guesswork.rst
@@ -43,6 +43,16 @@ These however wouldn't work:
 * ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
 * ``Another Company- Letter of Reference.jpg``

+Do I have to be so strict about naming?
+---------------------------------------
+Rather than using the strict document naming rules, one can also set the option
+``PAPERLESS_FILENAME_DATE_ORDER`` in ``paperless.conf`` to any date order
+that is accepted by dateparser_. Doing so will cause ``paperless`` to default
+to any date format that is found in the title, instead of a date pulled from
+the document's text, without requiring the strict formatting of the document
+filename as described above.
+
+.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings

 .. _guesswork-content:

@@ -82,11 +92,11 @@ text and matching algorithm.  From the help info there:
    uses a regex to match the PDF.  If you don't know what a regex is, you
    probably don't want this option.

-When using the "any" or "all" matching algorithms, you can search for terms that
-consist of multiple words by enclosing them in double quotes. For example, defining
-a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match
-documents that contain either "Bank of America" or "BofA", but will not match
-documents containing "Bank of South America".
+When using the "any" or "all" matching algorithms, you can search for terms
+that consist of multiple words by enclosing them in double quotes. For example,
+defining a match text of ``"Bank of America" BofA`` using the "any" algorithm,
+will match documents that contain either "Bank of America" or "BofA", but will
+not match documents containing "Bank of South America".

 Then just save your tag/correspondent and run another document through the
 consumer.  Once complete, you should see the newly-created document,
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -43,5 +43,6 @@ Contents
   customising
   extending
   troubleshooting
+   contributing
   scanners
   changelog
--- a/docs/migrating.rst
+++ b/docs/migrating.rst
@@ -82,6 +82,7 @@ rolled in as part of the update:

    $ cd /path/to/project
    $ git pull
+    $ pip install -r requirements.txt
    $ cd src
    $ ./manage.py migrate

@@ -101,7 +102,7 @@ is similar:
    $ cd /path/to/project
    $ git pull
    $ docker build -t paperless .
-    $ docker-compose run --rm comsumer migrate
+    $ docker-compose run --rm consumer migrate
    $ docker-compose up -d

 If ``git pull`` doesn't report any changes, there is no need to continue with
--- a/docs/requirements.rst
+++ b/docs/requirements.rst
@@ -12,6 +12,7 @@ should work) that has the following software installed:
 * `Imagemagick`_ version 6.7.5 or higher
 * `unpaper`_
 * `libpoppler-cpp-dev`_ PDF rendering library
+* `optipng`_

 .. _Python3: https://python.org/
 .. _GNU Privacy Guard: https://gnupg.org
@@ -19,6 +20,7 @@ should work) that has the following software installed:
 .. _Imagemagick: http://imagemagick.org/
 .. _unpaper: https://www.flameeyes.eu/projects/unpaper
 .. _libpoppler-cpp-dev: https://poppler.freedesktop.org/
+.. _optipng: http://optipng.sourceforge.net/

 Notably, you should confirm how you access your Python3 installation.  Many
 Linux distributions will install Python3 in parallel to Python2, using the
@@ -33,7 +35,7 @@ In addition to the above, there are a number of Python requirements, all of
 which are listed in a file called ``requirements.txt`` in the project root
 directory.

-If you're not working on a virtual environment (like Vagrant or Docker), you
+If you're not working on a virtual environment (like Docker), you
 should probably be using a virtualenv, but that's your call.  The reasons why
 you might choose a virtualenv or not aren't really within the scope of this
 document.  Needless to say if you don't know what a virtualenv is, you should
--- a/docs/setup.rst
+++ b/docs/setup.rst
@@ -42,18 +42,14 @@ Installation & Configuration
 You can go multiple routes with setting up and running Paperless:

 * The `bare metal route`_
- * The `vagrant route`_
 * The `docker route`_


-The `Vagrant route`_ is quick & easy, but means you're running a VM which comes
-with memory consumption, cpu overhead etc. The `docker route`_ offers the same
-simplicity as Vagrant with lower resource consumption.
+The `docker route`_ is quick & easy.

 The `bare metal route`_ is a bit more complicated to setup but makes it easier
 should you want to contribute some code back.

-.. _Vagrant route: setup-installation-vagrant_
 .. _docker route: setup-installation-docker_
 .. _bare metal route: setup-installation-bare-metal_
 .. _Docker Machine: https://docs.docker.com/machine/
@@ -81,12 +77,16 @@ Standard (Bare Metal)
      encrypt/decrypt the original documents.  Don't worry about defining this
      if you don't want to use encryption (the default).

+   Note also that if you're using ``runserver`` as mentioned below, you
+   should make sure that ``PAPERLESS_DEBUG="true"`` is set, or left
+   commented out, as debug mode is enabled by default.
+
 4. Initialise the SQLite database with ``./manage.py migrate``.
 5. Create a user for your Paperless instance with
   ``./manage.py createsuperuser``. Follow the prompts to create your user.
 6. Start the webserver with ``./manage.py runserver <IP>:<PORT>``.
-   If no specifc IP or port are given, the default is ``127.0.0.1:8000``
-   also known as http://localhost:8000/.
+   If no specific IP or port is given, the default is ``127.0.0.1:8000`` also
+   known as http://localhost:8000/.
   You should now be able to visit your (empty) installation at
   `Paperless webserver`_ or whatever you chose before.  You can login with the
   user/pass you created in #5.
@@ -147,6 +147,15 @@ Docker Method
   instructions in comments in the file. The only change that is a hard
   requirement is to specify where the consumption directory should
   mount.[#dockercomposeyml]_
+
+   .. caution::
+
+      If you are using NFS mounts for the consume directory you also need to
+      change the command to turn off inotify, as it doesn't work with NFS:
+
+      ``command: ["document_consumer", "--no-inotify"]``
+
+
 5. Modify ``docker-compose.env`` and adapt the following environment variables:

   ``PAPERLESS_PASSPHRASE``
@@ -267,54 +276,6 @@ Docker Method
   newer ``docker-compose.yml.example`` file


-.. _setup-installation-vagrant:
-
-Vagrant Method
-++++++++++++++
-
-1. Install `Vagrant`_.  How you do that is really between you and your OS.
-2. Run ``vagrant up``.  An instance will start up for you.  When it's ready and
-   provisioned...
-3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
-   ``/etc/paperless.conf`` and set the values for:
-
-    * ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be
-      dumped to be consumed by Paperless.
-    * ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to
-      encrypt/decrypt the original document.  It's only required if you want
-      your original files to be encrypted, otherwise, just leave it unset.
-    * ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming
-      documents from mail or via the API.  If you don't use either, leaving it
-      blank is just fine.
-
-4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again.  This
-   updates the environment to make use of the changes you made to the config
-   file.
-5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
-6. Still inside your vagrant box, create a user for your Paperless instance
-   with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
-   create your user.
-7. Start the webserver with
-   ``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
-   able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
-   You can login with the user/pass you created in #6.
-8. In a separate window, run ``vagrant ssh`` again, but this time once inside
-   your vagrant instance, you should start the consumer script with
-   ``/opt/paperless/src/manage.py document_consumer``.
-9. Scan something.  Put it in the ``CONSUMPTION_DIR``.
-10. Wait a few minutes
-11. Visit the document list on your webserver, and it should be there, indexed
-    and downloadable.
-
-.. caution::
-
-    This installation is not secure. Once everything is working head up to
-    `Making things more permanent`_
-
-.. _Vagrant: https://vagrantup.com/
-.. _Paperless server: http://172.28.128.4:8000
-
-
 .. _setup-permanent:

 Making Things a Little more Permanent
@@ -398,7 +359,7 @@ instance listening on localhost port 8000.
        location /static {

            autoindex on;
-            alias <path-to-paperless-static-directory>
+            alias <path-to-paperless-static-directory>;

        }

@@ -409,7 +370,7 @@ instance listening on localhost port 8000.
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

-            proxy_pass http://127.0.0.1:8000
+            proxy_pass http://127.0.0.1:8000;
        }
    }

@@ -418,7 +379,7 @@ The gunicorn server can be started with the command:

 .. code-block:: shell

-    $ <path-to-paperless-virtual-environment>/bin/gunicorn <path-to-paperless>/src/paperless.wsgi -w 2
+    $ <path-to-paperless-virtual-environment>/bin/gunicorn --pythonpath=<path-to-paperless>/src paperless.wsgi -w 2


 .. _setup-permanent-standard-systemd:
@@ -475,7 +436,7 @@ after restarting your system:
    respawn limit 10 5

    script
-      exec <path to paperless virtual environment>/bin/gunicorn <path to parperless>/src/paperless.wsgi -w 2
+      exec <path to paperless virtual environment>/bin/gunicorn --pythonpath=<path to paperless>/src paperless.wsgi -w 2
    end script

   Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
@@ -513,13 +474,6 @@ second period.
 .. _Upstart: http://upstart.ubuntu.com/


-Vagrant
-~~~~~~~
-
-You may use the Ubuntu explanation above. Replace
-``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
-
-
 .. _setup-permanent-docker:

 Docker
--- a/docs/troubleshooting.rst
+++ b/docs/troubleshooting.rst
@@ -14,9 +14,8 @@ FORGIVING_OCR is enabled``, then you might need to install the
 `Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
 marching your document's languages.

-As an example, if you are running Paperless from the Vagrant setup provided
-(or from any Ubuntu or Debian box), and your documents are written in Spanish
-you may need to run::
+As an example, if you are running Paperless from any Ubuntu or Debian
+box, and your documents are written in Spanish, you may need to run::

    apt-get install -y tesseract-ocr-spa

--- a/docs/utilities.rst
+++ b/docs/utilities.rst
@@ -214,5 +214,5 @@ This too is done via the ``manage.py`` script:

 That's it.  It'll loop over all of the documents in your database and attempt
 to match all of your tags to them.  If one matches, it'll be applied.  And
-don't worry, you can run this as often as you like, it' won't double-tag
+don't worry, you can run this as often as you like, it won't double-tag
 a document.
--- a/overrides/README.md
+++ b/overrides/README.md
@@ -0,0 +1,11 @@
+# Customizing Paperless
+
+*See customization
+[documentation](https://paperless.readthedocs.io/en/latest/customising.html) 
+for more detail!*
+
+The example `.css` and `.js` snippets in this folder can be placed into
+one of two files in your `PAPERLESS_MEDIADIR` folder: `overrides.js` or
+`overrides.css`. Please feel free to submit pull requests to the main
+repository with other examples of customizations that you think others may
+find useful.
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -59,6 +59,11 @@ PAPERLESS_EMAIL_SECRET=""
 ####                              Security                                 ####
 ###############################################################################

+# Controls whether Django's debug mode is enabled. Disable this on production
+# systems. Debug mode is enabled by default.
+#PAPERLESS_DEBUG="true"
+
+
 # Paperless can be instructed to attempt to encrypt your PDF files with GPG
 # using the PAPERLESS_PASSPHRASE specified below.  If however you're not
 # concerned about encrypting these files (for example if you have disk
@@ -122,6 +127,14 @@ PAPERLESS_EMAIL_SECRET=""
 # "true", the document will instead be opened in the browser, if possible.
 #PAPERLESS_INLINE_DOC="false"

+# By default, paperless will check the document text for document date information.
+# Uncomment the line below to enable checking the document filename for date
+# information. The date order can be set to any option as specified in
+# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
+# checked first, and if nothing is found, the document text will be checked
+# as normal.
+#PAPERLESS_FILENAME_DATE_ORDER="YMD"
+
 #
 # The following values use sensible defaults for modern systems, but if you're
 # running Paperless on a low-resource device (like a Raspberry Pi), modifying
@@ -183,6 +196,17 @@ PAPERLESS_EMAIL_SECRET=""
 #PAPERLESS_CONSUMER_LOOP_TIME=10


+# By default Paperless stops consuming a document if no language can be
+# detected. Set to true to consume documents even if the language detection
+# fails.
+#PAPERLESS_FORGIVING_OCR="false"
+
+
+# By default Paperless does not OCR a document if the text can be retrieved from
+# the document directly. Set to true to always OCR documents.
+#PAPERLESS_OCR_ALWAYS="false"
+
+
 ###############################################################################
 ####                            Interface                                  ####
 ###############################################################################
@@ -203,3 +227,28 @@ PAPERLESS_EMAIL_SECRET=""
 # positive integer, but if you don't define one in paperless.conf, a default of
 # 100 will be used.
 #PAPERLESS_LIST_PER_PAGE=100
+
+
+# The number of years for which a correspondent will be included in the recent
+# correspondents filter.
+#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
+
+###############################################################################
+####                     Third-Party Binaries                              ####
+###############################################################################
+
+# There are a few external software packages that Paperless expects to find on
+# your system when it starts up.  Unless you've done something creative with
+# their installation, you probably won't need to edit any of these.  However,
+# if you've installed these programs somewhere where simply typing the name of
+# the program doesn't automatically execute it (i.e. the program isn't in your
+# $PATH), then you'll need to specify the literal path for that program here.
+
+# Convert (part of the ImageMagick suite)
+#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
+
+# Unpaper
+#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
+
+# Optipng (for optimising thumbnail sizes)
+#PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,51 +1,70 @@
 -i https://pypi.python.org/simple
-apipkg==1.5; python_version != '3.1.*'
-atomicwrites==1.2.1; python_version != '3.1.*'
+alabaster==0.7.12
+apipkg==1.5
+atomicwrites==1.2.1
 attrs==18.2.0
-certifi==2018.8.24
+babel==2.6.0
+certifi==2018.11.29
 chardet==3.0.4
-coverage==4.5.1; python_version != '3.1.*'
-coveralls==1.5.0
+coverage==4.5.2
+coveralls==1.5.1
 dateparser==0.7.0
 django-cors-headers==2.4.0
 django-crispy-forms==1.7.2
-django-extensions==2.1.2
-django-filter==2.0.0
-django==2.0.8
-djangorestframework==3.8.2
+django-extensions==2.1.4
+django-filter==2.1.0
+django==2.0.10
+djangoql==0.12.3
+djangorestframework==3.9.1
 docopt==0.6.2
-execnet==1.5.0; python_version != '3.1.*'
+docutils==0.14
+execnet==1.5.0
 factory-boy==2.11.1
-faker==0.9.0
+faker==1.0.2
+filelock==3.0.10
 filemagic==1.6
-fuzzywuzzy==0.15.0
+fuzzywuzzy[speedup]==0.15.0
 gunicorn==19.9.0
-idna==2.7
+idna==2.8
+imagesize==1.1.0
 inotify-simple==1.1.8
+jinja2==2.10
 langdetect==1.0.7
-more-itertools==4.3.0
-pdftotext==2.1.0
-pillow==5.2.0
-pluggy==0.7.1; python_version != '3.1.*'
-py==1.6.0; python_version != '3.1.*'
+markupsafe==1.1.0
+more-itertools==5.0.0
+packaging==19.0
+pdftotext==2.1.1
+pillow==5.4.1
+pluggy==0.8.1
+ply==3.11
+psycopg2==2.7.7
+py==1.7.0
 pycodestyle==2.4.0
+pygments==2.3.1
 pyocr==0.5.3
-pytest-cov==2.5.1
-pytest-django==3.4.2
+pyparsing==2.3.1
+pytest-cov==2.6.1
+pytest-django==3.4.5
 pytest-env==0.6.2
-pytest-forked==0.2
-pytest-sugar==0.9.1
-pytest-xdist==1.23.0
-pytest==3.7.4
-python-dateutil==2.7.3
-python-dotenv==0.9.1
-python-gnupg==0.4.3
+pytest-forked==1.0.1
+pytest-sugar==0.9.2
+pytest-xdist==1.26.0
+pytest==4.1.1
+python-dateutil==2.7.5
+python-dotenv==0.10.1
+python-gnupg==0.4.4
 python-levenshtein==0.12.0
-pytz==2018.5
-regex==2018.8.29
-requests==2.19.1
-six==1.11.0
+pytz==2018.9
+regex==2019.1.24
+requests==2.21.0
+six==1.12.0
+snowballstemmer==1.2.1
+sphinx==1.8.3
+sphinxcontrib-websupport==1.1.0
 termcolor==1.1.0
 text-unidecode==1.2
+toml==0.10.0
+tox==3.7.0
 tzlocal==1.5.1
-urllib3==1.23; python_version != '3.0.*'
+urllib3==1.24.1
+virtualenv==16.3.0
--- a/resources/logo/print/eps/Black
+++ b/resources/logo/print/eps/Black
--- a/resources/logo/print/eps/Color
+++ b/resources/logo/print/eps/Color
--- a/resources/logo/print/eps/Color
+++ b/resources/logo/print/eps/Color
--- a/resources/logo/print/eps/White
+++ b/resources/logo/print/eps/White
--- a/resources/logo/print/pdf/Black
+++ b/resources/logo/print/pdf/Black
--- a/resources/logo/print/pdf/Color
+++ b/resources/logo/print/pdf/Color
--- a/resources/logo/print/pdf/Color
+++ b/resources/logo/print/pdf/Color
--- a/resources/logo/print/pdf/White
+++ b/resources/logo/print/pdf/White
--- a/resources/logo/web/png/Black
+++ b/resources/logo/web/png/Black
--- a/resources/logo/web/png/Color
+++ b/resources/logo/web/png/Color
--- a/resources/logo/web/png/Color
+++ b/resources/logo/web/png/Color
--- a/resources/logo/web/png/White
+++ b/resources/logo/web/png/White
--- a/resources/logo/web/svg/Black
+++ b/resources/logo/web/svg/Black
--- a/resources/logo/web/svg/Color
+++ b/resources/logo/web/svg/Color
--- a/resources/logo/web/svg/Color
+++ b/resources/logo/web/svg/Color
--- a/resources/logo/web/svg/White
+++ b/resources/logo/web/svg/White
--- a/resources/logo/web/svg/square.svg
+++ b/resources/logo/web/svg/square.svg
@@ -0,0 +1,82 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   version="1.1"
+   width="900"
+   height="900"
+   id="svg3923"
+   sodipodi:docname="square.svg"
+   inkscape:export-filename="/tmp/test.png"
+   inkscape:export-xdpi="96"
+   inkscape:export-ydpi="96"
+   inkscape:version="0.92.2 2405546, 2018-03-11">
+  <metadata
+     id="metadata3929">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs3927" />
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="3840"
+     inkscape:window-height="2096"
+     id="namedview3925"
+     showgrid="false"
+     inkscape:zoom="1.1360927"
+     inkscape:cx="635.07139"
+     inkscape:cy="606.383"
+     inkscape:window-x="0"
+     inkscape:window-y="27"
+     inkscape:window-maximized="1"
+     inkscape:current-layer="g3921" />
+  <g
+     transform="matrix(10.638298,0,0,10.638298,106.38298,-206.38301)"
+     id="g3921">
+    <defs
+       id="SvgjsDefs1018" />
+    <g
+       id="SvgjsG1019"
+       featureKey="root"
+       style="fill:#ffffff" />
+    <g
+       id="SvgjsG1020"
+       featureKey="symbol1"
+       transform="matrix(0.10341565,0,0,0.10341565,-11.43874,18.048418)"
+       inkscape:export-filename="/tmp/test.png"
+       inkscape:export-xdpi="116.02285"
+       inkscape:export-ydpi="116.02285"
+       style="fill:#17541f">
+      <defs
+         id="defs3911" />
+      <g
+         id="g3915">
+        <path
+           d="M 231,798 C 227,779 219,741 218,741 49,640 69,465 125,365 c 12,126 235,213 105,367 -1,2 6,26 12,48 26,-44 65,-97 63,-102 C 145,288 645,258 749,16 c 47,234 -24,596 -426,688 -2,1 -73,126 -76,127 0,-2 -30,-1 -26,-11 2,-6 6,-14 10,-22 z M 330,625 C 267,476 452,312 544,271 356,439 324,564 330,625 Z m -104,79 c 51,-59 -9,-160 -45,-193 61,105 57,166 45,193 z"
+           style="fill:#17541f"
+           id="path3913"
+           inkscape:connector-curvature="0" />
+      </g>
+    </g>
+  </g>
+</svg>
--- a/scripts/docker-entrypoint.sh
+++ b/scripts/docker-entrypoint.sh
@@ -75,7 +75,7 @@ install_languages() {
        pkg="tesseract-ocr-data-$lang"

        # English is installed by default
-        if [ "$lang" ==  "eng" ]; then
+        if [[ "$lang" ==  "eng" ]]; then
            continue
        fi

@@ -95,7 +95,7 @@ if [[ "$1" != "/"* ]]; then
    initialize

    # Install additional languages if specified
-    if [ ! -z "$PAPERLESS_OCR_LANGUAGES"  ]; then
+    if [[ ! -z "$PAPERLESS_OCR_LANGUAGES"  ]]; then
        install_languages "$PAPERLESS_OCR_LANGUAGES"
    fi

--- a/scripts/paperless-webserver.service
+++ b/scripts/paperless-webserver.service
@@ -4,7 +4,7 @@ Description=Paperless webserver
 [Service]
 User=paperless
 Group=paperless
-ExecStart=/home/paperless/project/virtualenv/bin/gunicorn /home/paperless/project/src/paperless.wsgi -w 2
+ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2

 [Install]
 WantedBy=multi-user.target
--- a/scripts/vagrant-provision
+++ b/scripts/vagrant-provision
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-# Install packages
-apt-get update
-apt-get build-dep -y python-imaging
-apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
-apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
-apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
-
-# Python dependencies
-pip3 install -r /opt/paperless/requirements.txt
-
-# Create the environment file
-cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
-chmod 0640 /etc/paperless.conf
-chown root:vagrant /etc/paperless.conf
-
-# Create the consumption directory
-mkdir /home/vagrant/consumption
-chown vagrant:vagrant /home/vagrant/consumption
-
-echo "
-
-
-Now follow the remaining steps in the Vagrant section of the setup
-documentation to complete the process:
-
-http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
-
-
-"
--- a/src/documents/actions.py
+++ b/src/documents/actions.py
@@ -0,0 +1,146 @@
+from django.contrib import messages
+from django.contrib.admin import helpers
+from django.contrib.admin.utils import model_ngettext
+from django.core.exceptions import PermissionDenied
+from django.template.response import TemplateResponse
+
+from documents.models import Correspondent, Tag
+
+
+def select_action(
+        modeladmin, request, queryset, title, action, modelclass,
+        success_message="", document_action=None, queryset_action=None):
+
+    opts = modeladmin.model._meta
+    app_label = opts.app_label
+
+    if not modeladmin.has_change_permission(request):
+        raise PermissionDenied
+
+    if request.POST.get('post'):
+        n = queryset.count()
+        selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
+        if n:
+            for document in queryset:
+                if document_action:
+                    document_action(document, selected_object)
+                document_display = str(document)
+                modeladmin.log_change(request, document, document_display)
+            if queryset_action:
+                queryset_action(queryset, selected_object)
+
+            modeladmin.message_user(request, success_message % {
+                "selected_object": selected_object.name,
+                "count": n,
+                "items": model_ngettext(modeladmin.opts, n)
+            }, messages.SUCCESS)
+
+        # Return None to display the change list page again.
+        return None
+
+    context = dict(
+        modeladmin.admin_site.each_context(request),
+        title=title,
+        queryset=queryset,
+        opts=opts,
+        action_checkbox_name=helpers.ACTION_CHECKBOX_NAME,
+        media=modeladmin.media,
+        action=action,
+        objects=modelclass.objects.all(),
+        itemname=model_ngettext(modelclass, 1)
+    )
+
+    request.current_app = modeladmin.admin_site.name
+
+    return TemplateResponse(
+        request,
+        "admin/{}/{}/select_object.html".format(app_label, opts.model_name),
+        context
+    )
+
+
+def simple_action(
+        modeladmin, request, queryset, success_message="",
+        document_action=None, queryset_action=None):
+
+    if not modeladmin.has_change_permission(request):
+        raise PermissionDenied
+
+    n = queryset.count()
+    if n:
+        for document in queryset:
+            if document_action:
+                document_action(document)
+            document_display = str(document)
+            modeladmin.log_change(request, document, document_display)
+        if queryset_action:
+            queryset_action(queryset)
+        modeladmin.message_user(request, success_message % {
+            "count": n, "items": model_ngettext(modeladmin.opts, n)
+        }, messages.SUCCESS)
+
+    # Return None to display the change list page again.
+    return None
+
+
+def add_tag_to_selected(modeladmin, request, queryset):
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Add tag to multiple documents",
+        action="add_tag_to_selected",
+        modelclass=Tag,
+        success_message="Successfully added tag %(selected_object)s to "
+                        "%(count)d %(items)s.",
+        document_action=lambda doc, tag: doc.tags.add(tag)
+    )
+
+
+def remove_tag_from_selected(modeladmin, request, queryset):
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Remove tag from multiple documents",
+        action="remove_tag_from_selected",
+        modelclass=Tag,
+        success_message="Successfully removed tag %(selected_object)s from "
+                        "%(count)d %(items)s.",
+        document_action=lambda doc, tag: doc.tags.remove(tag)
+    )
+
+
+def set_correspondent_on_selected(modeladmin, request, queryset):
+
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Set correspondent on multiple documents",
+        action="set_correspondent_on_selected",
+        modelclass=Correspondent,
+        success_message="Successfully set correspondent %(selected_object)s "
+                        "on %(count)d %(items)s.",
+        queryset_action=lambda qs, corr: qs.update(correspondent=corr)
+    )
+
+
+def remove_correspondent_from_selected(modeladmin, request, queryset):
+    return simple_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        success_message="Successfully removed correspondent from %(count)d "
+                        "%(items)s.",
+        queryset_action=lambda qs: qs.update(correspondent=None)
+    )
+
+
+add_tag_to_selected.short_description = "Add tag to selected documents"
+remove_tag_from_selected.short_description = \
+    "Remove tag from selected documents"
+set_correspondent_on_selected.short_description = \
+    "Set correspondent on selected documents"
+remove_correspondent_from_selected.short_description = \
+    "Remove correspondent from selected documents"
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -1,42 +1,26 @@
-from datetime import datetime
+from datetime import datetime, timedelta

 from django.conf import settings
-from django.contrib import admin
-from django.contrib.auth.models import User, Group
-try:
-    from django.core.urlresolvers import reverse
-except ImportError:
-    from django.urls import reverse
+from django.contrib import admin, messages
+from django.contrib.admin.templatetags.admin_urls import add_preserved_filters
+from django.contrib.auth.models import Group, User
+from django.db import models
+from django.http import HttpResponseRedirect
 from django.templatetags.static import static
-from django.utils.safestring import mark_safe
+from django.urls import reverse
 from django.utils.html import format_html, format_html_join
+from django.utils.http import urlquote
+from django.utils.safestring import mark_safe
+from djangoql.admin import DjangoQLSearchMixin

-from .models import Correspondent, Tag, Document, Log
+from documents.actions import (
+    add_tag_to_selected,
+    remove_correspondent_from_selected,
+    remove_tag_from_selected,
+    set_correspondent_on_selected
+)

-
-class MonthListFilter(admin.SimpleListFilter):
-
-    title = "Month"
-
-    # Parameter for the filter that will be used in the URL query.
-    parameter_name = "month"
-
-    def lookups(self, request, model_admin):
-        r = []
-        for document in Document.objects.all():
-            r.append((
-                document.created.strftime("%Y-%m"),
-                document.created.strftime("%B %Y")
-            ))
-        return sorted(set(r), key=lambda x: x[0], reverse=True)
-
-    def queryset(self, request, queryset):
-
-        if not self.value():
-            return None
-
-        year, month = self.value().split("-")
-        return queryset.filter(created__year=year, created__month=month)
+from .models import Correspondent, Document, Log, Tag


 class FinancialYearFilter(admin.SimpleListFilter):
@@ -78,12 +62,12 @@ class FinancialYearFilter(admin.SimpleListFilter):

            # To keep it simple we use the same string for both
            # query parameter and the display.
-            return (query, query)
+            return query, query

        else:
            query = "{0}-{0}".format(date.year)
            display = "{}".format(date.year)
-            return (query, display)
+            return query, display

    def lookups(self, request, model_admin):
        if not settings.FY_START or not settings.FY_END:
@@ -104,32 +88,85 @@ class FinancialYearFilter(admin.SimpleListFilter):
                               created__lte=self._fy_end(end))


+class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
+    """
+    If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
+    correspondents to documents sent our way over the past ``n`` years.
+    """
+
+    def field_choices(self, field, request, model_admin):
+
+        years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
+        correspondents = Correspondent.objects.all()
+
+        if years and years > 0:
+            self.title = "Correspondent (Recent)"
+            days = 365 * years
+            correspondents = correspondents.filter(
+                documents__created__gte=datetime.now() - timedelta(days=days)
+            ).distinct()
+
+        return [(c.id, c.name) for c in correspondents]
+
+
 class CommonAdmin(admin.ModelAdmin):
    list_per_page = settings.PAPERLESS_LIST_PER_PAGE


 class CorrespondentAdmin(CommonAdmin):

-    list_display = ("name", "match", "matching_algorithm", "document_count")
+    list_display = (
+        "name",
+        "match",
+        "matching_algorithm",
+        "document_count",
+        "last_correspondence"
+    )
    list_filter = ("matching_algorithm",)
    list_editable = ("match", "matching_algorithm")

+    readonly_fields = ("slug",)
+
+    def get_queryset(self, request):
+        qs = super(CorrespondentAdmin, self).get_queryset(request)
+        qs = qs.annotate(
+            document_count=models.Count("documents"),
+            last_correspondence=models.Max("documents__created")
+        )
+        return qs
+
    def document_count(self, obj):
-        return obj.documents.count()
+        return obj.document_count
+    document_count.admin_order_field = "document_count"
+
+    def last_correspondence(self, obj):
+        return obj.last_correspondence
+    last_correspondence.admin_order_field = "last_correspondence"


 class TagAdmin(CommonAdmin):

-    list_display = ("name", "colour", "match", "matching_algorithm",
-                    "document_count")
+    list_display = (
+        "name", "colour", "match", "matching_algorithm", "document_count")
    list_filter = ("colour", "matching_algorithm")
    list_editable = ("colour", "match", "matching_algorithm")

+    readonly_fields = ("slug",)
+
+    class Media:
+        js = ("js/colours.js",)
+
+    def get_queryset(self, request):
+        qs = super(TagAdmin, self).get_queryset(request)
+        qs = qs.annotate(document_count=models.Count("documents"))
+        return qs
+
    def document_count(self, obj):
-        return obj.documents.count()
+        return obj.document_count
+    document_count.admin_order_field = "document_count"


-class DocumentAdmin(CommonAdmin):
+class DocumentAdmin(DjangoQLSearchMixin, CommonAdmin):

    class Media:
        css = {
@@ -137,15 +174,32 @@ class DocumentAdmin(CommonAdmin):
        }

    search_fields = ("correspondent__name", "title", "content", "tags__name")
-    readonly_fields = ("added",)
+    readonly_fields = ("added", "file_type", "storage_type",)
    list_display = ("title", "created", "added", "thumbnail", "correspondent",
                    "tags_")
-    list_filter = ("tags", "correspondent", FinancialYearFilter,
-                   MonthListFilter)
+    list_filter = (
+        "tags",
+        ("correspondent", RecentCorrespondentFilter),
+        FinancialYearFilter
+    )
+
    filter_horizontal = ("tags",)

    ordering = ["-created", "correspondent"]

+    actions = [
+        add_tag_to_selected,
+        remove_tag_from_selected,
+        set_correspondent_on_selected,
+        remove_correspondent_from_selected
+    ]
+
+    date_hierarchy = "created"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.document_queue = []
+
    def has_add_permission(self, request):
        return False

@@ -153,6 +207,79 @@ class DocumentAdmin(CommonAdmin):
        return obj.created.date().strftime("%Y-%m-%d")
    created_.short_description = "Created"

+    def changelist_view(self, request, extra_context=None):
+
+        response = super().changelist_view(
+            request,
+            extra_context=extra_context
+        )
+
+        if request.method == "GET":
+            cl = self.get_changelist_instance(request)
+            self.document_queue = [doc.id for doc in cl.queryset]
+
+        return response
+
+    def change_view(self, request, object_id=None, form_url='',
+                    extra_context=None):
+
+        extra_context = extra_context or {}
+
+        if self.document_queue and object_id:
+            if int(object_id) in self.document_queue:
+                # There is a queue of documents
+                current_index = self.document_queue.index(int(object_id))
+                if current_index < len(self.document_queue) - 1:
+                    # ... and there are still documents in the queue
+                    extra_context["next_object"] = self.document_queue[
+                        current_index + 1
+                    ]
+
+        return super(DocumentAdmin, self).change_view(
+            request,
+            object_id,
+            form_url,
+            extra_context=extra_context,
+        )
+
+    def response_change(self, request, obj):
+
+        # This is mostly copied from ModelAdmin.response_change()
+        opts = self.model._meta
+        preserved_filters = self.get_preserved_filters(request)
+
+        msg_dict = {
+            "name": opts.verbose_name,
+            "obj": format_html(
+                '<a href="{}">{}</a>',
+                urlquote(request.path),
+                obj
+            ),
+        }
+        if "_saveandeditnext" in request.POST:
+            msg = format_html(
+                'The {name} "{obj}" was changed successfully. '
+                'Editing next object.',
+                **msg_dict
+            )
+            self.message_user(request, msg, messages.SUCCESS)
+            redirect_url = reverse(
+                "admin:{}_{}_change".format(opts.app_label, opts.model_name),
+                args=(request.POST["_next_object"],),
+                current_app=self.admin_site.name
+            )
+            redirect_url = add_preserved_filters(
+                {
+                    "preserved_filters": preserved_filters,
+                    "opts": opts
+                },
+                redirect_url
+            )
+            return HttpResponseRedirect(redirect_url)
+
+        return super().response_change(request, obj)
+
+    @mark_safe
    def thumbnail(self, obj):
        return self._html_tag(
            "a",
@@ -165,8 +292,8 @@ class DocumentAdmin(CommonAdmin):
            ),
            href=obj.download_url
        )
-    thumbnail.allow_tags = True

+    @mark_safe
    def tags_(self, obj):
        r = ""
        for tag in obj.tags.all():
@@ -183,10 +310,11 @@ class DocumentAdmin(CommonAdmin):
                    )
                }
            )
-        return mark_safe(r)
-    tags_.allow_tags = True
+        return r

+    @mark_safe
    def document(self, obj):
+        # TODO: is this method even used anymore?
        return self._html_tag(
            "a",
            self._html_tag(
@@ -199,7 +327,6 @@ class DocumentAdmin(CommonAdmin):
            ),
            href=obj.download_url
        )
-    document.allow_tags = True

    @staticmethod
    def _html_tag(kind, inside=None, **kwargs):
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -1,3 +1,4 @@
+from django.db import transaction
 import datetime
 import hashlib
 import logging
@@ -111,8 +112,11 @@ class Consumer:
                if not self.try_consume_file(file):
                    self._ignore.append((file, mtime))

+    @transaction.atomic
    def try_consume_file(self, file):
-        "Return True if file was consumed"
+        """
+        Return True if file was consumed
+        """

        if not re.match(FileInfo.REGEXES["title"], file):
            return False
@@ -145,7 +149,7 @@ class Consumer:
        parsed_document = parser_class(doc)

        try:
-            thumbnail = parsed_document.get_thumbnail()
+            thumbnail = parsed_document.get_optimised_thumbnail()
            date = parsed_document.get_date()
            document = self._store(
                parsed_document.get_text(),
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
@@ -1,8 +1,14 @@
-from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter
+from django_filters.rest_framework import BooleanFilter, FilterSet

 from .models import Correspondent, Document, Tag


+CHAR_KWARGS = (
+    "startswith", "endswith", "contains",
+    "istartswith", "iendswith", "icontains"
+)
+
+
 class CorrespondentFilterSet(FilterSet):

    class Meta:
@@ -31,34 +37,24 @@ class TagFilterSet(FilterSet):

 class DocumentFilterSet(FilterSet):

-    CHAR_KWARGS = {
-        "lookup_expr": (
-            "startswith",
-            "endswith",
-            "contains",
-            "istartswith",
-            "iendswith",
-            "icontains"
-        )
-    }
-
-    correspondent__name = CharFilter(
-        field_name="correspondent__name", **CHAR_KWARGS)
-    correspondent__slug = CharFilter(
-        field_name="correspondent__slug", **CHAR_KWARGS)
-    tags__name = CharFilter(
-        field_name="tags__name", **CHAR_KWARGS)
-    tags__slug = CharFilter(
-        field_name="tags__slug", **CHAR_KWARGS)
-    tags__empty = BooleanFilter(
-        field_name="tags", lookup_expr="isnull", distinct=True)
+    tags_empty = BooleanFilter(
+        label="Is tagged",
+        field_name="tags",
+        lookup_expr="isnull",
+        exclude=True
+    )

    class Meta:
        model = Document
        fields = {
-            "title": [
-                "startswith", "endswith", "contains",
-                "istartswith", "iendswith", "icontains"
-            ],
-            "content": ["contains", "icontains"],
+
+            "title": CHAR_KWARGS,
+            "content": ("contains", "icontains"),
+
+            "correspondent__name": CHAR_KWARGS,
+            "correspondent__slug": CHAR_KWARGS,
+
+            "tags__name": CHAR_KWARGS,
+            "tags__slug": CHAR_KWARGS,
+
        }
--- a/src/documents/mail.py
+++ b/src/documents/mail.py
@@ -216,7 +216,17 @@ class MailFetcher(Loggable):
        return r

    def _connect(self):
-        self._connection = imaplib.IMAP4_SSL(self._host, self._port)
+        """
+        Open the IMAP-over-SSL connection to the configured host and port.
+        Raises MailFetcherError if the connection fails with an OSError.
+        """
+        try:
+            self._connection = imaplib.IMAP4_SSL(self._host, self._port)
+        except OSError as e:
+            # e.strerror is None for OSErrors built from a single argument
+            # (socket timeouts, for instance), so format the exception itself.
+            msg = "Problem connecting to {}: {}".format(self._host, e)
+            raise MailFetcherError(msg) from e

    def _login(self):

--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -55,7 +55,12 @@ class Command(Renderable, BaseCommand):
        documents = Document.objects.all()
        document_map = {d.pk: d for d in documents}
        manifest = json.loads(serializers.serialize("json", documents))
-        for document_dict in manifest:
+
+        for index, document_dict in enumerate(manifest):
+
+            # Force output to unencrypted as that will be the current state.
+            # The importer will make the decision to encrypt or not.
+            manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501

            document = document_map[document_dict["pk"]]

--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -94,7 +94,7 @@ class Command(Renderable, BaseCommand):
            document_path = os.path.join(self.source, doc_file)
            thumbnail_path = os.path.join(self.source, thumb_file)

-            if document.storage_type == Document.STORAGE_TYPE_GPG:
+            if settings.PASSPHRASE:

                with open(document_path, "rb") as unencrypted:
                    with open(document.source_path, "wb") as encrypted:
@@ -112,3 +112,15 @@ class Command(Renderable, BaseCommand):

                shutil.copy(document_path, document.source_path)
                shutil.copy(thumbnail_path, document.thumbnail_path)
+
+        # Reset the storage type to whatever we've used while importing
+
+        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
+        if settings.PASSPHRASE:
+            storage_type = Document.STORAGE_TYPE_GPG
+
+        Document.objects.filter(
+            pk__in=[r["pk"] for r in self.manifest]
+        ).update(
+            storage_type=storage_type
+        )
--- a/src/documents/migrations/0014_document_checksum.py
+++ b/src/documents/migrations/0014_document_checksum.py
@@ -158,9 +158,4 @@ class Migration(migrations.Migration):
            name='modified',
            field=models.DateTimeField(auto_now=True, db_index=True),
        ),
-        migrations.AlterField(
-            model_name='document',
-            name='checksum',
-            field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted).  We use this to prevent duplicate document imports.', max_length=32, unique=True),
-        ),
    ]
--- a/src/documents/migrations/0015_add_insensitive_to_match.py
+++ b/src/documents/migrations/0015_add_insensitive_to_match.py
@@ -12,6 +12,11 @@ class Migration(migrations.Migration):
    ]

    operations = [
+        migrations.AlterField(
+            model_name='document',
+            name='checksum',
+            field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted).  We use this to prevent duplicate document imports.', max_length=32, unique=True),
+        ),
        migrations.AddField(
            model_name='correspondent',
            name='is_insensitive',
--- a/src/documents/migrations/0022_auto_20181007_1420.py
+++ b/src/documents/migrations/0022_auto_20181007_1420.py
@@ -0,0 +1,52 @@
+# Generated by Django 2.0.8 on 2018-10-07 14:20
+
+from django.db import migrations, models
+from django.utils.text import slugify
+
+
+def re_slug_all_the_things(apps, schema_editor):
+    """
+    Rewrite all slug values to make sure they're actually slugs before we brand
+    them as uneditable.
+    """
+
+    Tag = apps.get_model("documents", "Tag")
+    Correspondent = apps.get_model("documents", "Correspondent")
+
+    for klass in (Tag, Correspondent):
+        for instance in klass.objects.all():
+            klass.objects.filter(
+                pk=instance.pk
+            ).update(
+                slug=slugify(instance.slug)
+            )
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('documents', '0021_document_storage_type'),
+    ]
+
+    operations = [
+        migrations.AlterModelOptions(
+            name='tag',
+            options={'ordering': ('name',)},
+        ),
+        migrations.AlterField(
+            model_name='correspondent',
+            name='slug',
+            field=models.SlugField(blank=True, editable=False),
+        ),
+        migrations.AlterField(
+            model_name='document',
+            name='file_type',
+            field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')], editable=False, max_length=4),
+        ),
+        migrations.AlterField(
+            model_name='tag',
+            name='slug',
+            field=models.SlugField(blank=True, editable=False),
+        ),
+        migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
+    ]
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -11,6 +11,7 @@ from django.conf import settings
 from django.db import models
 from django.template.defaultfilters import slugify
 from django.utils import timezone
+from django.utils.text import slugify
 from fuzzywuzzy import fuzz

 from .managers import LogManager
@@ -37,7 +38,7 @@ class MatchingModel(models.Model):
    )

    name = models.CharField(max_length=128, unique=True)
-    slug = models.SlugField(blank=True)
+    slug = models.SlugField(blank=True, editable=False)

    match = models.CharField(max_length=256, blank=True)
    matching_algorithm = models.PositiveIntegerField(
@@ -147,9 +148,7 @@ class MatchingModel(models.Model):
    def save(self, *args, **kwargs):

        self.match = self.match.lower()
-
-        if not self.slug:
-            self.slug = slugify(self.name)
+        self.slug = slugify(self.name)

        models.Model.save(self, *args, **kwargs)

@@ -452,7 +451,7 @@ class FileInfo:
        r = []
        for t in tags.split(","):
            r.append(Tag.objects.get_or_create(
-                slug=t.lower(),
+                slug=slugify(t),
                defaults={"name": t}
            )[0])
        return tuple(r)
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -1,23 +1,31 @@
 import logging
-import shutil
-import tempfile
+import os
 import re
+import shutil
+import subprocess
+import tempfile

+import dateparser
 from django.conf import settings
+from django.utils import timezone

 # This regular expression will try to find dates in the document at
 # hand and will match the following formats:
 # - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
 # - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
 # - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
+# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
+# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
+# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
 # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
 # - MONTH ZZZZ, with ZZZZ being 4 digits
 # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
 DATE_REGEX = re.compile(
-    r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
-    r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
-    r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
-    r'\b([^\W\d_]{3,9} [0-9]{4})\b'
+    r'(\b|(?<=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' +  # NOQA: E501
+    r'(\b|(?<=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' +  # NOQA: E501
+    r'(\b|(?<=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' +  # NOQA: E501
+    r'(\b|(?<=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
+    r'(\b|(?<=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
 )


@@ -32,6 +40,9 @@ class DocumentParser:
    """

    SCRATCH = settings.SCRATCH_DIR
+    DATE_ORDER = settings.DATE_ORDER
+    FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
+    OPTIPNG = settings.OPTIPNG_BINARY

    def __init__(self, path):
        self.document_path = path
@@ -45,6 +56,19 @@ class DocumentParser:
        """
        raise NotImplementedError()

+    def optimise_thumbnail(self, in_path):
+        """
+        Run the OptiPNG binary on ``in_path`` and return the output path.
+        """
+        optimised = os.path.join(self.tempdir, "optipng.png")
+        command = (self.OPTIPNG, "-o5", in_path, "-out", optimised)
+        if subprocess.Popen(command).wait() != 0:
+            raise ParseError("Optipng failed at {}".format(command))
+        return optimised
+
+    def get_optimised_thumbnail(self):
+        return self.optimise_thumbnail(self.get_thumbnail())
+
    def get_text(self):
        """
        Returns the text from the document and only the text.
@@ -55,7 +79,82 @@ class DocumentParser:
        """
        Returns the date of the document.
        """
-        raise NotImplementedError()
+
+        def __parser(ds, date_order):
+            """
+            Call dateparser.parse with a particular date ordering
+            """
+            return dateparser.parse(
+                ds,
+                settings={
+                    "DATE_ORDER": date_order,
+                    "PREFER_DAY_OF_MONTH": "first",
+                    "RETURN_AS_TIMEZONE_AWARE":
+                    True
+                }
+            )
+
+        date = None
+        date_string = None
+
+        next_year = timezone.now().year + 5  # Arbitrary 5 year future limit
+        title = os.path.basename(self.document_path)
+
+        # if filename date parsing is enabled, search there first:
+        if self.FILENAME_DATE_ORDER:
+            self.log("info", "Checking document title for date")
+            for m in re.finditer(DATE_REGEX, title):
+                date_string = m.group(0)
+
+                try:
+                    date = __parser(date_string, self.FILENAME_DATE_ORDER)
+                except TypeError:
+                    # Skip all matches that do not parse to a proper date
+                    continue
+
+                if date is not None and next_year > date.year > 1900:
+                    self.log(
+                        "info",
+                        "Detected document date {} based on string {} "
+                        "from document title"
+                        "".format(date.isoformat(), date_string)
+                    )
+                    return date
+
+        try:
+            # getting text after checking filename will save time if only
+            # looking at the filename instead of the whole text
+            text = self.get_text()
+        except ParseError:
+            return None
+
+        # Iterate through all regex matches in text and try to parse the date
+        date = None  # forget any out-of-range date left from the filename
+        for m in re.finditer(DATE_REGEX, text):
+            date_string = m.group(0)
+
+            try:
+                date = __parser(date_string, self.DATE_ORDER)
+            except TypeError:
+                # Skip all matches that do not parse to a proper date
+                continue
+
+            if date is not None and next_year > date.year > 1900:
+                break
+            date = None
+
+        if date is not None:
+            self.log(
+                "info",
+                "Detected document date {} based on string {}".format(
+                    date.isoformat(),
+                    date_string
+                )
+            )
+        else:
+            self.log("info", "Unable to detect date for document")
+
+        return date

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -7,7 +7,14 @@ class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):

    class Meta:
        model = Correspondent
-        fields = ("id", "slug", "name")
+        fields = (
+            "id",
+            "slug",
+            "name",
+            "match",
+            "matching_algorithm",
+            "is_insensitive"
+        )


 class TagSerializer(serializers.HyperlinkedModelSerializer):
@@ -15,7 +22,14 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
    class Meta:
        model = Tag
        fields = (
-            "id", "slug", "name", "colour", "match", "matching_algorithm")
+            "id",
+            "slug",
+            "name",
+            "colour",
+            "match",
+            "matching_algorithm",
+            "is_insensitive"
+        )


 class CorrespondentField(serializers.HyperlinkedRelatedField):
@@ -46,6 +60,7 @@ class DocumentSerializer(serializers.ModelSerializer):
            "checksum",
            "created",
            "modified",
+            "added",
            "file_name",
            "download_url",
            "thumbnail_url",
--- a/src/documents/static/js/colours.js
+++ b/src/documents/static/js/colours.js
@@ -0,0 +1,45 @@
+// The following jQuery snippet will add a small square next to the selection
+// drop-down on the `Add tag` page that will update to show the selected tag
+// colour as the drop-down value is changed.  When the page instead contains
+// selects whose id includes "-colour" (presumably one per row of an inline
+// tag list -- confirm), each of those gets its own square.
+
+django.jQuery(document).ready(function () {
+
+  const $ = django.jQuery;
+
+  // The swatch colour is the text of the currently selected <option>.
+  function selectedColour(select) {
+    return $(select).find('option:selected').prop('text');
+  }
+
+  // Insert a square right after `select` and repaint it on every change.
+  // `id` is optional; when given it becomes the square's element id.
+  function attachSquare(select, id) {
+    const square = $('<div class="colour_square"></div>');
+    if (id) {
+      square.attr('id', id);
+    }
+    square.css({
+      'float': 'left',
+      'width': '20px',
+      'height': '20px',
+      'margin': '5px',
+      'border': '1px solid rgba(0, 0, 0, .2)',
+      'background': selectedColour(select)
+    });
+    $(select).after(square);
+    $(select).change(function () {
+      square.css({'background': selectedColour(select)});
+    });
+  }
+
+  if ($('#id_colour').length) {
+    attachSquare($('#id_colour')[0]);
+  } else {
+    $('select[id*="-colour"]').each(function (index, element) {
+      attachSquare(element, 'colour_square_' + index);
+    });
+  }
+
+});
--- a/src/documents/templates/admin/base_site.html
+++ b/src/documents/templates/admin/base_site.html
@@ -3,10 +3,63 @@
 {# NOTE: This should probably be extending base.html.  See CSS comment below details. #}


+{% load static %}
 {% load custom_css from customisation %}
 {% load custom_js from customisation %}


+{% block extrahead %}
+	<link rel="icon" type="image/x-icon" href="{% url 'favicon' %}" />
+	<style>
+		#header {
+			background-color: #90a9b7;
+			line-height: inherit;
+			height: auto;
+		}
+		#branding h1 {
+			font-weight: inherit;
+			font-size: inherit;
+		}
+		.button,
+		.button:active,
+		.button:focus,
+		.button:hover,
+		a.button,
+		.submit-row input,
+		input[type="submit"],
+		input[type="submit"]:active,
+		input[type="submit"]:focus,
+		input[type="submit"]:hover,
+		input[type="button"],
+		input[type="button"]:active,
+		input[type="button"]:focus,
+		input[type="button"]:hover {
+			background-color: #074f57;
+		}
+		.module h2,
+		.module caption,
+		.inline-group h2 {
+			background-color: #90a9b7;
+		}
+		div.breadcrumbs {
+			background-color: #077187;
+		}
+		.module h2,
+		.module caption,
+		.inline-group h2 {
+			background-color: #077187;
+		}
+	</style>
+{% endblock %}
+
+
+{% block branding %}
+<h1 id="site-name">
+	<a href="{% url 'admin:index' %}"><img src="{% static 'paperless/img/logo-light.png' %}" alt="Paperless" /></a>
+</h1>
+{% endblock %}
+
+
 {% block blockbots %}

 	{% comment %}
--- a/src/documents/templates/admin/documents/document/change_form.html
+++ b/src/documents/templates/admin/documents/document/change_form.html
@@ -1,5 +1,21 @@
 {% extends 'admin/change_form.html' %}

+{% block content %}
+
+{{ block.super }}
+
+{% if next_object %}
+	<script type="text/javascript">//<![CDATA[
+		(function($){
+			$('<input type="submit" value="Save and edit next" name="_saveandeditnext" />')
+			.prependTo('div.submit-row');
+			$('<input type="hidden" value="{{next_object}}" name="_next_object" />')
+			.prependTo('div.submit-row');
+		})(django.jQuery);
+	//]]></script>
+{% endif %}
+
+{% endblock content %}

 {% block footer %}

@@ -10,4 +26,4 @@
 		django.jQuery(".field-created input").first().attr("type", "date")
 	</script>

-{% endblock footer %}
+{% endblock footer %}
--- a/src/documents/templates/admin/documents/document/change_list_results.html
+++ b/src/documents/templates/admin/documents/document/change_list_results.html
@@ -28,7 +28,7 @@
  }
  .result .header {
    padding: 5px;
-    background-color: #79AEC8;
+    background-color: #90a9b7;
    position: relative;
  }
  .result .header .checkbox {
--- a/src/documents/templates/admin/documents/document/select_object.html
+++ b/src/documents/templates/admin/documents/document/select_object.html
@@ -0,0 +1,50 @@
+{% extends "admin/base_site.html" %}
+
+
+{% load i18n l10n admin_urls static %}
+{% load staticfiles %}
+
+
+{% block extrahead %}
+	{{ block.super }}
+	{{ media }}
+	<script type="text/javascript" src="{% static 'admin/js/cancel.js' %}"></script>
+{% endblock %}
+
+
+{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %}
+
+
+{% block breadcrumbs %}
+	<div class="breadcrumbs">
+		<a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>
+		&rsaquo; <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
+		&rsaquo; <a href="{% url opts|admin_urlname:'changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
+		&rsaquo; {{ title }}
+	</div>
+{% endblock %}
+
+{% block content %}
+	<p>Please select the {{itemname}}.</p>
+	<form method="post">{% csrf_token %}
+		<div>
+			{% for obj in queryset %}
+			<input type="hidden" name="{{ action_checkbox_name }}" value="{{ obj.pk|unlocalize }}"/>
+			{% endfor %}
+			<p>
+				<select name="obj_id">
+					{% for obj in objects %}
+					<option value="{{ obj.id }}">{{ obj.name }}</option>
+					{% endfor %}
+				</select>
+			</p>
+
+			<input type="hidden" name="action" value="{{ action }}"/>
+			<input type="hidden" name="post" value="yes" />
+			<p>
+				<input type="submit" value="{% trans 'Confirm' %}" />
+				<a href="#" class="button cancel-link">{% trans "Go back" %}</a>
+			</p>
+		</div>
+	</form>
+{% endblock %}
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -2,6 +2,7 @@ from django.http import HttpResponse, HttpResponseBadRequest
 from django.views.generic import DetailView, FormView, TemplateView
 from django_filters.rest_framework import DjangoFilterBackend
 from django.conf import settings
+from django.utils import cache

 from paperless.db import GnuPG
 from paperless.mixins import SessionOrBasicAuthMixin
@@ -56,10 +57,12 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
        }

        if self.kwargs["kind"] == "thumb":
-            return HttpResponse(
+            response = HttpResponse(
                self._get_raw_data(self.object.thumbnail_file),
                content_type=content_types[Document.TYPE_PNG]
            )
+            cache.patch_cache_control(response, max_age=31536000, private=True)
+            return response

        response = HttpResponse(
            self._get_raw_data(self.object.source_file),
@@ -130,7 +133,7 @@ class DocumentViewSet(RetrieveModelMixin,
    filter_class = DocumentFilterSet
    search_fields = ("title", "correspondent__name", "content")
    ordering_fields = (
-        "id", "title", "correspondent__name", "created", "modified")
+        "id", "title", "correspondent__name", "created", "modified", "added")


 class LogViewSet(ReadOnlyModelViewSet):
--- a/src/paperless/checks.py
+++ b/src/paperless/checks.py
@@ -76,7 +76,12 @@ def binaries_check(app_configs, **kwargs):
    error = "Paperless can't find {}. Without it, consumption is impossible."
    hint = "Either it's not in your ${PATH} or it's not installed."

-    binaries = (settings.CONVERT_BINARY, settings.UNPAPER_BINARY, "tesseract")
+    binaries = (
+        settings.CONVERT_BINARY,
+        settings.OPTIPNG_BINARY,
+        settings.UNPAPER_BINARY,
+        "tesseract"
+    )

    check_messages = []
    for binary in binaries:
--- a/src/paperless/models.py
+++ b/src/paperless/models.py
@@ -1,15 +1,20 @@
+from django.contrib.auth.models import User as DjangoUser
+
+
 class User:
    """
-      This is a dummy django User used with our middleware to disable
-      login authentication if that is configured in paperless.conf
+    This is a dummy django User used with our middleware to disable
+    login authentication if that is configured in paperless.conf
    """
+
    is_superuser = True
    is_active = True
    is_staff = True
    is_authenticated = True

-    # Must be -1 to avoid colliding with real user ID's (which start at 1)
-    id = -1
+    @property
+    def id(self):
+        return DjangoUser.objects.order_by("pk").first().pk

    @property
    def pk(self):
@@ -17,9 +22,9 @@ class User:


 """
-  NOTE: These are here as a hack instead of being in the User definition
-  above due to the way pycodestyle handles lamdbdas.
-  See https://github.com/PyCQA/pycodestyle/issues/379 for more.
+NOTE: These are here as a hack instead of being in the User definition
+NOTE: above due to the way pycodestyle handles lambdas.
+NOTE: See https://github.com/PyCQA/pycodestyle/issues/379 for more.
 """

 User.has_module_perms = lambda *_: True
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -22,12 +22,12 @@ elif os.path.exists("/usr/local/etc/paperless.conf"):
    load_dotenv("/usr/local/etc/paperless.conf")


-def __get_boolean(key):
+def __get_boolean(key, default="NO"):
    """
    Return a boolean value based on whatever the user has supplied in the
    environment based on whether the value "looks like" it's True or not.
    """
-    return bool(os.getenv(key, "NO").lower() in ("yes", "y", "1", "t", "true"))
+    return os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")


 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
@@ -47,7 +47,7 @@ SECRET_KEY = os.getenv(


 # SECURITY WARNING: don't run with debug turned on in production!
-DEBUG = True
+DEBUG = __get_boolean("PAPERLESS_DEBUG", "YES")

 LOGIN_URL = "admin:login"

@@ -72,6 +72,7 @@ INSTALLED_APPS = [
    "corsheaders",
    "django_extensions",

+    "paperless",
    "documents.apps.DocumentsConfig",
    "reminders.apps.RemindersConfig",
    "paperless_tesseract.apps.PaperlessTesseractConfig",
@@ -82,6 +83,7 @@ INSTALLED_APPS = [
    "rest_framework",
    "crispy_forms",
    "django_filters",
+    "djangoql",

 ]

@@ -144,13 +146,18 @@ DATABASES = {
    }
 }

-if os.getenv("PAPERLESS_DBUSER") and os.getenv("PAPERLESS_DBPASS"):
+if os.getenv("PAPERLESS_DBUSER"):
    DATABASES["default"] = {
        "ENGINE": "django.db.backends.postgresql_psycopg2",
        "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
        "USER": os.getenv("PAPERLESS_DBUSER"),
-        "PASSWORD": os.getenv("PAPERLESS_DBPASS")
    }
+    if os.getenv("PAPERLESS_DBPASS"):
+        DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
+    if os.getenv("PAPERLESS_DBHOST"):
+        DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
+    if os.getenv("PAPERLESS_DBPORT"):
+        DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")


 # Password validation
@@ -198,6 +205,16 @@ STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
 MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")


+# Other
+
+# Disable Django's artificial limit on the number of form fields to submit at
+# once.  This is a protection against overloading the server, but since this is
+# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
+# of log entries outweigh the benefits of such a safeguard.
+
+DATA_UPLOAD_MAX_NUMBER_FIELDS = None
+
+
 # Paperless-specific stuff
 # You shouldn't have to edit any of these values.  Rather, you can set these
 # values in /etc/paperless.conf instead.
@@ -246,6 +263,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
 CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
 CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")

+# OptiPNG
+OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
+
 # Unpaper
 UNPAPER_BINARY = os.getenv("PAPERLESS_UNPAPER_BINARY", "unpaper")

@@ -292,3 +312,10 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")

 # Specify the default date order (for autodetected dates)
 DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
+FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
+
+# Specify for how many years a correspondent is considered recent. Recent
+# correspondents will be shown in a separate "Recent correspondents" filter as
+# well. Set to 0 to disable this filter.
+PAPERLESS_RECENT_CORRESPONDENT_YEARS = int(os.getenv(
+    "PAPERLESS_RECENT_CORRESPONDENT_YEARS", 0))
--- a/src/paperless/static/paperless/img/favicon.ico
+++ b/src/paperless/static/paperless/img/favicon.ico
--- a/src/paperless/static/paperless/img/logo-dark.png
+++ b/src/paperless/static/paperless/img/logo-dark.png
--- a/src/paperless/static/paperless/img/logo-light.png
+++ b/src/paperless/static/paperless/img/logo-light.png
--- a/src/paperless/urls.py
+++ b/src/paperless/urls.py
@@ -6,6 +6,7 @@ from django.views.decorators.csrf import csrf_exempt
 from django.views.generic import RedirectView
 from rest_framework.routers import DefaultRouter

+from paperless.views import FaviconView
 from documents.views import (
    CorrespondentViewSet,
    DocumentViewSet,
@@ -44,6 +45,9 @@ urlpatterns = [
    # File uploads
    url(r"^push$", csrf_exempt(PushView.as_view()), name="push"),

+    # Favicon
+    url(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),
+
    # The Django admin
    url(r"admin/", admin.site.urls),

--- a/src/paperless/version.py
+++ b/src/paperless/version.py
@@ -1 +1 @@
-__version__ = (2, 3, 0)
+__version__ = (2, 6, 1)
--- a/src/paperless/views.py
+++ b/src/paperless/views.py
@@ -1,3 +1,7 @@
+import os
+
+from django.http import HttpResponse
+from django.views.generic import View
 from rest_framework.pagination import PageNumberPagination


@@ -5,3 +9,17 @@ class StandardPagination(PageNumberPagination):
    page_size = 25
    page_size_query_param = "page-size"
    max_page_size = 100000
+
+
+class FaviconView(View):
+    """Serve favicon.ico from this package's static/paperless/img folder."""
+    def get(self, request, *args, **kwargs):
+        favicon = os.path.join(
+            os.path.dirname(__file__),
+            "static",
+            "paperless",
+            "img",
+            "favicon.ico"
+        )
+        with open(favicon, "rb") as f:
+            return HttpResponse(f, content_type="image/x-icon")
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -4,7 +4,6 @@ import re
 import subprocess
 from multiprocessing.pool import Pool

-import dateparser
 import langdetect
 import pyocr
 from django.conf import settings
@@ -14,7 +13,7 @@ from pyocr.libtesseract.tesseract_raw import \
 from pyocr.tesseract import TesseractError

 import pdftotext
-from documents.parsers import DocumentParser, ParseError, DATE_REGEX
+from documents.parsers import DocumentParser, ParseError

 from .languages import ISO639

@@ -33,7 +32,6 @@ class RasterisedDocumentParser(DocumentParser):
    DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
    UNPAPER = settings.UNPAPER_BINARY
-    DATE_ORDER = settings.DATE_ORDER
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
    OCR_ALWAYS = settings.OCR_ALWAYS

@@ -46,15 +44,18 @@ class RasterisedDocumentParser(DocumentParser):
        The thumbnail of a PDF is just a 500px wide image of the first page.
        """

+        out_path = os.path.join(self.tempdir, "convert.png")
+
+        # Run convert to get a decent thumbnail
        run_convert(
            self.CONVERT,
            "-scale", "500x5000",
            "-alpha", "remove",
            "{}[0]".format(self.document_path),
-            os.path.join(self.tempdir, "convert.png")
+            out_path
        )

-        return os.path.join(self.tempdir, "convert.png")
+        return out_path

    def _is_ocred(self):

@@ -152,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
                )
                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                return raw_text
-            raise OCRError("Language detection failed")
+            error_msg = ("Language detection failed. Set "
+                         "PAPERLESS_FORGIVING_OCR in config file to continue "
+                         "anyway.")
+            raise OCRError(error_msg)

        if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
            raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
@@ -172,8 +176,8 @@ class RasterisedDocumentParser(DocumentParser):
                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                return raw_text
            raise OCRError(
-                "The guessed language is not available in this instance of "
-                "Tesseract."
+                "The guessed language ({}) is not available in this instance "
+                "of Tesseract.".format(guessed_language)
            )

    def _ocr(self, imgs, lang):
@@ -202,40 +206,6 @@ class RasterisedDocumentParser(DocumentParser):
        text += self._ocr(imgs[middle + 1:], self.DEFAULT_OCR_LANGUAGE)
        return text

-    def get_date(self):
-        date = None
-        datestring = None
-
-        try:
-            text = self.get_text()
-        except ParseError as e:
-            return None
-
-        # Iterate through all regex matches and try to parse the date
-        for m in re.finditer(DATE_REGEX, text):
-            datestring = m.group(0)
-
-            try:
-                date = dateparser.parse(
-                           datestring,
-                           settings={'DATE_ORDER': self.DATE_ORDER,
-                                     'PREFER_DAY_OF_MONTH': 'first',
-                                     'RETURN_AS_TIMEZONE_AWARE': True})
-            except TypeError:
-                # Skip all matches that do not parse to a proper date
-                continue
-
-            if date is not None:
-                break
-
-        if date is not None:
-            self.log("info", "Detected document date " + date.isoformat() +
-                             " based on string " + datestring)
-        else:
-            self.log("info", "Unable to detect date for document")
-
-        return date
-

 def run_convert(*args):

@@ -251,7 +221,8 @@ def run_convert(*args):

 def run_unpaper(args):
    unpaper, pnm = args
-    command_args = unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm")
+    command_args = (unpaper, "--overwrite", pnm,
+                    pnm.replace(".pnm", ".unpaper.pnm"))
    if not subprocess.Popen(command_args).wait() == 0:
        raise ParseError("Unpaper failed at {}".format(command_args))

--- a/src/paperless_tesseract/tests/samples/tests_date_1.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_1.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_1.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_1.png
--- a/src/paperless_tesseract/tests/samples/tests_date_2.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_2.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_2.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_2.png
--- a/src/paperless_tesseract/tests/samples/tests_date_3.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_3.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_3.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_3.png
--- a/src/paperless_tesseract/tests/samples/tests_date_4.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_4.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_4.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_4.png
--- a/src/paperless_tesseract/tests/samples/tests_date_5.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_5.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_5.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_5.png
--- a/src/paperless_tesseract/tests/samples/tests_date_6.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_6.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_6.png
+++ b/src/paperless_tesseract/tests/samples/tests_date_6.png
--- a/src/paperless_tesseract/tests/samples/tests_date_7.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_7.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_8.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_8.pdf
--- a/src/paperless_tesseract/tests/samples/tests_date_9.pdf
+++ b/src/paperless_tesseract/tests/samples/tests_date_9.pdf
--- a/src/paperless_tesseract/tests/test_date.py
+++ b/src/paperless_tesseract/tests/test_date.py
@@ -8,6 +8,7 @@ from dateutil import tz
 from django.test import TestCase

 from ..parsers import RasterisedDocumentParser
+from django.conf import settings


 class TestDate(TestCase):
@@ -15,73 +16,67 @@ class TestDate(TestCase):
    SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
    SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])

+    MOCK_SCRATCH = "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH"  # NOQA: E501
+
    def setUp(self):
        os.makedirs(self.SCRATCH, exist_ok=True)

    def tearDown(self):
        shutil.rmtree(self.SCRATCH)

-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
    def test_date_format_1(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
        document._text = "lorem ipsum 130218 lorem ipsum"
        self.assertEqual(document.get_date(), None)

-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
    def test_date_format_2(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
        document._text = "lorem ipsum 2018 lorem ipsum"
        self.assertEqual(document.get_date(), None)

-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
    def test_date_format_3(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
        document._text = "lorem ipsum 20180213 lorem ipsum"
        self.assertEqual(document.get_date(), None)

-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
    def test_date_format_4(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
        document._text = "lorem ipsum 13.02.2018 lorem ipsum"
+        date = document.get_date()
        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2018, 2, 13, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
    def test_date_format_5(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
        document._text = (
-            "lorem ipsum 130218, 2018, 20180213 and 13.02.2018 lorem ipsum")
+            "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
+            "ipsum"
+        )
+        date = document.get_date()
        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2018, 2, 13, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
    def test_date_format_6(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
@@ -98,10 +93,7 @@ class TestDate(TestCase):
        )
        self.assertEqual(document.get_date(), None)

-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
    def test_date_format_7(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
@@ -110,277 +102,83 @@ class TestDate(TestCase):
            "März 2019\n"
            "lorem ipsum"
        )
+        date = document.get_date()
        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2019, 3, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
    def test_date_format_8(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
-        document._text = ("lorem ipsum\n"
-                          "Wohnort\n"
-                          "3100\n"
-                          "IBAN\n"
-                          "AT87 4534\n"
-                          "1234\n"
-                          "1234 5678\n"
-                          "BIC\n"
-                          "lorem ipsum\n"
-                          "März 2020")
-        self.assertEqual(document.get_date(),
-                         datetime.datetime(2020, 3, 1, 0, 0,
-                                           tzinfo=tz.tzutc()))
+        document._text = (
+            "lorem ipsum\n"
+            "Wohnort\n"
+            "3100\n"
+            "IBAN\n"
+            "AT87 4534\n"
+            "1234\n"
+            "1234 5678\n"
+            "BIC\n"
+            "lorem ipsum\n"
+            "März 2020"
+        )
+        self.assertEqual(
+            document.get_date(),
+            datetime.datetime(
+                2020, 3, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )

-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
    def test_date_format_9(self):
        input_file = os.path.join(self.SAMPLE_FILES, "")
        document = RasterisedDocumentParser(input_file)
-        document._text = ("lorem ipsum\n"
-                          "27. Nullmonth 2020\n"
-                          "März 2020\n"
-                          "lorem ipsum")
-        self.assertEqual(document.get_date(),
-                         datetime.datetime(2020, 3, 1, 0, 0,
-                                           tzinfo=tz.tzutc()))
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_1_pdf(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), True)
+        document._text = (
+            "lorem ipsum\n"
+            "27. Nullmonth 2020\n"
+            "März 2020\n"
+            "lorem ipsum"
+        )
        self.assertEqual(
            document.get_date(),
-            datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2020, 3, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
        )

    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
+        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
+        return_value="01-07-0590 00:00:00"
    )
-    def test_get_text_1_png(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
-        document = RasterisedDocumentParser(input_file)
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
+    def test_crazy_date_past(self, *args):
+        document = RasterisedDocumentParser("/dev/null")
        document.get_text()
-        self.assertEqual(document._is_ocred(), False)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
-        )
+        self.assertIsNone(document.get_date())

    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
+        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
+        return_value="01-07-2350 00:00:00"
    )
-    def test_get_text_2_pdf(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
-        document = RasterisedDocumentParser(input_file)
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
+    def test_crazy_date_future(self, *args):
+        document = RasterisedDocumentParser("/dev/null")
        document.get_text()
-        self.assertEqual(document._is_ocred(), True)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
-        )
+        self.assertIsNone(document.get_date())

    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
+        "paperless_tesseract.parsers.RasterisedDocumentParser.get_text",
+        return_value="01-07-0590 00:00:00"
    )
-    def test_get_text_2_png(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
-        document = RasterisedDocumentParser(input_file)
+    @mock.patch(MOCK_SCRATCH, SCRATCH)
+    def test_crazy_date_past_repeat(self, *args):
+        document = RasterisedDocumentParser("/dev/null")
        document.get_text()
-        self.assertEqual(document._is_ocred(), False)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_3_pdf(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), True)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_3_png(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), False)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_4_pdf(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), True)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_4_png(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), False)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_5_pdf(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), True)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_5_png(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), False)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_6_pdf_us(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        document.DATE_ORDER = "MDY"
-        self.assertEqual(document._is_ocred(), True)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_6_png_us(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        document.DATE_ORDER = "MDY"
-        self.assertEqual(document._is_ocred(), False)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_6_pdf_eu(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), True)
-        self.assertEqual(document.get_date(), None)
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_6_png_eu(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), False)
-        self.assertEqual(document.get_date(), None)
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_7_pdf(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), True)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_8_pdf(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), True)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
-        )
-
-    @mock.patch(
-        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
-        SCRATCH
-    )
-    def test_get_text_9_pdf(self):
-        input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
-        document = RasterisedDocumentParser(input_file)
-        document.get_text()
-        self.assertEqual(document._is_ocred(), True)
-        self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
-        )
+        self.assertIsNone(document.get_date())
--- a/src/paperless_text/parsers.py
+++ b/src/paperless_text/parsers.py
@@ -1,11 +1,9 @@
 import os
-import re
 import subprocess

-import dateparser
 from django.conf import settings

-from documents.parsers import DocumentParser, ParseError, DATE_REGEX
+from documents.parsers import DocumentParser, ParseError


 class TextDocumentParser(DocumentParser):
@@ -16,7 +14,6 @@ class TextDocumentParser(DocumentParser):
    CONVERT = settings.CONVERT_BINARY
    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
    UNPAPER = settings.UNPAPER_BINARY
-    DATE_ORDER = settings.DATE_ORDER
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
    OCR_ALWAYS = settings.OCR_ALWAYS

@@ -26,7 +23,7 @@ class TextDocumentParser(DocumentParser):

    def get_thumbnail(self):
        """
-        The thumbnail of a txt is just a 500px wide image of the text
+        The thumbnail of a text file is just a 500px wide image of the text
        rendered onto a letter-sized page.
        """
        # The below is heavily cribbed from https://askubuntu.com/a/590951
@@ -35,7 +32,7 @@ class TextDocumentParser(DocumentParser):
        text_color = "black"  # text color
        psize = [500, 647]  # icon size
        n_lines = 50  # number of lines to show
-        output_file = os.path.join(self.tempdir, "convert-txt.png")
+        out_path = os.path.join(self.tempdir, "convert.png")

        temp_bg = os.path.join(self.tempdir, "bg.png")
        temp_txlayer = os.path.join(self.tempdir, "tx.png")
@@ -46,9 +43,13 @@ class TextDocumentParser(DocumentParser):
            work_size = ",".join([str(n - 1) for n in psize])
            r = str(round(psize[0] / 10))
            rounded = ",".join([r, r])
-            run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
-                        '"fill ', bg_color, ' roundrectangle 0,0,',
-                        work_size, ",", rounded, '" ', temp_bg)
+            run_command(
+                self.CONVERT,
+                "-size ", picsize,
+                ' xc:none -draw ',
+                '"fill ', bg_color, ' roundrectangle 0,0,', work_size, ",", rounded, '" ',  # NOQA: E501
+                temp_bg
+            )

        def read_text():
            with open(self.document_path, 'r') as src:
@@ -57,22 +58,29 @@ class TextDocumentParser(DocumentParser):
                return text.replace('"', "'")

        def create_txlayer():
-            run_command(self.CONVERT,
-                        "-background none",
-                        "-fill",
-                        text_color,
-                        "-pointsize", "12",
-                        "-border 4 -bordercolor none",
-                        "-size ", txsize,
-                        ' caption:"', read_text(), '" ',
-                        temp_txlayer)
+            run_command(
+                self.CONVERT,
+                "-background none",
+                "-fill",
+                text_color,
+                "-pointsize", "12",
+                "-border 4 -bordercolor none",
+                "-size ", txsize,
+                ' caption:"', read_text(), '" ',
+                temp_txlayer
+            )

        create_txlayer()
        create_bg()
-        run_command(self.CONVERT, temp_bg, temp_txlayer,
-                    "-background None -layers merge ", output_file)
+        run_command(
+            self.CONVERT,
+            temp_bg,
+            temp_txlayer,
+            "-background None -layers merge ",
+            out_path
+        )

-        return output_file
+        return out_path

    def get_text(self):

@@ -84,40 +92,6 @@ class TextDocumentParser(DocumentParser):

        return self._text

-    def get_date(self):
-        date = None
-        datestring = None
-
-        try:
-            text = self.get_text()
-        except ParseError as e:
-            return None
-
-        # Iterate through all regex matches and try to parse the date
-        for m in re.finditer(DATE_REGEX, text):
-            datestring = m.group(0)
-
-            try:
-                date = dateparser.parse(
-                           datestring,
-                           settings={'DATE_ORDER': self.DATE_ORDER,
-                                     'PREFER_DAY_OF_MONTH': 'first',
-                                     'RETURN_AS_TIMEZONE_AWARE': True})
-            except TypeError:
-                # Skip all matches that do not parse to a proper date
-                continue
-
-            if date is not None:
-                break
-
-        if date is not None:
-            self.log("info", "Detected document date " + date.isoformat() +
-                             " based on string " + datestring)
-        else:
-            self.log("info", "Unable to detect date for document")
-
-        return date
-

 def run_command(*args):
    environment = os.environ.copy()
--- a/src/reminders/migrations/0002_auto_20181007_1420.py
+++ b/src/reminders/migrations/0002_auto_20181007_1420.py
@@ -0,0 +1,19 @@
+# Generated by Django 2.0.8 on 2018-10-07 14:20
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    # Applies after the reminders app's initial migration.
+    dependencies = [
+        ('reminders', '0001_initial'),
+    ]
+    # Re-declare Reminder.document as a PROTECT foreign key to Document.
+    operations = [
+        migrations.AlterField(
+            model_name='reminder',
+            name='document',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, to='documents.Document'),
+        ),
+    ]
--- a/src/reminders/models.py
+++ b/src/reminders/models.py
@@ -4,7 +4,6 @@ from django.db import models
 class Reminder(models.Model):

    document = models.ForeignKey(
-        "documents.Document", on_delete=models.PROTECT
-        )
+        "documents.Document", on_delete=models.PROTECT)
    date = models.DateTimeField()
    note = models.TextField(blank=True)
--- a/src/tox.ini
+++ b/src/tox.ini
@@ -5,7 +5,7 @@

 [tox]
 skipsdist = True
-envlist = py34, py35, py36, pycodestyle, doc
+envlist = py34, py35, py36, py37, pycodestyle, doc

 [testenv]
 commands = pytest
@@ -17,6 +17,5 @@ deps=pycodestyle

 [testenv:doc]
 deps =
-  -r{toxinidir}/../requirements.txt
-  sphinx
+  -r {toxinidir}/../requirements.txt
 commands=sphinx-build -b html ../docs ../docs/_build -W
Author	SHA1	Message	Date
Daniel Quinn	94c2950afe	Ignore sqlite3-journal files too	2019-01-27 13:48:05 +00:00
Daniel Quinn	9f56bf9992	Fix missing links	2019-01-27 13:47:40 +00:00
Daniel Quinn	6df35e4cb7	Merge branch 'sbrunner-dql'	2019-01-27 13:42:58 +00:00
Daniel Quinn	b4b7d167d1	Update dependencies & add djangoql	2019-01-27 13:39:56 +00:00
Daniel Quinn	4936fad542	Merge pull request #488 from sbrunner/no-tab Tabs are not allowed in Yaml files	2019-01-27 13:06:46 +00:00
Daniel Quinn	3c78105fd7	Align example conf with real-world defaults	2019-01-27 13:05:56 +00:00
Daniel Quinn	a58a7ce0f7	Move note about DEBUG up into 3.	2019-01-27 13:03:55 +00:00
Daniel Quinn	792aeee11e	Merge pull request #487 from bmsleight/issue486 Add note runserver PAPERLESS_DEBUG='true'	2019-01-27 13:00:37 +00:00
Daniel Quinn	5588e86855	Merge pull request #484 from cribbstechnologies/patch-1 adding information about NFS mounts and inotify	2019-01-27 12:54:03 +00:00
Daniel Quinn	97f1e4ab16	pep8	2019-01-27 12:52:15 +00:00
Daniel Quinn	e4dece8e53	Merge pull request #483 from tsia/patch-1 added fields to the correspondent and tag REST API	2019-01-27 12:50:42 +00:00
Daniel Quinn	c5c204f605	Merge pull request #481 from CkuT/cache-control Add Cache-Control header for thumbnails	2019-01-27 12:46:32 +00:00
Daniel Quinn	611ec6840b	Merge pull request #489 from sbrunner/docker-psycopg2-clean Install psycopg2 in the Docker container	2019-01-27 12:25:32 +00:00
Daniel Quinn	2cd077d12d	Merge pull request #475 from syntonym/master Catch IMAP connection errors	2019-01-27 12:19:18 +00:00
Stéphane Brunner	4efb153e86	Add and configure DjangoQL	2019-01-26 22:15:28 +01:00
Stéphane Brunner	25e953bbf0	Install psycopg2 in the Docker container	2019-01-26 14:33:51 +01:00
Stéphane Brunner	0509d5a3d2	Tabs are not allowed in Yaml files	2019-01-26 14:23:11 +01:00
bmsleight	5e674f17af	Add note runserver PAPERLESS_DEBUG='true'	2019-01-26 13:15:45 +00:00
Brian Cribbs	7c7a814096	adding information about NFS mounts and inotify	2019-01-22 15:18:14 -05:00
tsia	43e71cfcaa	added fields to the correspondent and tag REST API	2019-01-22 20:51:20 +01:00
CkuT	79868930f1	Add Cache-Control header for thumbnails This drastically optimizes admin interface loading by telling the browser to cache thumbnails. The max-age recommendation is 1 year according to rfc2616 Closes #411	2019-01-21 20:59:40 +01:00
Daniel Quinn	0256dcbe32	Merge pull request #478 from wiwie/patch-1 Update requirements.rst	2019-01-21 00:32:01 +00:00
Christian Wiwie	29db177ce2	Update requirements.rst	2019-01-18 20:33:35 +01:00
syntonym	5c1edf78ce	Catches OSError on IMAP connection error When something goes wrong with the imaplib.IMAP4_SSL connection (like the host is temporarely down or the DNS does not resolve) it generates an OSError which is currently not catched and handled. Now OSErrors are translated to MailFetcherErrors which get logged and the IMAP connection is retried in the next IMAP check. Fixes #474	2019-01-14 19:08:59 +01:00
Daniel Quinn	60e8990a7b	Update to include #471	2019-01-04 11:38:57 +00:00
Daniel Quinn	75a79ac204	Merge pull request #471 from khrise/added-missing-column-to-rest-api Exposing documents' "added" timestamp via Rest API.	2019-01-04 11:36:38 +00:00
khrise	0c47907dda	Exposing documents' "added" timestamp via Rest API.	2019-01-03 20:23:34 +00:00
Daniel Quinn	cea8332038	Merge pull request #468 from ddddavidmartin/document_ocr_always_setting Reference PAPERLESS_OCR_ALWAYS in example config file.	2018-12-31 14:30:58 +00:00
Daniel Quinn	5982cb693a	Include notes for #466	2018-12-30 18:30:23 +00:00
Daniel Quinn	73a02d40c4	Merge pull request #453 from jonaswinkler/patch-1 Update 0022_auto_20181007_1420.py	2018-12-30 18:27:27 +00:00
Daniel Quinn	b541765817	Merge pull request #464 from colinfrei/patch-2 remove unnecessary character	2018-12-30 18:26:12 +00:00
Daniel Quinn	28ffd1ec6b	Merge pull request #466 from colinfrei/patch-3 Set consume directory for webserver too	2018-12-30 18:23:33 +00:00
Daniel Quinn	5760aa0894	Merge pull request #467 from danielquinn/feature/update-travis Fix the tests so they finally start passing everywhere they should.	2018-12-30 18:22:46 +00:00
Daniel Quinn	562e5f644d	Update changelog with test changes	2018-12-30 18:19:05 +00:00
Daniel Quinn	5ab2009ebf	Tweak Travis to include Python3.7 and pipenv	2018-12-30 18:18:37 +00:00
Daniel Quinn	637b0d4cc2	Drop problematic tests Some tests had differing outcomes depending on the version of Tesseract installed on the test system. This lead to a bunch of false test failures, which lead to people (including me) just ignoring the Travis results. This commit removes those tests, and while it reduces our coverage, at least the results are predictable.	2018-12-30 17:32:45 +00:00
Daniel Quinn	4a71c33537	Use [[]] instead of [] in Bash scripts	2018-12-30 17:32:17 +00:00
Daniel Quinn	cf36c8467e	Update the Pipfile lock	2018-12-30 17:32:04 +00:00
Daniel Quinn	dafa6a4c71	Use pipenv in the Docker build	2018-12-30 17:31:26 +00:00
Daniel Quinn	a3c5ec834d	Codify spaces in .yml files	2018-12-30 17:31:13 +00:00
Daniel Quinn	be57dbe4c8	Merge pull request #462 from colinfrei/patch-1 Type in docker-compose command	2018-12-30 14:46:17 +00:00
Daniel Quinn	4d50c7e105	Add Python 3.7 to test suite	2018-12-30 14:09:32 +00:00
Daniel Quinn	27af2603f5	Use modern languages for sample test files	2018-12-30 14:09:17 +00:00
Daniel Quinn	ff5b34179a	Bump version	2018-12-30 12:44:26 +00:00
Daniel Quinn	0334617287	Update language READMEs with new logo & new language navigation	2018-12-30 12:44:13 +00:00
Daniel Quinn	f8b43fa74b	Add the new logo	2018-12-30 12:40:29 +00:00
Daniel Quinn	1ff06d0dd9	Fix .gitignore exclusion that hid the logos	2018-12-30 12:39:07 +00:00
Daniel Quinn	4ad6813d11	Add the new logo I bought this logo from the excellent logojoy.com site, and am including the source files here to be covered under the project license (GPL).	2018-12-30 12:20:08 +00:00
Colin Frei	cbc5f0603f	Set consume directory for webserver too Fixes #289 The HTTP POST endpoint saves the file in the consume directory. This needs to be shared between the two services so that the file is actually consumed.	2018-12-30 07:43:49 +01:00
Colin Frei	0d21bdeffa	remove unnecessary character	2018-12-28 17:43:36 +01:00
Colin Frei	b1f9b18b8c	Type in docker-compose command	2018-12-26 16:43:22 +01:00
David Martin	4d13521f36	Reference PAPERLESS_OCR_ALWAYS in example config file. This setting was introduced when support for retrieving the text layer from documents was added. Having it in the example config makes it more clear that it exists.	2018-12-16 18:11:39 +11:00
Daniel Quinn	7b4785bdb9	Merge pull request #450 from erikarvstedt/fix-parser-test Fix date test sample image	2018-12-11 11:43:14 +00:00
jonaswinkler	baf89cad8e	Update 0022_auto_20181007_1420.py copy paste error.	2018-12-10 18:38:19 +01:00
Daniel Quinn	3c2a1a8c13	Merge pull request #451 from speshak/remote_pg Add DBHOST & DBPORT parameters to settings	2018-12-06 23:38:50 +00:00
Daniel Quinn	1c7047bbb8	Move ipython out of the base dependencies	2018-12-06 23:28:33 +00:00
Scott Peshak	96dafe8c43	Add psycopg2 dependencies to Dockerfile	2018-12-02 16:14:58 -06:00
Scott Peshak	d6896daece	Add psycopg2 to requirements.txt	2018-12-02 16:14:58 -06:00
Scott Peshak	d12f0642f2	Add DBHOST & DBPORT parameters Resolves #445	2018-12-02 15:20:29 -06:00
Erik Arvstedt	a19f0ef97e	Fix date test sample image The previous version of `tests_date_3.png` had too much spacing between the `0` and the `8` glyphs, which resulted in the year getting parsed as `200 8` in Tesseract 3.05.00 (+ tessdata 3.04.00). This caused the date parsing test to fail.	2018-12-02 15:10:21 +01:00
Erik Arvstedt	ec7125b6bb	Fix travis ocr languages The tests need German language support for Tesseract	2018-12-02 15:10:20 +01:00
Daniel Quinn	e3a616ebc3	Version bump	2018-12-01 17:12:34 +00:00
Daniel Quinn	f898ec792f	Added notes for 2.6.0	2018-12-01 17:11:58 +00:00
Daniel Quinn	f45b6762f2	Merge branch 'jat255-ENH_filename_date_parsing'	2018-12-01 17:10:26 +00:00
Daniel Quinn	d544f269e0	Conform everything to the coding standards https://paperless.readthedocs.io/en/latest/contributing.html#additional-style-guides	2018-12-01 17:09:12 +00:00
Daniel Quinn	650db75c2b	Merge branch 'ENH_filename_date_parsing' of https://github.com/jat255/paperless into jat255-ENH_filename_date_parsing	2018-12-01 16:57:16 +00:00
Daniel Quinn	7dbb77e57b	Add a .editorconfig	2018-12-01 16:56:58 +00:00
Daniel Quinn	f1b3312bcb	Merge branch 'jat255-ENH_tag_colour_override'	2018-12-01 16:22:38 +00:00
Daniel Quinn	ea05ab2b06	Restructure colour.js to work withing a .ready()	2018-12-01 16:22:19 +00:00
Daniel Quinn	4f4c515629	Add colours to the tags pages	2018-12-01 16:21:58 +00:00
Daniel Quinn	c1f926a40c	Merge branch 'ENH_tag_colour_override' of https://github.com/jat255/paperless into jat255-ENH_tag_colour_override	2018-12-01 15:56:37 +00:00
Daniel Quinn	c1d18c1e83	Fix language guesses in tests It turns out that the Lorem ipsum text in the sample files was confuing the language guesser, causing it to think the file was in Catalan and not English or German.	2018-12-01 15:55:59 +00:00
Joshua Taillon	ba452e0524	move tag colour override to static folder	2018-12-01 09:14:44 -05:00
Daniel Quinn	c5488dcb98	Merge pull request #441 from jat255/patch-1 Update gunicorn commands	2018-11-30 19:45:01 +00:00
Joshua Taillon	d6eefbccee	encapsulate in if blocks so no errors on non-tag pages; added support for edit tags page	2018-11-17 21:34:11 -05:00
Joshua Taillon	a813288aaf	add example override for tag colour display	2018-11-17 09:18:36 -05:00
Joshua Taillon	63e2fbe0c9	Update paperless-webserver.service Update `gunicorn` command to use `--pythonpath`	2018-11-16 09:21:07 -05:00
Joshua Taillon	597a7bb391	Update setup.rst The provided `gunicorn` command did not work for me, failing with the following error: ``` ModuleNotFoundError: No module named '/home/paperless/paperless/src/paperless' ``` The solution was to provide only `paperless.wsgi` as the argument to `gunicorn`, and provide a flag for `--pythonpath`. After changing it to this, the server started up fine.	2018-11-16 09:20:08 -05:00
Joshua Taillon	730daa3d6d	Merge branch 'master' of github.com:danielquinn/paperless into ENH_filename_date_parsing	2018-11-15 23:17:59 -05:00
Joshua Taillon	c225281f95	Change the massive regex to match boundaries with _ or - characters (not just word breaks); add line for year first formats like YYYY-MM-DD	2018-11-15 20:38:53 -05:00
Joshua Taillon	e1d8744c66	Add option for parsing of date from filename (and associated tests)	2018-11-15 20:32:15 -05:00
Joshua Taillon	4409f65840	Update date tests to be more explicit with settings and allow tests to pass if using a timezone other than UTC	2018-11-15 20:30:23 -05:00
Daniel Quinn	c83dc666a4	I'm going to have to ditch requirements.txt if it can't be reliably generated	2018-11-03 13:42:03 +00:00
Daniel Quinn	9ab50ed09d	Fix requiremnts.txt	2018-11-03 13:29:22 +00:00
Daniel Quinn	e0acb4a40b	Update dependencies This includes a security update for requests.	2018-11-03 12:49:35 +00:00
Daniel Quinn	eca6250c1b	Fix the correspondent filters #423	2018-11-03 11:06:55 +00:00
Daniel Quinn	33abec0663	Code cleanup	2018-11-03 11:05:22 +00:00
Daniel Quinn	d825667c9b	Allow an infinite number of logs to be deleted.	2018-11-03 10:25:51 +00:00
Daniel Quinn	84511f8418	Merge pull request #432 from deanpcmad/patch-1 Added missing ; to nginx config	2018-10-31 13:12:32 +00:00
Dean Perry	81e488b90d	added missing ; to nginx config	2018-10-31 12:39:48 +00:00
Daniel Quinn	bff28113df	Merge pull request #425 from mrwacky42/remove_vagrant Remove Vagrant docs	2018-10-14 09:57:41 +01:00
Sharif Nassar	0b377a76d0	Remove Vagrant docs * Vagrant does not seem to have any libvirt boxes for Ubuntu any more. * Vagrant 2 was released a year ago, but vagrant-libvirt only claims to support up to Vagrant 1.8.	2018-10-13 11:31:53 -07:00
Daniel Quinn	ec1d5c80ff	Add pip install to update process	2018-10-08 10:38:53 +01:00
Daniel Quinn	bd95804fbf	Merge pull request #421 from ddddavidmartin/clarify_forgiving_ocr_handling Clarify forgiving ocr handling	2018-10-08 09:35:57 +00:00
Daniel Quinn	8dc355a66f	Merge pull request #422 from erikarvstedt/inotify-linux requirements.txt: bring back Linux-only restriction for inotify-simple	2018-10-08 09:34:47 +00:00
Daniel Quinn	fbb389553c	Merge pull request #419 from ddddavidmartin/let_unpaper_overwrite_temp_files Let unpaper overwrite temporary files.	2018-10-08 09:32:30 +00:00
Erik Arvstedt	f8cfbb44d2	requirements.txt: bring back Linux-only restriction for inotify-simple Fixes #418	2018-10-08 11:00:34 +02:00
David Martin	818780a191	Add PAPERLESS_FORGIVING_OCR option to example config. It helps having it in the example config as that makes it more clear that it exists.	2018-10-08 19:38:38 +11:00
David Martin	b350ec48b7	Mention FORGIVING_OCR config option when language detection fails. It is not obvious that the PAPERLESS_FORGIVING_OCR allows to let document consumption happen even if no language can be detected. Mentioning it in the actual error message in the log seems like the best way to make it clear.	2018-10-08 19:37:05 +11:00
David Martin	f948ee11be	Let unpaper overwrite temporary files. I'm not sure what the circumstances are, but it looks like unpaper can attempt to write a temporary file that already exists [0]. This then fails the consumption. As per daedadu's comment simply letting unpaper overwrite files fixes this. [0] unpaper: error: output file '/tmp/paperless/paperless-pjkrcr4l/convert-0000.unpaper.pnm' already present. See https://web.archive.org/web/20181008081515/https://github.com/danielquinn/paperless/issues/406#issue-360651630	2018-10-08 19:12:11 +11:00
Daniel Quinn	2ef2bf873e	Version bump: 2.5.0	2018-10-07 16:30:36 +01:00
Daniel Quinn	0bb7d27269	pep8	2018-10-07 16:30:02 +01:00
Daniel Quinn	ce5e8b2658	Rework user hack for "login-free" sessions #394	2018-10-07 16:27:41 +01:00
Daniel Quinn	3f572afb8b	Add a little more read-only info for documents	2018-10-07 16:26:05 +01:00
Daniel Quinn	5c3cb1e4ab	Rework how slugs are generated/referenced #393	2018-10-07 16:25:51 +01:00
Daniel Quinn	c7f4bfe4f3	Add migration that should have come in some time ago	2018-10-07 16:23:03 +01:00
Daniel Quinn	65d6599964	Fix formatting	2018-10-07 16:22:52 +01:00
Daniel Quinn	5d32e89c44	Wrap each document consumption in a transaction	2018-10-07 14:56:56 +01:00
Daniel Quinn	750ab5bf85	Use optipng to optimise document thumbnails	2018-10-07 14:56:38 +01:00
Daniel Quinn	2a3f766b93	Consolidate get_date onto the DocumentParser parent class	2018-10-07 14:56:02 +01:00
Daniel Quinn	14bb52b6a4	Wrap document consumption in a transaction #262	2018-10-07 13:12:22 +01:00
Daniel Quinn	b5176d207e	Hopefully fix Travis	2018-10-01 20:40:43 +01:00
Daniel Quinn	e4044d0df9	Update version number & changelog	2018-10-01 20:40:32 +01:00
Daniel Quinn	bacdd51fd7	Merge pull request #413 from euri10/master Fix issue where tesseract langages weren't installed properly	2018-10-01 19:40:04 +00:00
Daniel Quinn	8010d72f18	Tweak the date guesser to not allow dates prior to 1900 (#414 )	2018-10-01 20:03:47 +01:00
euri10	9dd76f1b87	Fix issue where tesseract langages weren't installed properly	2018-09-24 13:30:10 +02:00
Daniel Quinn	a511d34d69	Fix implementation of django-filter	2018-09-23 15:47:14 +01:00
Daniel Quinn	35c5b8e263	Add note about tweaks to psql connections	2018-09-23 14:05:35 +01:00
Daniel Quinn	8726b0316c	Add note about import/export process changes	2018-09-23 14:03:38 +01:00
Daniel Quinn	acf6caca2f	Add a tox test for Python 3.7	2018-09-23 14:01:35 +01:00
Daniel Quinn	b20d7eca03	Tweak settings.py to allow for TRUST-based PostgreSQL auth	2018-09-23 14:01:15 +01:00
Daniel Quinn	d17497fd5b	Move the unique key on checksums to migration 15 This shouldn't affect anyone, since this migration is pretty old, but it allows people using PostgreSQL to actually run Paperless.	2018-09-23 14:00:27 +01:00
Daniel Quinn	090565d84c	Tweak the import/export system to handle encryption choices better Now when you export a document, the `storage_type` value is always `unencrypted` (since that's what it is when it's exported anyway), and the flag is set by the importing script instead, based on the existence of a `PAPERLESS_PASSPHRASE` environment variable, indicating that encryption is enabled.	2018-09-23 13:58:40 +01:00
Daniel Quinn	79e1e60238	Fix typo	2018-09-23 12:59:56 +01:00
Daniel Quinn	ff111f1bde	Update changelog for new stuff from #405	2018-09-23 12:54:49 +01:00
Daniel Quinn	6db788a550	Add docs for indentation & spacing	2018-09-23 12:54:39 +01:00
Daniel Quinn	f4a09013d7	Merge branch 'jonaswinkler-new-features'	2018-09-23 12:42:02 +01:00
Daniel Quinn	4130dd3465	Conform code to standards	2018-09-23 12:41:28 +01:00
Daniel Quinn	117d7dad04	Improve the unknown language error message	2018-09-23 12:41:14 +01:00
Daniel Quinn	b420281be0	Remove numpy, scikit-learn, and scipy as they weren't being used	2018-09-23 12:40:46 +01:00
Daniel Quinn	17f8953a49	Merge branch 'new-features' of git://github.com/jonaswinkler/paperless into jonaswinkler-new-features	2018-09-23 11:57:44 +01:00
Daniel Quinn	9682a6f6fc	Add a contribution guide	2018-09-22 16:22:03 +01:00
Daniel Quinn	425bbe34ef	Make the names of the sample files visible	2018-09-22 16:17:18 +01:00
Daniel Quinn	60ee08adec	Reduce duplication in docker-compose.env.example See #404 for more info on where this came from.	2018-09-22 15:27:22 +01:00
Daniel Quinn	b4b4d8f25e	Add an example for pdf2pdfocr with the pre-consume hook	2018-09-22 14:00:00 +01:00
Daniel Quinn	cce6b43062	Clean up release notes	2018-09-22 13:59:50 +01:00
Jonas Winkler	fb6f2e07c9	Added a bunch of new features: - Debug mode is now configurable in the configuration file. This way, we don't have to edit versioned files to disable it on production systems. - Recent correspondents filter (enable in configuration file) - Document actions: Edit tags and correspondents on multiple documents at once - Replaced month list filter with date drilldown - Sortable document count columns on Tag and Correspondent admin - Last correspondence column on Correspondent admin - Save and edit next functionality for document editing	2018-09-13 15:19:25 +02:00