Fix implementation of django-filter

Add note about tweaks to psql connections
Add note about import/export process changes
2025-12-14 23:21:18 +00:00 · 2018-09-23 15:47:14 +01:00 · 2018-09-23 14:05:35 +01:00 · 2018-09-23 14:03:38 +01:00 · 2018-09-23 14:01:35 +01:00 · 2018-09-23 14:01:15 +01:00
35 changed files with 1030 additions and 206 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -81,3 +81,5 @@ docker-compose.env
 scripts/import-for-development
 scripts/nuke

+# Static files collected by the collectstatic command
+static/
--- a/Pipfile
+++ b/Pipfile
@@ -36,3 +36,5 @@ pytest-xdist = "*"
 [dev-packages]
 ipython = "*"
 sphinx = "*"
+tox = "*"
+
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
    "_meta": {
        "hash": {
-            "sha256": "e20c2294bcafd346ee57901df94a515a12976ed192dc37df848b39b56bdd1f4b"
+            "sha256": "6d8bad24aa5d0c102b13b5ae27acba04836cd5a07a4003cb2763de1e0a3406b7"
        },
        "pipfile-spec": 6,
        "requires": {},
@@ -19,7 +19,7 @@
                "sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6",
                "sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.5"
        },
        "atomicwrites": {
@@ -27,7 +27,7 @@
                "sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
                "sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.2.1"
        },
        "attrs": {
@@ -85,7 +85,7 @@
                "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
                "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
            ],
-            "markers": "python_version >= '2.6' and python_version != '3.0.*' and python_version != '3.2.*' and python_version < '4' and python_version != '3.1.*'",
+            "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4'",
            "version": "==4.5.1"
        },
        "coveralls": {
@@ -163,7 +163,7 @@
                "sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a",
                "sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.5.0"
        },
        "factory-boy": {
@@ -179,6 +179,7 @@
                "sha256:ea7cfd3aeb1544732d08bd9cfba40c5b78e3a91e17b1a0698ab81bfc5554c628",
                "sha256:f6d67f04abfb2b4bea7afc7fa6c18cf4c523a67956e455668be9ae42bccc21ad"
            ],
+            "markers": "python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.2.*' and python_version >= '2.7'",
            "version": "==0.9.0"
        },
        "filemagic": {
@@ -282,7 +283,7 @@
                "sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
                "sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==0.7.1"
        },
        "py": {
@@ -290,7 +291,7 @@
                "sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
                "sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
            ],
-            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.6.0"
        },
        "pycodestyle": {
@@ -303,26 +304,26 @@
        },
        "pyocr": {
            "hashes": [
-                "sha256:bdc4d43bf9b63c2a9a4b2c9a1a623a0e63c8e6600eede5dbe866b31f3a5f2207"
+                "sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f"
            ],
            "index": "pypi",
-            "version": "==0.5.2"
+            "version": "==0.5.3"
        },
        "pytest": {
            "hashes": [
-                "sha256:2d7c49e931316cc7d1638a3e5f54f5d7b4e5225972b3c9838f3584788d27f349",
-                "sha256:ad0c7db7b5d4081631e0155f5c61b80ad76ce148551aaafe3a718d65a7508b18"
+                "sha256:453cbbbe5ce6db38717d282b758b917de84802af4288910c12442984bde7b823",
+                "sha256:a8a07f84e680482eb51e244370aaf2caa6301ef265f37c2bdefb3dd3b663f99d"
            ],
            "index": "pypi",
-            "version": "==3.7.4"
+            "version": "==3.8.0"
        },
        "pytest-cov": {
            "hashes": [
-                "sha256:03aa752cf11db41d281ea1d807d954c4eda35cfa1b21d6971966cc041bbf6e2d",
-                "sha256:890fe5565400902b0c78b5357004aab1c814115894f4f21370e2433256a3eeec"
+                "sha256:513c425e931a0344944f84ea47f3956be0e416d95acbd897a44970c8d926d5d7",
+                "sha256:e360f048b7dae3f2f2a9a4d067b2dd6b6a015d384d1577c994a43f3f7cbad762"
            ],
            "index": "pypi",
-            "version": "==2.5.1"
+            "version": "==2.6.0"
        },
        "pytest-django": {
            "hashes": [
@@ -344,6 +345,7 @@
                "sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805",
                "sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08"
            ],
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==0.2"
        },
        "pytest-sugar": {
@@ -457,7 +459,7 @@
                "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
                "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
            ],
-            "markers": "python_version >= '2.6' and python_version != '3.3.*' and python_version < '4' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'",
+            "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
            "version": "==1.23"
        }
    },
@@ -521,10 +523,11 @@
        },
        "imagesize": {
            "hashes": [
-                "sha256:3620cc0cadba3f7475f9940d22431fc4d407269f1be59ec9b8edcca26440cf18",
-                "sha256:5b326e4678b6925158ccc66a9fa3122b6106d7c876ee32d7de6ce59385b96315"
+                "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
+                "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
            ],
-            "version": "==1.0.0"
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
+            "version": "==1.1.0"
        },
        "ipython": {
            "hashes": [
@@ -590,6 +593,14 @@
            ],
            "version": "==0.7.4"
        },
+        "pluggy": {
+            "hashes": [
+                "sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
+                "sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
+            ],
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
+            "version": "==0.7.1"
+        },
        "prompt-toolkit": {
            "hashes": [
                "sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381",
@@ -605,6 +616,14 @@
            ],
            "version": "==0.6.0"
        },
+        "py": {
+            "hashes": [
+                "sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
+                "sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
+            ],
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
+            "version": "==1.6.0"
+        },
        "pygments": {
            "hashes": [
                "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
@@ -656,20 +675,28 @@
        },
        "sphinx": {
            "hashes": [
-                "sha256:a07050845cc9a2f4026a6035cc8ed795a5ce7be6528bbc82032385c10807dfe7",
-                "sha256:d719de667218d763e8fd144b7fcfeefd8d434a6201f76bf9f0f0c1fa6f47fcdb"
+                "sha256:217a7705adcb573da5bbe1e0f5cab4fa0bd89fd9342c9159121746f593c2d5a4",
+                "sha256:a602513f385f1d5785ff1ca420d9c7eb1a1b63381733b2f0ea8188a391314a86"
            ],
            "index": "pypi",
-            "version": "==1.7.8"
+            "version": "==1.7.9"
        },
        "sphinxcontrib-websupport": {
            "hashes": [
                "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
                "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
            ],
-            "markers": "python_version != '3.3.*' and python_version >= '2.7' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'",
+            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
            "version": "==1.1.0"
        },
+        "tox": {
+            "hashes": [
+                "sha256:37cf240781b662fb790710c6998527e65ca6851eace84d1595ee71f7af4e85f7",
+                "sha256:eb61aa5bcce65325538686f09848f04ef679b5cd9b83cc491272099b28739600"
+            ],
+            "index": "pypi",
+            "version": "==3.2.1"
+        },
        "traitlets": {
            "hashes": [
                "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
@@ -682,9 +709,17 @@
                "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
                "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
            ],
-            "markers": "python_version >= '2.6' and python_version != '3.3.*' and python_version < '4' and python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*'",
+            "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
            "version": "==1.23"
        },
+        "virtualenv": {
+            "hashes": [
+                "sha256:2ce32cd126117ce2c539f0134eb89de91a8413a29baac49cbab3eb50e2026669",
+                "sha256:ca07b4c0b54e14a91af9f34d0919790b016923d157afda5efdde55c96718f752"
+            ],
+            "markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*'",
+            "version": "==16.0.0"
+        },
        "wcwidth": {
            "hashes": [
                "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
--- a/docker-compose.env.example
+++ b/docker-compose.env.example
@@ -1,38 +1,22 @@
 # Environment variables to set for Paperless
-# Commented out variables will be replaced by a default within Paperless.
+# Commented out variables will be replaced with a default within Paperless.
+#
+# In addition to what you see here, you can also define any values you find in
+# paperless.conf.example here.  Values like:
+#
+# * PAPERLESS_PASSPHRASE
+# * PAPERLESS_CONSUMPTION_DIR
+# * PAPERLESS_CONSUME_MAIL_HOST
+#
+# ...are all explained in that file but can be defined here, since the Docker
+# installation doesn't make use of paperless.conf.

-# Passphrase Paperless uses to encrypt and decrypt your documents, if you want
-# encryption at all.
-# PAPERLESS_PASSPHRASE=CHANGE_ME

-# The amount of threads to use for text recognition
-# PAPERLESS_OCR_THREADS=4
-
-# Additional languages to install for text recognition
+# Additional languages to install for text recognition.  Note that this is
+# different from PAPERLESS_OCR_LANGUAGE (default=eng), which defines the
+# default language used when guessing the language from the OCR output.
 # PAPERLESS_OCR_LANGUAGES=deu ita

 # You can change the default user and group id to a custom one
 # USERMAP_UID=1000
 # USERMAP_GID=1000
-
-###############################################################################
-####                         Mail Consumption                              ####
-###############################################################################
-
-# These values are required if you want paperless to check a particular email
-# box every 10 minutes and attempt to consume documents from there.  If you
-# don't define a HOST, mail checking will just be disabled.
-# Don't use quotes after = or it will crash your docker
-# PAPERLESS_CONSUME_MAIL_HOST=
-# PAPERLESS_CONSUME_MAIL_PORT=
-# PAPERLESS_CONSUME_MAIL_USER=
-# PAPERLESS_CONSUME_MAIL_PASS=
-
-# Override the default IMAP inbox here. If it's not set, Paperless defaults to
-# INBOX.
-# PAPERLESS_CONSUME_MAIL_INBOX=INBOX
-
-# Any email sent to the target account that does not contain this text will be
-# ignored.  Mail checking won't work without this.
-# PAPERLESS_EMAIL_SECRET=
-
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,16 +1,68 @@
 Changelog
 #########

+2.4.0
+=====
+
+* A new set of actions is now available thanks to `jonaswinkler`_'s very first
+  pull request!  You can now do nifty things like tag documents in bulk, or set
+  correspondents in bulk.  `#405`_
+* The import/export system is now a little smarter.  By default, documents are
+  tagged as ``unencrypted``, since exports are by their nature unencrypted.
+  It's now in the import step that we decide the storage type.  This allows you
+  to export from an encrypted system and import into an unencrypted one, or
+  vice-versa.
+* The migration history has been slightly modified to accommodate PostgreSQL
+  users.  Additionally, you can now tell paperless to use PostgreSQL simply by
+  declaring ``PAPERLESS_DBUSER`` in your environment.  This will attempt to
+  connect to your Postgres database without a password unless you also set
+  ``PAPERLESS_DBPASS``.
+* A bug was found in the REST API filter system that was the result of an
+  update of django-filter some time ago.  This has now been patched `#412`_.
+  Thanks to `thepill`_ for spotting it!
+
+
+2.3.0
+=====
+
+* Support for consuming plain text & markdown documents was added by
+  `Joshua Taillon`_!  This was a long-requested feature, and its addition is
+  likely to be greatly appreciated by the community: `#395`_  Thanks also to
+  `David Martin`_ for his assistance on the issue.
+* `dubit0`_ found & fixed a bug that prevented management commands from running
+  before we had an operational database: `#396`_
+* Joshua also added a simple update to the thumbnail generation process to
+  improve performance: `#399`_
+* As his last bit of effort on this release, Joshua also added some code to
+  allow you to view the documents inline rather than download them as an
+  attachment. `#400`_
+* Finally, `ahyear`_ found a slip in the Docker documentation and patched it.
+  `#401`_
+
+
+2.2.1
+=====
+
+* `Kyle Lucy`_ reported a bug quickly after the release of 2.2.0 where we broke
+  the ``DISABLE_LOGIN`` feature: `#392`_.
+
+
 2.2.0
 =====

-* Thanks to `dadosch`_ and `Wolfgang Mader`_, this is the first version of
-  Paperless that supports Django 2.0!  As a result of their hard work, you can
-  now also run Paperless on Python 3.7 as well: `#386`_ & `#390`_.
-* `Stéphane Brunner`_ added a few lines of code that made tagging interface a lot
-  easier on those of us with lots of different tags: `#391`_.
+* Thanks to `dadosch`_, `Wolfgang Mader`_, and `Tim Brooks`_ this is the first
+  version of Paperless that supports Django 2.0!  As a result of their hard
+  work, you can now also run Paperless on Python 3.7 as well: `#386`_ &
+  `#390`_.
+* `Stéphane Brunner`_ added a few lines of code that made the tagging
+  interface a lot easier on those of us with lots of different tags: `#391`_.
 * `Kilian Koeltzsch`_ noticed a bug in how we capture & automatically create
  tags, so that's fixed now too: `#384`_.
+* `erikarvstedt`_ tweaked the behaviour of the test suite to be better behaved
+  for packaging environments: `#383`_.
+* `Lukasz Soluch`_ added CORS support to make building a new Javascript-based
+  front-end cleaner & easier: `#387`_.
+

 2.1.0
 =====
@@ -464,8 +516,15 @@ bulk of the work on this big change.
 .. _Mark McFate: https://github.com/SummittDweller
 .. _dadosch: https://github.com/dadosch
 .. _Wolfgang Mader: https://github.com/wmader
+.. _Tim Brooks: https://github.com/brookst
 .. _Stéphane Brunner: https://github.com/sbrunner
 .. _Kilian Koeltzsch: https://github.com/kiliankoe
+.. _Lukasz Soluch: https://github.com/LukaszSolo
+.. _Joshua Taillon: https://github.com/jat255
+.. _dubit0: https://github.com/dubit0
+.. _ahyear: https://github.com/ahyear
+.. _jonaswinkler: https://github.com/jonaswinkler
+.. _thepill: https://github.com/thepill

 .. _#20: https://github.com/danielquinn/paperless/issues/20
 .. _#44: https://github.com/danielquinn/paperless/issues/44
@@ -540,10 +599,20 @@ bulk of the work on this big change.
 .. _#374: https://github.com/danielquinn/paperless/pull/374
 .. _#375: https://github.com/danielquinn/paperless/pull/375
 .. _#376: https://github.com/danielquinn/paperless/pull/376
+.. _#383: https://github.com/danielquinn/paperless/pull/383
 .. _#384: https://github.com/danielquinn/paperless/issues/384
 .. _#386: https://github.com/danielquinn/paperless/issues/386
+.. _#387: https://github.com/danielquinn/paperless/pull/387
 .. _#391: https://github.com/danielquinn/paperless/pull/391
 .. _#390: https://github.com/danielquinn/paperless/pull/390
+.. _#392: https://github.com/danielquinn/paperless/issues/392
+.. _#395: https://github.com/danielquinn/paperless/pull/395
+.. _#396: https://github.com/danielquinn/paperless/pull/396
+.. _#399: https://github.com/danielquinn/paperless/pull/399
+.. _#400: https://github.com/danielquinn/paperless/pull/400
+.. _#401: https://github.com/danielquinn/paperless/pull/401
+.. _#405: https://github.com/danielquinn/paperless/pull/405
+.. _#412: https://github.com/danielquinn/paperless/issues/412

 .. _pipenv: https://docs.pipenv.org/
 .. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
--- a/docs/consumption.rst
+++ b/docs/consumption.rst
@@ -76,6 +76,31 @@ Pre-consumption script

 * Document file name

+A simple but common example for this would be creating a simple script like
+this:
+
+``/usr/local/bin/ocr-pdf``
+
+.. code:: bash
+
+    #!/usr/bin/env bash
+    pdf2pdfocr.py -i ${1}
+
+``/etc/paperless.conf``
+
+.. code:: bash
+
+    ...
+    PAPERLESS_PRE_CONSUME_SCRIPT="/usr/local/bin/ocr-pdf"
+    ...
+
+This will pass the path to the document about to be consumed to ``/usr/local/bin/ocr-pdf``,
+which will in turn call `pdf2pdfocr.py`_ on your document, which will then
+overwrite the file with an OCR'd version of the file and exit.  At which point,
+the consumption process will begin with the newly modified file.
+
+.. _pdf2pdfocr.py: https://github.com/LeoFCardoso/pdf2pdfocr
+

 .. _consumption-director-hook-variables-post:

--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -0,0 +1,141 @@
+.. _contributing:
+
+Contributing to Paperless
+#########################
+
+Maybe you've been using Paperless for a while and want to add a feature or two,
+or maybe you've come across a bug that you have some ideas how to solve.  The
+beauty of Free software is that you can see what's wrong and help to get it
+fixed for everyone!
+
+
+How to Get Your Changes Rolled Into Paperless
+=============================================
+
+If you've found a bug, but don't know how to fix it, you can always post an
+issue on `GitHub`_ in the hopes that someone will have the time to fix it for
+you.  If however you're the one with the time, pull requests are always
+welcome, you just have to make sure that your code conforms to a few standards:
+
+Pep8
+----
+
+It's the standard for all Python development, so it's `very well documented`_.
+The short version is:
+
+* Lines should wrap at 79 characters
+* Use ``snake_case`` for variables, ``CamelCase`` for classes, and ``ALL_CAPS``
+  for constants.
+* Space out your operators: ``stuff + 7`` instead of ``stuff+7``
+* Two empty lines between classes, and functions, but 1 empty line between
+  class methods.
+
+There's more to it than that, but if you follow those, you'll probably be
+alright.  When you submit your pull request, there's a pep8 checker that'll
+look at your code to see if anything is off.  If it finds anything, it'll
+complain at you until you fix it.
+
+
+Additional Style Guides
+-----------------------
+
+Where pep8 is ambiguous, I've tried to be a little more specific.  These rules
+aren't hard-and-fast, but if you can conform to them, I'll appreciate it and
+spend less time trying to conform your PR before merging:
+
+
+Function calls
+..............
+
+If you're calling a function and that necessitates more than one line of code,
+please format it like this:
+
+.. code:: python
+
+    my_function(
+        argument1,
+        kwarg1="x",
+        kwarg2="y",
+        another_really_long_kwarg="some big value",
+        a_kwarg_calling_another_long_function=another_function(
+            another_arg,
+            another_kwarg="kwarg!"
+        )
+    )
+
+This is all in the interest of code uniformity rather than anything else.  If
+we stick to a style, everything is understandable in the same way.
+
+
+Quoting Strings
+...............
+
+pep8 is a little too open-minded on this for my liking.  Python strings should
+be quoted with double quotes (``"``) except in cases where the resulting string
+would require too much escaping of a double quote, in which case, a single
+quoted, or triple-quoted string will do:
+
+.. code:: python
+
+    my_string = "This is my string"
+    problematic_string = 'This is a "string" with "quotes" in it'
+
+In HTML templates, please use double-quotes for tag attributes, and single
+quotes for arguments passed to Django template tags:
+
+.. code:: html
+
+    <div class="stuff">
+        <a href="{% url 'some-url-name' pk='w00t' %}">link this</a>
+    </div>
+
+This is to keep linters happy when they look at an HTML file and see an
+attribute closing the ``"`` before it should have been.
+
+--
+
+That's all there is in terms of guidelines, so I hope it's not too daunting.
+
+
+Indentation & Spacing
+.....................
+
+When it comes to indentation:
+
+* For Python, the rule is: follow pep8 and use 4 spaces.
+* For Javascript, CSS, and HTML, please use 1 tab.
+
+Additionally, Django templates making use of block elements like ``{% if %}``,
+``{% for %}``, and ``{% block %}`` etc. should be indented:
+
+Good:
+
+.. code:: html
+
+    {% block stuff %}
+    	<h1>This is the stuff</h1>
+    {% endblock %}
+
+Bad:
+
+.. code:: html
+
+    {% block stuff %}
+    <h1>This is the stuff</h1>
+    {% endblock %}
+
+
+The Code of Conduct
+===================
+
+Paperless has a `code of conduct`_.  It's a lot like the other ones you see out
+there, with a few small changes, but basically it boils down to:
+
+> Don't be an ass, or you might get banned.
+
+I'm proud to say that the CoC has never had to be enforced because everyone has
+been awesome, friendly, and professional.
+
+.. _GitHub: https://github.com/danielquinn/paperless/issues
+.. _very well documented: https://www.python.org/dev/peps/pep-0008/
+.. _code of conduct: https://github.com/danielquinn/paperless/blob/master/CODE_OF_CONDUCT.md
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -43,5 +43,6 @@ Contents
   customising
   extending
   troubleshooting
+   contributing
   scanners
   changelog
--- a/docs/migrating.rst
+++ b/docs/migrating.rst
@@ -101,6 +101,7 @@ is similar:
    $ cd /path/to/project
    $ git pull
    $ docker build -t paperless .
+    $ docker-compose run --rm consumer migrate
    $ docker-compose up -d

 If ``git pull`` doesn't report any changes, there is no need to continue with
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
--- a/paperless.conf.example
+++ b/paperless.conf.example
@@ -59,6 +59,11 @@ PAPERLESS_EMAIL_SECRET=""
 ####                              Security                                 ####
 ###############################################################################

+# Controls whether django's debug mode is enabled. Disable this on production
+# systems. Debug mode is enabled by default.
+PAPERLESS_DEBUG="false"
+
+
 # Paperless can be instructed to attempt to encrypt your PDF files with GPG
 # using the PAPERLESS_PASSPHRASE specified below.  If however you're not
 # concerned about encrypting these files (for example if you have disk
@@ -89,9 +94,10 @@ PAPERLESS_EMAIL_SECRET=""
 # as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
 #PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com"

-# If you decide to use Paperless APIs in an ajax calls, you need to add your
-# servers to the allowed hosts that can do CORS calls. By default Paperless allows 
-# calls from localhost:8080. The same rules as above how the list should look like.
+# If you decide to use the Paperless API in an ajax call, you need to add your
+# servers to the list of allowed hosts that can do CORS calls. By default
+# Paperless allows calls from localhost:8080, but if you'd like to change that,
+# you can set this value to a comma-separated list.
 #PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000"

 # To host paperless under a subpath url like example.com/paperless you set
@@ -116,6 +122,10 @@ PAPERLESS_EMAIL_SECRET=""
 # http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process
 #PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh"

+# By default, when clicking on a document within the web interface, the
+# browser will prompt the user to save the document to disk. By setting this to
+# "true", the document will instead be opened in the browser, if possible.
+#PAPERLESS_INLINE_DOC="false"

 #
 # The following values use sensible defaults for modern systems, but if you're
@@ -198,3 +208,8 @@ PAPERLESS_EMAIL_SECRET=""
 # positive integer, but if you don't define one in paperless.conf, a default of
 # 100 will be used.
 #PAPERLESS_LIST_PER_PAGE=100
+
+
+# The number of years for which a correspondent will be included in the recent
+# correspondents filter.
+#PAPERLESS_RECENT_CORRESPONDENT_YEARS=1
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
 -i https://pypi.python.org/simple
-apipkg==1.5; python_version != '3.1.*'
-atomicwrites==1.2.1; python_version != '3.1.*'
+apipkg==1.5; python_version != '3.3.*'
+atomicwrites==1.2.1; python_version != '3.3.*'
 attrs==18.2.0
 certifi==2018.8.24
 chardet==3.0.4
-coverage==4.5.1; python_version != '3.1.*'
+coverage==4.5.1; python_version < '4'
 coveralls==1.5.0
 dateparser==0.7.0
 django-cors-headers==2.4.0
@@ -14,9 +14,9 @@ django-filter==2.0.0
 django==2.0.8
 djangorestframework==3.8.2
 docopt==0.6.2
-execnet==1.5.0; python_version != '3.1.*'
+execnet==1.5.0; python_version != '3.3.*'
 factory-boy==2.11.1
-faker==0.9.0
+faker==0.9.0; python_version >= '2.7'
 filemagic==1.6
 fuzzywuzzy==0.15.0
 gunicorn==19.9.0
@@ -26,17 +26,17 @@ langdetect==1.0.7
 more-itertools==4.3.0
 pdftotext==2.1.0
 pillow==5.2.0
-pluggy==0.7.1; python_version != '3.1.*'
-py==1.6.0; python_version != '3.1.*'
+pluggy==0.7.1; python_version != '3.3.*'
+py==1.6.0; python_version != '3.3.*'
 pycodestyle==2.4.0
-pyocr==0.5.2
-pytest-cov==2.5.1
+pyocr==0.5.3
+pytest-cov==2.6.0
 pytest-django==3.4.2
 pytest-env==0.6.2
-pytest-forked==0.2
+pytest-forked==0.2; python_version != '3.3.*'
 pytest-sugar==0.9.1
 pytest-xdist==1.23.0
-pytest==3.7.4
+pytest==3.8.0
 python-dateutil==2.7.3
 python-dotenv==0.9.1
 python-gnupg==0.4.3
@@ -48,4 +48,4 @@ six==1.11.0
 termcolor==1.1.0
 text-unidecode==1.2
 tzlocal==1.5.1
-urllib3==1.23; python_version != '3.0.*'
+urllib3==1.23; python_version != '3.3.*'
--- a/src/documents/actions.py
+++ b/src/documents/actions.py
@@ -0,0 +1,146 @@
+from django.contrib import messages
+from django.contrib.admin import helpers
+from django.contrib.admin.utils import model_ngettext
+from django.core.exceptions import PermissionDenied
+from django.template.response import TemplateResponse
+
+from documents.models import Correspondent, Tag
+
+
+def select_action(
+        modeladmin, request, queryset, title, action, modelclass,
+        success_message="", document_action=None, queryset_action=None):
+
+    opts = modeladmin.model._meta
+    app_label = opts.app_label
+
+    if not modeladmin.has_change_permission(request):
+        raise PermissionDenied
+
+    if request.POST.get('post'):
+        n = queryset.count()
+        selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
+        if n:
+            for document in queryset:
+                if document_action:
+                    document_action(document, selected_object)
+                document_display = str(document)
+                modeladmin.log_change(request, document, document_display)
+            if queryset_action:
+                queryset_action(queryset, selected_object)
+
+            modeladmin.message_user(request, success_message % {
+                "selected_object": selected_object.name,
+                "count": n,
+                "items": model_ngettext(modeladmin.opts, n)
+            }, messages.SUCCESS)
+
+        # Return None to display the change list page again.
+        return None
+
+    context = dict(
+        modeladmin.admin_site.each_context(request),
+        title=title,
+        queryset=queryset,
+        opts=opts,
+        action_checkbox_name=helpers.ACTION_CHECKBOX_NAME,
+        media=modeladmin.media,
+        action=action,
+        objects=modelclass.objects.all(),
+        itemname=model_ngettext(modelclass, 1)
+    )
+
+    request.current_app = modeladmin.admin_site.name
+
+    return TemplateResponse(
+        request,
+        "admin/{}/{}/select_object.html".format(app_label, opts.model_name),
+        context
+    )
+
+
+def simple_action(
+        modeladmin, request, queryset, success_message="",
+        document_action=None, queryset_action=None):
+
+    if not modeladmin.has_change_permission(request):
+        raise PermissionDenied
+
+    n = queryset.count()
+    if n:
+        for document in queryset:
+            if document_action:
+                document_action(document)
+            document_display = str(document)
+            modeladmin.log_change(request, document, document_display)
+        if queryset_action:
+            queryset_action(queryset)
+        modeladmin.message_user(request, success_message % {
+            "count": n, "items": model_ngettext(modeladmin.opts, n)
+        }, messages.SUCCESS)
+
+    # Return None to display the change list page again.
+    return None
+
+
+def add_tag_to_selected(modeladmin, request, queryset):
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Add tag to multiple documents",
+        action="add_tag_to_selected",
+        modelclass=Tag,
+        success_message="Successfully added tag %(selected_object)s to "
+                        "%(count)d %(items)s.",
+        document_action=lambda doc, tag: doc.tags.add(tag)
+    )
+
+
+def remove_tag_from_selected(modeladmin, request, queryset):
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Remove tag from multiple documents",
+        action="remove_tag_from_selected",
+        modelclass=Tag,
+        success_message="Successfully removed tag %(selected_object)s from "
+                        "%(count)d %(items)s.",
+        document_action=lambda doc, tag: doc.tags.remove(tag)
+    )
+
+
+def set_correspondent_on_selected(modeladmin, request, queryset):
+
+    return select_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        title="Set correspondent on multiple documents",
+        action="set_correspondent_on_selected",
+        modelclass=Correspondent,
+        success_message="Successfully set correspondent %(selected_object)s "
+                        "on %(count)d %(items)s.",
+        queryset_action=lambda qs, corr: qs.update(correspondent=corr)
+    )
+
+
+def remove_correspondent_from_selected(modeladmin, request, queryset):
+    return simple_action(
+        modeladmin=modeladmin,
+        request=request,
+        queryset=queryset,
+        success_message="Successfully removed correspondent from %(count)d "
+                        "%(items)s.",
+        queryset_action=lambda qs: qs.update(correspondent=None)
+    )
+
+
+add_tag_to_selected.short_description = "Add tag to selected documents"
+remove_tag_from_selected.short_description = \
+    "Remove tag from selected documents"
+set_correspondent_on_selected.short_description = \
+    "Set correspondent on selected documents"
+remove_correspondent_from_selected.short_description = \
+    "Remove correspondent from selected documents"
--- a/src/documents/admin.py
+++ b/src/documents/admin.py
@@ -1,42 +1,25 @@
-from datetime import datetime
+from datetime import datetime, timedelta

 from django.conf import settings
-from django.contrib import admin
-from django.contrib.auth.models import User, Group
-try:
-    from django.core.urlresolvers import reverse
-except ImportError:
-    from django.urls import reverse
+from django.contrib import admin, messages
+from django.contrib.admin.templatetags.admin_urls import add_preserved_filters
+from django.contrib.auth.models import Group, User
+from django.db import models
+from django.http import HttpResponseRedirect
 from django.templatetags.static import static
-from django.utils.safestring import mark_safe
+from django.urls import reverse
 from django.utils.html import format_html, format_html_join
+from django.utils.http import urlquote
+from django.utils.safestring import mark_safe

-from .models import Correspondent, Tag, Document, Log
+from documents.actions import (
+    add_tag_to_selected,
+    remove_correspondent_from_selected,
+    remove_tag_from_selected,
+    set_correspondent_on_selected
+)

-
-class MonthListFilter(admin.SimpleListFilter):
-
-    title = "Month"
-
-    # Parameter for the filter that will be used in the URL query.
-    parameter_name = "month"
-
-    def lookups(self, request, model_admin):
-        r = []
-        for document in Document.objects.all():
-            r.append((
-                document.created.strftime("%Y-%m"),
-                document.created.strftime("%B %Y")
-            ))
-        return sorted(set(r), key=lambda x: x[0], reverse=True)
-
-    def queryset(self, request, queryset):
-
-        if not self.value():
-            return None
-
-        year, month = self.value().split("-")
-        return queryset.filter(created__year=year, created__month=month)
+from .models import Correspondent, Document, Log, Tag


 class FinancialYearFilter(admin.SimpleListFilter):
@@ -104,18 +87,59 @@ class FinancialYearFilter(admin.SimpleListFilter):
                               created__lte=self._fy_end(end))


+class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.title = "correspondent (recent)"
+
+    def field_choices(self, field, request, model_admin):
+
+        years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
+        days = 365 * years
+
+        lookups = []
+        if years and years > 0:
+            correspondents = Correspondent.objects.filter(
+                documents__created__gte=datetime.now() - timedelta(days=days)
+            ).distinct()
+            for c in correspondents:
+                lookups.append((c.id, c.name))
+
+        return lookups
+
+
 class CommonAdmin(admin.ModelAdmin):
    list_per_page = settings.PAPERLESS_LIST_PER_PAGE


 class CorrespondentAdmin(CommonAdmin):

-    list_display = ("name", "match", "matching_algorithm", "document_count")
+    list_display = (
+        "name",
+        "match",
+        "matching_algorithm",
+        "document_count",
+        "last_correspondence"
+    )
    list_filter = ("matching_algorithm",)
    list_editable = ("match", "matching_algorithm")

+    def get_queryset(self, request):
+        qs = super(CorrespondentAdmin, self).get_queryset(request)
+        qs = qs.annotate(
+            document_count=models.Count("documents"),
+            last_correspondence=models.Max("documents__created")
+        )
+        return qs
+
    def document_count(self, obj):
-        return obj.documents.count()
+        return obj.document_count
+    document_count.admin_order_field = "document_count"
+
+    def last_correspondence(self, obj):
+        return obj.last_correspondence
+    last_correspondence.admin_order_field = "last_correspondence"


 class TagAdmin(CommonAdmin):
@@ -125,8 +149,14 @@ class TagAdmin(CommonAdmin):
    list_filter = ("colour", "matching_algorithm")
    list_editable = ("colour", "match", "matching_algorithm")

+    def get_queryset(self, request):
+        qs = super(TagAdmin, self).get_queryset(request)
+        qs = qs.annotate(document_count=models.Count("documents"))
+        return qs
+
    def document_count(self, obj):
-        return obj.documents.count()
+        return obj.document_count
+    document_count.admin_order_field = "document_count"


 class DocumentAdmin(CommonAdmin):
@@ -140,12 +170,30 @@ class DocumentAdmin(CommonAdmin):
    readonly_fields = ("added",)
    list_display = ("title", "created", "added", "thumbnail", "correspondent",
                    "tags_")
-    list_filter = ("tags", "correspondent", FinancialYearFilter,
-                   MonthListFilter)
+    list_filter = (
+        "tags",
+        ("correspondent", RecentCorrespondentFilter),
+        "correspondent",
+        FinancialYearFilter
+    )
+
    filter_horizontal = ("tags",)

    ordering = ["-created", "correspondent"]

+    actions = [
+        add_tag_to_selected,
+        remove_tag_from_selected,
+        set_correspondent_on_selected,
+        remove_correspondent_from_selected
+    ]
+
+    date_hierarchy = "created"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.document_queue = []
+
    def has_add_permission(self, request):
        return False

@@ -153,6 +201,79 @@ class DocumentAdmin(CommonAdmin):
        return obj.created.date().strftime("%Y-%m-%d")
    created_.short_description = "Created"

+    def changelist_view(self, request, extra_context=None):
+
+        response = super().changelist_view(
+            request,
+            extra_context=extra_context
+        )
+
+        if request.method == "GET":
+            cl = self.get_changelist_instance(request)
+            self.document_queue = [doc.id for doc in cl.queryset]
+
+        return response
+
+    def change_view(self, request, object_id=None, form_url='',
+                    extra_context=None):
+
+        extra_context = extra_context or {}
+
+        if self.document_queue and object_id:
+            if int(object_id) in self.document_queue:
+                # There is a queue of documents
+                current_index = self.document_queue.index(int(object_id))
+                if current_index < len(self.document_queue) - 1:
+                    # ... and there are still documents in the queue
+                    extra_context["next_object"] = self.document_queue[
+                        current_index + 1
+                    ]
+
+        return super(DocumentAdmin, self).change_view(
+            request,
+            object_id,
+            form_url,
+            extra_context=extra_context,
+        )
+
+    def response_change(self, request, obj):
+
+        # This is mostly copied from ModelAdmin.response_change()
+        opts = self.model._meta
+        preserved_filters = self.get_preserved_filters(request)
+
+        msg_dict = {
+            "name": opts.verbose_name,
+            "obj": format_html(
+                '<a href="{}">{}</a>',
+                urlquote(request.path),
+                obj
+            ),
+        }
+        if "_saveandeditnext" in request.POST:
+            msg = format_html(
+                'The {name} "{obj}" was changed successfully. '
+                'Editing next object.',
+                **msg_dict
+            )
+            self.message_user(request, msg, messages.SUCCESS)
+            redirect_url = reverse(
+                "admin:{}_{}_change".format(opts.app_label, opts.model_name),
+                args=(request.POST["_next_object"],),
+                current_app=self.admin_site.name
+            )
+            redirect_url = add_preserved_filters(
+                {
+                    "preserved_filters": preserved_filters,
+                    "opts": opts
+                },
+                redirect_url
+            )
+            return HttpResponseRedirect(redirect_url)
+
+        return super().response_change(request, obj)
+
+    @mark_safe
    def thumbnail(self, obj):
        return self._html_tag(
            "a",
@@ -165,8 +286,8 @@ class DocumentAdmin(CommonAdmin):
            ),
            href=obj.download_url
        )
-    thumbnail.allow_tags = True

+    @mark_safe
    def tags_(self, obj):
        r = ""
        for tag in obj.tags.all():
@@ -183,10 +304,11 @@ class DocumentAdmin(CommonAdmin):
                    )
                }
            )
-        return mark_safe(r)
-    tags_.allow_tags = True
+        return r

+    @mark_safe
    def document(self, obj):
+        # TODO: is this method even used anymore?
        return self._html_tag(
            "a",
            self._html_tag(
@@ -199,7 +321,6 @@ class DocumentAdmin(CommonAdmin):
            ),
            href=obj.download_url
        )
-    document.allow_tags = True

    @staticmethod
    def _html_tag(kind, inside=None, **kwargs):
--- a/src/documents/checks.py
+++ b/src/documents/checks.py
@@ -2,7 +2,7 @@ import textwrap

 from django.conf import settings
 from django.core.checks import Error, register
-from django.db.utils import OperationalError
+from django.db.utils import OperationalError, ProgrammingError


@register()
@@ -14,7 +14,7 @@ def changed_password_check(app_configs, **kwargs):
    try:
        encrypted_doc = Document.objects.filter(
            storage_type=Document.STORAGE_TYPE_GPG).first()
-    except OperationalError:
+    except (OperationalError, ProgrammingError):
        return []  # No documents table yet

    if encrypted_doc:
--- a/src/documents/filters.py
+++ b/src/documents/filters.py
@@ -1,8 +1,14 @@
-from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter
+from django_filters.rest_framework import CharFilter, FilterSet, BooleanFilter, ModelChoiceFilter

 from .models import Correspondent, Document, Tag


+CHAR_KWARGS = (
+    "startswith", "endswith", "contains",
+    "istartswith", "iendswith", "icontains"
+)
+
+
 class CorrespondentFilterSet(FilterSet):

    class Meta:
@@ -31,34 +37,24 @@ class TagFilterSet(FilterSet):

 class DocumentFilterSet(FilterSet):

-    CHAR_KWARGS = {
-        "lookup_expr": (
-            "startswith",
-            "endswith",
-            "contains",
-            "istartswith",
-            "iendswith",
-            "icontains"
-        )
-    }
-
-    correspondent__name = CharFilter(
-        field_name="correspondent__name", **CHAR_KWARGS)
-    correspondent__slug = CharFilter(
-        field_name="correspondent__slug", **CHAR_KWARGS)
-    tags__name = CharFilter(
-        field_name="tags__name", **CHAR_KWARGS)
-    tags__slug = CharFilter(
-        field_name="tags__slug", **CHAR_KWARGS)
-    tags__empty = BooleanFilter(
-        field_name="tags", lookup_expr="isnull", distinct=True)
+    tags_empty = BooleanFilter(
+        label="Is tagged",
+        field_name="tags",
+        lookup_expr="isnull",
+        exclude=True
+    )

    class Meta:
        model = Document
        fields = {
-            "title": [
-                "startswith", "endswith", "contains",
-                "istartswith", "iendswith", "icontains"
-            ],
-            "content": ["contains", "icontains"],
+
+            "title": CHAR_KWARGS,
+            "content": ("contains", "icontains"),
+
+            "correspondent__name": CHAR_KWARGS,
+            "correspondent__slug": CHAR_KWARGS,
+
+            "tags__name": CHAR_KWARGS,
+            "tags__slug": CHAR_KWARGS,
+
        }
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -55,7 +55,12 @@ class Command(Renderable, BaseCommand):
        documents = Document.objects.all()
        document_map = {d.pk: d for d in documents}
        manifest = json.loads(serializers.serialize("json", documents))
-        for document_dict in manifest:
+
+        for index, document_dict in enumerate(manifest):
+
+            # Force output to unencrypted as that will be the current state.
+            # The importer will make the decision to encrypt or not.
+            manifest[index]["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501

            document = document_map[document_dict["pk"]]

--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -94,7 +94,7 @@ class Command(Renderable, BaseCommand):
            document_path = os.path.join(self.source, doc_file)
            thumbnail_path = os.path.join(self.source, thumb_file)

-            if document.storage_type == Document.STORAGE_TYPE_GPG:
+            if settings.PASSPHRASE:

                with open(document_path, "rb") as unencrypted:
                    with open(document.source_path, "wb") as encrypted:
@@ -112,3 +112,15 @@ class Command(Renderable, BaseCommand):

                shutil.copy(document_path, document.source_path)
                shutil.copy(thumbnail_path, document.thumbnail_path)
+
+        # Reset the storage type to whatever we've used while importing
+
+        storage_type = Document.STORAGE_TYPE_UNENCRYPTED
+        if settings.PASSPHRASE:
+            storage_type = Document.STORAGE_TYPE_GPG
+
+        Document.objects.filter(
+            pk__in=[r["pk"] for r in self.manifest]
+        ).update(
+            storage_type=storage_type
+        )
--- a/src/documents/migrations/0014_document_checksum.py
+++ b/src/documents/migrations/0014_document_checksum.py
@@ -158,9 +158,4 @@ class Migration(migrations.Migration):
            name='modified',
            field=models.DateTimeField(auto_now=True, db_index=True),
        ),
-        migrations.AlterField(
-            model_name='document',
-            name='checksum',
-            field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted).  We use this to prevent duplicate document imports.', max_length=32, unique=True),
-        ),
    ]
--- a/src/documents/migrations/0015_add_insensitive_to_match.py
+++ b/src/documents/migrations/0015_add_insensitive_to_match.py
@@ -12,6 +12,11 @@ class Migration(migrations.Migration):
    ]

    operations = [
+        migrations.AlterField(
+            model_name='document',
+            name='checksum',
+            field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted).  We use this to prevent duplicate document imports.', max_length=32, unique=True),
+        ),
        migrations.AddField(
            model_name='correspondent',
            name='is_insensitive',
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -1,24 +1,24 @@
 # coding=utf-8

-import dateutil.parser
 import logging
 import os
 import re
 import uuid
-
 from collections import OrderedDict
+
+import dateutil.parser
+from django.conf import settings
+from django.db import models
+from django.template.defaultfilters import slugify
+from django.utils import timezone
 from fuzzywuzzy import fuzz

-from django.conf import settings
+from .managers import LogManager
+
 try:
    from django.core.urlresolvers import reverse
 except ImportError:
    from django.urls import reverse
-from django.db import models
-from django.template.defaultfilters import slugify
-from django.utils import timezone
-
-from .managers import LogManager


 class MatchingModel(models.Model):
@@ -135,7 +135,7 @@ class MatchingModel(models.Model):
        Example:
          '  some random  words "with   quotes  " and   spaces'
            ==>
-          ["some", "random", "words", "with\s+quotes", "and", "spaces"]
+          ["some", "random", "words", "with\\s+quotes", "and", "spaces"]
        """
        findterms = re.compile(r'"([^"]+)"|(\S+)').findall
        normspace = re.compile(r"\s+").sub
@@ -192,7 +192,11 @@ class Document(models.Model):
    TYPE_JPG = "jpg"
    TYPE_GIF = "gif"
    TYPE_TIF = "tiff"
-    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
+    TYPE_TXT = "txt"
+    TYPE_CSV = "csv"
+    TYPE_MD = "md"
+    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,
+             TYPE_TXT, TYPE_CSV, TYPE_MD)

    STORAGE_TYPE_UNENCRYPTED = "unencrypted"
    STORAGE_TYPE_GPG = "gpg"
@@ -365,51 +369,52 @@ class FileInfo:
        )
    )

+    formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
    REGEXES = OrderedDict([
        ("created-correspondent-title-tags", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-title-tags", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-correspondent-title", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("created-title", re.compile(
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
            r"(?P<title>.*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("correspondent-title-tags", re.compile(
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*) - "
            r"(?P<tags>[a-z0-9\-,]*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("correspondent-title", re.compile(
            r"(?P<correspondent>.*) - "
            r"(?P<title>.*)?"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        )),
        ("title", re.compile(
            r"(?P<title>.*)"
-            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
+            r"\.(?P<extension>{})$".format(formats),
            flags=re.IGNORECASE
        ))
    ])
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -1,9 +1,25 @@
 import logging
 import shutil
 import tempfile
+import re

 from django.conf import settings

+# This regular expression will try to find dates in the document at
+# hand and will match the following formats:
+# - XX.YY.ZZZZ with XX and YY being 1 or 2 digits and ZZZZ being 2 or 4 digits
+# - XX/YY/ZZZZ with XX and YY being 1 or 2 digits and ZZZZ being 2 or 4 digits
+# - XX-YY-ZZZZ with XX and YY being 1 or 2 digits and ZZZZ being 2 or 4 digits
+# - XX. MONTH ZZZZ with XX being 1 or 2 digits and ZZZZ being 2 or 4 digits
+# - MONTH ZZZZ with ZZZZ being 4 digits
+# - MONTH XX, ZZZZ with XX being 1 or 2 digits and ZZZZ being 4 digits
+DATE_REGEX = re.compile(
+    r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
+    r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
+    r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
+    r'\b([^\W\d_]{3,9} [0-9]{4})\b'
+)
+

 class ParseError(Exception):
    pass
--- a/src/documents/templates/admin/documents/document/change_form.html
+++ b/src/documents/templates/admin/documents/document/change_form.html
@@ -1,5 +1,21 @@
 {% extends 'admin/change_form.html' %}

+{% block content %}
+
+{{ block.super }}
+
+{% if next_object %}
+	<script type="text/javascript">//<![CDATA[
+		(function($){
+			$('<input type="submit" value="Save and edit next" name="_saveandeditnext" />')
+			.prependTo('div.submit-row');
+			$('<input type="hidden" value="{{next_object}}" name="_next_object" />')
+			.prependTo('div.submit-row');
+		})(django.jQuery);
+	//]]></script>
+{% endif %}
+
+{% endblock content %}

 {% block footer %}

@@ -10,4 +26,4 @@
 		django.jQuery(".field-created input").first().attr("type", "date")
 	</script>

-{% endblock footer %}
+{% endblock footer %}
--- a/src/documents/templates/admin/documents/document/select_object.html
+++ b/src/documents/templates/admin/documents/document/select_object.html
@@ -0,0 +1,50 @@
+{% extends "admin/base_site.html" %}
+
+
+{% load i18n l10n admin_urls static %}
+{% load staticfiles %}
+
+
+{% block extrahead %}
+	{{ block.super }}
+	{{ media }}
+	<script type="text/javascript" src="{% static 'admin/js/cancel.js' %}"></script>
+{% endblock %}
+
+
+{% block bodyclass %}{{ block.super }} app-{{ opts.app_label }} model-{{ opts.model_name }} delete-confirmation delete-selected-confirmation{% endblock %}
+
+
+{% block breadcrumbs %}
+	<div class="breadcrumbs">
+		<a href="{% url 'admin:index' %}">{% trans 'Home' %}</a>
+		&rsaquo; <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
+		&rsaquo; <a href="{% url opts|admin_urlname:'changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
+		&rsaquo; {{ title }}
+	</div>
+{% endblock %}
+
+{% block content %}
+	<p>Please select the {{itemname}}.</p>
+	<form method="post">{% csrf_token %}
+		<div>
+			{% for obj in queryset %}
+			<input type="hidden" name="{{ action_checkbox_name }}" value="{{ obj.pk|unlocalize }}"/>
+			{% endfor %}
+			<p>
+				<select name="obj_id">
+					{% for obj in objects %}
+					<option value="{{ obj.id }}">{{ obj.name }}</option>
+					{% endfor %}
+				</select>
+			</p>
+
+			<input type="hidden" name="action" value="{{ action }}"/>
+			<input type="hidden" name="post" value="yes" />
+			<p>
+				<input type="submit" value="{% trans 'Confirm' %}" />
+				<a href="#" class="button cancel-link">{% trans "Go back" %}</a>
+			</p>
+		</div>
+	</form>
+{% endblock %}
--- a/src/documents/tests/test_matchables.py
+++ b/src/documents/tests/test_matchables.py
@@ -166,7 +166,7 @@ class TestMatching(TestCase):
    def test_match_regex(self):

        self._test_matching(
-            "alpha\w+gamma",
+            r"alpha\w+gamma",
            "MATCH_REGEX",
            (
                "I have alpha_and_gamma in me",
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -1,6 +1,8 @@
 from django.http import HttpResponse, HttpResponseBadRequest
 from django.views.generic import DetailView, FormView, TemplateView
 from django_filters.rest_framework import DjangoFilterBackend
+from django.conf import settings
+
 from paperless.db import GnuPG
 from paperless.mixins import SessionOrBasicAuthMixin
 from paperless.views import StandardPagination
@@ -48,6 +50,9 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
            Document.TYPE_JPG: "image/jpeg",
            Document.TYPE_GIF: "image/gif",
            Document.TYPE_TIF: "image/tiff",
+            Document.TYPE_CSV: "text/csv",
+            Document.TYPE_MD:  "text/markdown",
+            Document.TYPE_TXT: "text/plain"
        }

        if self.kwargs["kind"] == "thumb":
@@ -60,8 +65,11 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
            self._get_raw_data(self.object.source_file),
            content_type=content_types[self.object.file_type]
        )
-        response["Content-Disposition"] = 'attachment; filename="{}"'.format(
-            self.object.file_name)
+
+        DISPOSITION = 'inline' if settings.INLINE_DOC else 'attachment'
+
+        response["Content-Disposition"] = '{}; filename="{}"'.format(
+            DISPOSITION, self.object.file_name)

        return response

--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -22,6 +22,14 @@ elif os.path.exists("/usr/local/etc/paperless.conf"):
    load_dotenv("/usr/local/etc/paperless.conf")


+def __get_boolean(key, default="NO"):
+    """
+    Read the environment variable `key` (falling back to `default`) and report
+    whether its value, compared case-insensitively, is yes/y/1/t/true.
+    """
+    return os.getenv(key, default).lower() in ("yes", "y", "1", "t", "true")
+
+
 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

@@ -39,7 +47,7 @@ SECRET_KEY = os.getenv(


 # SECURITY WARNING: don't run with debug turned on in production!
-DEBUG = True
+DEBUG = __get_boolean("PAPERLESS_DEBUG", "YES")

 LOGIN_URL = "admin:login"

@@ -67,6 +75,7 @@ INSTALLED_APPS = [
    "documents.apps.DocumentsConfig",
    "reminders.apps.RemindersConfig",
    "paperless_tesseract.apps.PaperlessTesseractConfig",
+    "paperless_text.apps.PaperlessTextConfig",

    "django.contrib.admin",

@@ -79,8 +88,6 @@ INSTALLED_APPS = [
 if os.getenv("PAPERLESS_INSTALLED_APPS"):
    INSTALLED_APPS += os.getenv("PAPERLESS_INSTALLED_APPS").split(",")

-
-
 MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
@@ -99,7 +106,6 @@ CORS_ORIGIN_WHITELIST = tuple(os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "localho
 if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")):
    _index = MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware")
    MIDDLEWARE[_index] = "paperless.middleware.Middleware"
-    MIDDLEWARE.remove("django.contrib.auth.middleware.SessionAuthenticationMiddleware")

 ROOT_URLCONF = 'paperless.urls'

@@ -138,13 +144,14 @@ DATABASES = {
    }
 }

-if os.getenv("PAPERLESS_DBUSER") and os.getenv("PAPERLESS_DBPASS"):
+if os.getenv("PAPERLESS_DBUSER"):
    DATABASES["default"] = {
        "ENGINE": "django.db.backends.postgresql_psycopg2",
        "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
        "USER": os.getenv("PAPERLESS_DBUSER"),
-        "PASSWORD": os.getenv("PAPERLESS_DBPASS")
    }
+    if os.getenv("PAPERLESS_DBPASS"):
+        DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")


 # Password validation
@@ -224,12 +231,12 @@ OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
 OCR_THREADS = os.getenv("PAPERLESS_OCR_THREADS")

 # OCR all documents?
-OCR_ALWAYS = bool(os.getenv("PAPERLESS_OCR_ALWAYS", "NO").lower() in ("yes", "y", "1", "t", "true"))  # NOQA
+OCR_ALWAYS = __get_boolean("PAPERLESS_OCR_ALWAYS")

 # If this is true, any failed attempts to OCR a PDF will result in the PDF
 # being indexed anyway, with whatever we could get.  If it's False, the file
 # will simply be left in the CONSUMPTION_DIR.
-FORGIVING_OCR = bool(os.getenv("PAPERLESS_FORGIVING_OCR", "YES").lower() in ("yes", "y", "1", "t", "true"))  # NOQA
+FORGIVING_OCR = __get_boolean("PAPERLESS_FORGIVING_OCR", "YES")  # default: on

 # GNUPG needs a home directory for some reason
 GNUPG_HOME = os.getenv("HOME", "/tmp")
@@ -273,6 +280,9 @@ PASSPHRASE = os.getenv("PAPERLESS_PASSPHRASE")
 PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT")
 POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")

+# Whether to display a selected document inline, or download it as attachment:
+INLINE_DOC = __get_boolean("PAPERLESS_INLINE_DOC")
+
 # The number of items on each page in the web UI.  This value must be a
 # positive integer, but if you don't define one in paperless.conf, a default of
 # 100 will be used.
@@ -283,3 +293,9 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")

 # Specify the default date order (for autodetected dates)
 DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
+
+# Specify for how many years a correspondent is considered recent. Recent
+# correspondents will be shown in a separate "Recent correspondents" filter as
+# well. Set to 0 to disable this filter.
+PAPERLESS_RECENT_CORRESPONDENT_YEARS = int(os.getenv(
+    "PAPERLESS_RECENT_CORRESPONDENT_YEARS", 0))
--- a/src/paperless/version.py
+++ b/src/paperless/version.py
@@ -1 +1 @@
-__version__ = (2, 2, 0)
+__version__ = (2, 3, 0)
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -14,7 +14,7 @@ from pyocr.libtesseract.tesseract_raw import \
 from pyocr.tesseract import TesseractError

 import pdftotext
-from documents.parsers import DocumentParser, ParseError
+from documents.parsers import DocumentParser, ParseError, DATE_REGEX

 from .languages import ISO639

@@ -50,10 +50,11 @@ class RasterisedDocumentParser(DocumentParser):
            self.CONVERT,
            "-scale", "500x5000",
            "-alpha", "remove",
-            self.document_path, os.path.join(self.tempdir, "convert-%04d.png")
+            "{}[0]".format(self.document_path),
+            os.path.join(self.tempdir, "convert.png")
        )

-        return os.path.join(self.tempdir, "convert-0000.png")
+        return os.path.join(self.tempdir, "convert.png")

    def _is_ocred(self):

@@ -171,8 +172,8 @@ class RasterisedDocumentParser(DocumentParser):
                raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                return raw_text
            raise OCRError(
-                "The guessed language is not available in this instance of "
-                "Tesseract."
+                "The guessed language ({}) is not available in this instance "
+                "of Tesseract.".format(guessed_language)
            )

    def _ocr(self, imgs, lang):
@@ -210,22 +211,8 @@ class RasterisedDocumentParser(DocumentParser):
        except ParseError as e:
            return None

-        # This regular expression will try to find dates in the document at
-        # hand and will match the following formats:
-        # - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-        # - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-        # - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
-        # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
-        # - MONTH ZZZZ, with ZZZZ being 4 digits
-        # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
-        pattern = re.compile(
-            r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
-            r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
-            r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
-            r'\b([^\W\d_]{3,9} [0-9]{4})\b')
-
        # Iterate through all regex matches and try to parse the date
-        for m in re.finditer(pattern, text):
+        for m in re.finditer(DATE_REGEX, text):
            datestring = m.group(0)

            try:
@@ -272,8 +259,9 @@ def run_unpaper(args):
 def strip_excess_whitespace(text):
    collapsed_spaces = re.sub(r"([^\S\r\n]+)", " ", text)
    no_leading_whitespace = re.sub(
-        "([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
-    no_trailing_whitespace = re.sub("([^\S\n\r]+)$", '', no_leading_whitespace)
+        r"([\n\r]+)([^\S\n\r]+)", '\\1', collapsed_spaces)
+    no_trailing_whitespace = re.sub(
+        r"([^\S\n\r]+)$", '', no_leading_whitespace)
    return no_trailing_whitespace


--- a/src/paperless_tesseract/signals.py
+++ b/src/paperless_tesseract/signals.py
@@ -5,7 +5,7 @@ from .parsers import RasterisedDocumentParser

 class ConsumerDeclaration:

-    MATCHING_FILES = re.compile("^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
+    MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")

    @classmethod
    def handle(cls, sender, **kwargs):
--- a/src/paperless_text/__init__.py
+++ b/src/paperless_text/__init__.py
--- a/src/paperless_text/apps.py
+++ b/src/paperless_text/apps.py
@@ -0,0 +1,16 @@
+from django.apps import AppConfig
+
+
+class PaperlessTextConfig(AppConfig):
+
+    name = "paperless_text"
+
+    def ready(self):
+
+        from documents.signals import document_consumer_declaration
+
+        from .signals import ConsumerDeclaration
+
+        document_consumer_declaration.connect(ConsumerDeclaration.handle)
+
+        AppConfig.ready(self)
--- a/src/paperless_text/parsers.py
+++ b/src/paperless_text/parsers.py
@@ -0,0 +1,131 @@
+import os
+import re
+import subprocess
+
+import dateparser
+from django.conf import settings
+
+from documents.parsers import DocumentParser, ParseError, DATE_REGEX
+
+
+class TextDocumentParser(DocumentParser):
+    """
+    This parser directly parses a text document (.txt, .md, or .csv)
+    """
+
+    CONVERT = settings.CONVERT_BINARY
+    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
+    UNPAPER = settings.UNPAPER_BINARY
+    DATE_ORDER = settings.DATE_ORDER
+    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
+    OCR_ALWAYS = settings.OCR_ALWAYS
+
+    def __init__(self, path):
+        super().__init__(path)
+        self._text = None
+
+    def get_thumbnail(self):
+        """
+        The thumbnail of a txt is just a 500px wide image of the text
+        rendered onto a letter-sized page.
+        """
+        # The below is heavily cribbed from https://askubuntu.com/a/590951
+
+        bg_color = "white"  # bg color
+        text_color = "black"  # text color
+        psize = [500, 647]  # icon size
+        n_lines = 50  # number of lines to show
+        output_file = os.path.join(self.tempdir, "convert-txt.png")
+
+        temp_bg = os.path.join(self.tempdir, "bg.png")
+        temp_txlayer = os.path.join(self.tempdir, "tx.png")
+        picsize = "x".join([str(n) for n in psize])
+        txsize = "x".join([str(n - 8) for n in psize])
+
+        def create_bg():
+            work_size = ",".join([str(n - 1) for n in psize])
+            r = str(round(psize[0] / 10))
+            rounded = ",".join([r, r])
+            run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
+                        '"fill ', bg_color, ' roundrectangle 0,0,',
+                        work_size, ",", rounded, '" ', temp_bg)
+
+        def read_text():
+            with open(self.document_path, 'r') as src:
+                lines = [l.strip() for l in src.readlines()]
+                text = "\n".join([l for l in lines[:n_lines]])
+                return text.replace('"', "'")
+
+        def create_txlayer():
+            run_command(self.CONVERT,
+                        "-background none",
+                        "-fill",
+                        text_color,
+                        "-pointsize", "12",
+                        "-border 4 -bordercolor none",
+                        "-size ", txsize,
+                        ' caption:"', read_text(), '" ',
+                        temp_txlayer)
+
+        create_txlayer()
+        create_bg()
+        run_command(self.CONVERT, temp_bg, temp_txlayer,
+                    "-background None -layers merge ", output_file)
+
+        return output_file
+
+    def get_text(self):
+
+        if self._text is not None:
+            return self._text
+
+        with open(self.document_path, 'r') as f:
+            self._text = f.read()
+
+        return self._text
+
+    def get_date(self):
+        date = None
+        datestring = None
+
+        try:
+            text = self.get_text()
+        except ParseError as e:
+            return None
+
+        # Iterate through all regex matches and try to parse the date
+        for m in re.finditer(DATE_REGEX, text):
+            datestring = m.group(0)
+
+            try:
+                date = dateparser.parse(
+                           datestring,
+                           settings={'DATE_ORDER': self.DATE_ORDER,
+                                     'PREFER_DAY_OF_MONTH': 'first',
+                                     'RETURN_AS_TIMEZONE_AWARE': True})
+            except TypeError:
+                # Skip all matches that do not parse to a proper date
+                continue
+
+            if date is not None:
+                break
+
+        if date is not None:
+            self.log("info", "Detected document date " + date.isoformat() +
+                             " based on string " + datestring)
+        else:
+            self.log("info", "Unable to detect date for document")
+
+        return date
+
+
+def run_command(*args):
+    environment = os.environ.copy()
+    if settings.CONVERT_MEMORY_LIMIT:
+        environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
+    if settings.CONVERT_TMPDIR:
+        environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
+
+    if not subprocess.Popen(' '.join(args), env=environment,
+                            shell=True).wait() == 0:
+        raise ParseError("Convert failed at {}".format(args))
--- a/src/paperless_text/signals.py
+++ b/src/paperless_text/signals.py
@@ -0,0 +1,23 @@
+import re
+
+from .parsers import TextDocumentParser
+
+
+class ConsumerDeclaration:
+
+    MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")
+
+    @classmethod
+    def handle(cls, sender, **kwargs):
+        return cls.test
+
+    @classmethod
+    def test(cls, doc):
+
+        if cls.MATCHING_FILES.match(doc.lower()):
+            return {
+                "parser": TextDocumentParser,
+                "weight": 10
+            }
+
+        return None
--- a/src/tox.ini
+++ b/src/tox.ini
@@ -5,7 +5,7 @@

 [tox]
 skipsdist = True
-envlist = py34, py35, py36, pycodestyle, doc
+envlist = py34, py35, py36, py37, pycodestyle, doc

 [testenv]
 commands = pytest
Author	SHA1	Message	Date
Daniel Quinn	a511d34d69	Fix implementation of django-filter	2018-09-23 15:47:14 +01:00
Daniel Quinn	35c5b8e263	Add note about tweaks to psql connections	2018-09-23 14:05:35 +01:00
Daniel Quinn	8726b0316c	Add note about import/export process changes	2018-09-23 14:03:38 +01:00
Daniel Quinn	acf6caca2f	Add a tox test for Python 3.7	2018-09-23 14:01:35 +01:00
Daniel Quinn	b20d7eca03	Tweak settings.py to allow for TRUST-based PostgreSQL auth	2018-09-23 14:01:15 +01:00
Daniel Quinn	d17497fd5b	Move the unique key on checksums to migration 15. This shouldn't affect anyone, since this migration is pretty old, but it allows people using PostgreSQL to actually run Paperless.	2018-09-23 14:00:27 +01:00
Daniel Quinn	090565d84c	Tweak the import/export system to handle encryption choices better. Now when you export a document, the `storage_type` value is always `unencrypted` (since that's what it is when it's exported anyway), and the flag is set by the importing script instead, based on the existence of a `PAPERLESS_PASSPHRASE` environment variable, indicating that encryption is enabled.	2018-09-23 13:58:40 +01:00
Daniel Quinn	79e1e60238	Fix typo	2018-09-23 12:59:56 +01:00
Daniel Quinn	ff111f1bde	Update changelog for new stuff from #405	2018-09-23 12:54:49 +01:00
Daniel Quinn	6db788a550	Add docs for indentation & spacing	2018-09-23 12:54:39 +01:00
Daniel Quinn	f4a09013d7	Merge branch 'jonaswinkler-new-features'	2018-09-23 12:42:02 +01:00
Daniel Quinn	4130dd3465	Conform code to standards	2018-09-23 12:41:28 +01:00
Daniel Quinn	117d7dad04	Improve the unknown language error message	2018-09-23 12:41:14 +01:00
Daniel Quinn	b420281be0	Remove numpy, scikit-learn, and scipy as they weren't being used	2018-09-23 12:40:46 +01:00
Daniel Quinn	17f8953a49	Merge branch 'new-features' of git://github.com/jonaswinkler/paperless into jonaswinkler-new-features	2018-09-23 11:57:44 +01:00
Daniel Quinn	9682a6f6fc	Add a contribution guide	2018-09-22 16:22:03 +01:00
Daniel Quinn	425bbe34ef	Make the names of the sample files visible	2018-09-22 16:17:18 +01:00
Daniel Quinn	60ee08adec	Reduce duplication in docker-compose.env.example. See #404 for more info on where this came from.	2018-09-22 15:27:22 +01:00
Daniel Quinn	b4b4d8f25e	Add an example for pdf2pdfocr with the pre-consume hook	2018-09-22 14:00:00 +01:00
Daniel Quinn	cce6b43062	Clean up release notes	2018-09-22 13:59:50 +01:00
Jonas Winkler	fb6f2e07c9	Added a bunch of new features: - Debug mode is now configurable in the configuration file. This way, we don't have to edit versioned files to disable it on production systems. - Recent correspondents filter (enable in configuration file) - Document actions: Edit tags and correspondents on multiple documents at once - Replaced month list filter with date drilldown - Sortable document count columns on Tag and Correspondent admin - Last correspondence column on Correspondent admin - Save and edit next functionality for document editing	2018-09-13 15:19:25 +02:00
Daniel Quinn	2edf65dd1e	Bump to 2.3.0	2018-09-09 21:51:44 +01:00
Daniel Quinn	9a739bdbab	Merge pull request #401 from ahyear/patch-1: add migrate command to docker update process	2018-09-09 21:26:56 +01:00
Daniel Quinn	66db06590d	Merge branch 'jat255-ENH_config_inline_or_attach'	2018-09-09 21:22:42 +01:00
Daniel Quinn	7cef108785	Streamline how we handle boolean values in settings.py	2018-09-09 21:22:07 +01:00
Daniel Quinn	a86a20ef0f	Make the example file contain the default value	2018-09-09 21:16:53 +01:00
Daniel Quinn	f94347abc0	Merge branch 'ENH_config_inline_or_attach' of git://github.com/jat255/paperless into jat255-ENH_config_inline_or_attach	2018-09-09 21:15:14 +01:00
Daniel Quinn	46cbd10ba0	Merge pull request #399 from jat255/ENH_convert_only_one_page: Speed up thumbnail generation for PDFs	2018-09-09 21:12:42 +01:00
Daniel Quinn	2a96c648e8	Merge pull request #396 from dubit0/postgres_mysql_fix: Fix document checks with PostgreSQL and MySQL backends.	2018-09-09 21:10:36 +01:00
Daniel Quinn	75648cc74b	Merge branch 'jat255-ENH_text_consumer'	2018-09-09 21:03:58 +01:00
Daniel Quinn	0472fe4e9e	Reorder imports	2018-09-09 21:03:37 +01:00
Daniel Quinn	c99f5923d5	Rename `parsers` to `DATE_REGEX`. In moving the `parsers` variable into the package-level, it lost the context, so a more descriptive name was needed.	2018-09-09 21:02:30 +01:00
Daniel Quinn	ef302abed7	Fix pycodestyle complaints	2018-09-09 20:55:37 +01:00
Daniel Quinn	2dc35cc856	Merge branch 'ENH_text_consumer' of git://github.com/jat255/paperless into jat255-ENH_text_consumer	2018-09-09 20:52:59 +01:00
Daniel Quinn	f4c399f0dd	Merge pull request #398 from ddddavidmartin/bump_pyocr_version_for_tesseract_4_support: Bump required version for Pyocr to support the latest tesseract 4.	2018-09-09 20:01:51 +01:00
Daniel Quinn	5342db6ada	Fix pycodestyle complaints. Apparently, pycodestyle updated itself to now check for invalid escape sequences, which only complain if the regex in use isn't a raw string (r"").	2018-09-09 20:00:12 +01:00
Daniel Quinn	5c39fff51b	Add tox to dev dependencies	2018-09-09 19:59:47 +01:00
ahyear	ed0e40d3e6	add migrate command to docker update process	2018-09-06 15:32:41 +02:00
Joshua Taillon	652ead2f5c	remove debugging print statement	2018-09-05 23:05:37 -04:00
Joshua Taillon	be9757894a	add INLINE_DOC to settings.py	2018-09-05 23:03:30 -04:00
Joshua Taillon	22378789e2	add option for inline vs. attachment for document rendering	2018-09-05 22:58:38 -04:00
Joshua Taillon	72c828170e	move date-matching regex pattern to base parser module for use by all subclasses	2018-09-05 21:13:36 -04:00
Joshua Taillon	cac63494f0	change tesseract parser to only convert first page to save (potentially) massive amounts of work	2018-09-05 15:18:35 -04:00
Daniel Quinn	939a67bd4b	Add empty requirements for rtd to reference	2018-09-05 11:16:42 +01:00
Daniel Quinn	fbc6a58f5a	Add credits for 2.2.0 that I forgot	2018-09-05 10:59:06 +01:00
Daniel Quinn	01a358d2b0	Re-flow text to keep it <80c wide	2018-09-05 10:58:41 +01:00
David Martin	6b447628ed	Bump required version for Pyocr to support the latest tesseract 4. This recently changed in the official tesseract engine [0]. -psm is not allowed as an option anymore and --psm has to be used instead. The latest pyocr enables support for this [1]. [0] tesseract-ocr/tesseract@ee201e1 [1] `5abd0a566a`	2018-09-05 13:03:42 +10:00
Thomas Niederprüm	2308d5a613	Catch ProgrammingError in Document checks. When running PostgreSQL or MariaDB/MySQL backends, a query to a non-existent table will raise a "ProgrammingError". This patch properly catches this error. Without this patch all management calls to manage.py will lead to an error when running PostgreSQL or MariaDB as a backend.	2018-09-04 20:11:48 +02:00
Joshua Taillon	23bf79274c	Merge branch 'master' into ENH_text_consumer	2018-09-03 23:47:30 -04:00
Joshua Taillon	4849249d86	explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation	2018-09-03 23:46:13 -04:00
Daniel Quinn	ee20af71e8	Bump for 2.2.1	2018-09-03 00:27:40 +01:00
Daniel Quinn	3c8aa3ba42	Don't try to remove SessionAuthenticationMiddleware. It was removed entirely in Django 2.0	2018-09-03 00:25:10 +01:00
Daniel Quinn	778ffa488d	Add Tim to the credits for 2.2.0	2018-09-02 21:53:52 +01:00
Joshua Taillon	d6fedbec52	first stab at text consumer	2018-08-30 23:32:41 -04:00