Compare commits

130 Commits

Author SHA1 Message Date
Jonas Winkler
872d657361 Version bump 2018-12-11 14:32:30 +01:00
Jonas Winkler
ea58c66fd4 Merge branch 'master' into dev 2018-12-11 12:38:15 +01:00
Jonas Winkler
bcd9220021 minor changes 2018-12-11 12:26:44 +01:00
Jonas Winkler
766109ae4e Merge remote-tracking branch 'upstream/master' 2018-12-11 12:06:15 +01:00
Daniel Quinn
3c2a1a8c13 Merge pull request #451 from speshak/remote_pg
Add DBHOST & DBPORT parameters to settings
2018-12-06 23:38:50 +00:00
Daniel Quinn
1c7047bbb8 Move ipython out of the base dependencies 2018-12-06 23:28:33 +00:00
Scott Peshak
96dafe8c43 Add psycopg2 dependencies to Dockerfile 2018-12-02 16:14:58 -06:00
Scott Peshak
d6896daece Add psycopg2 to requirements.txt 2018-12-02 16:14:58 -06:00
Scott Peshak
d12f0642f2 Add DBHOST & DBPORT parameters
Resolves #445
2018-12-02 15:20:29 -06:00
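The DBHOST & DBPORT change can be pictured as an optional switch in Django's database settings: SQLite by default, PostgreSQL when a host is given. This is a minimal sketch, not the actual paperless settings.py code; the PAPERLESS_DB* variable names and the engine strings are assumptions.

```python
import os

def database_settings():
    """Build a Django-style DATABASES entry from environment variables.

    Sketch only: falls back to SQLite unless PAPERLESS_DBHOST is set,
    in which case PostgreSQL (via psycopg2) is used, with an optional
    PAPERLESS_DBPORT override.
    """
    db_host = os.getenv("PAPERLESS_DBHOST")
    if not db_host:
        return {
            "ENGINE": "django.db.backends.sqlite3",
            "NAME": "db.sqlite3",
        }
    settings = {
        "ENGINE": "django.db.backends.postgresql_psycopg2",
        "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
        "HOST": db_host,
    }
    port = os.getenv("PAPERLESS_DBPORT")
    if port:
        settings["PORT"] = port
    return settings
```
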
Daniel Quinn
e3a616ebc3 Version bump 2018-12-01 17:12:34 +00:00
Daniel Quinn
f898ec792f Added notes for 2.6.0 2018-12-01 17:11:58 +00:00
Daniel Quinn
f45b6762f2 Merge branch 'jat255-ENH_filename_date_parsing' 2018-12-01 17:10:26 +00:00
Daniel Quinn
d544f269e0 Conform everything to the coding standards
https://paperless.readthedocs.io/en/latest/contributing.html#additional-style-guides
2018-12-01 17:09:12 +00:00
Daniel Quinn
650db75c2b Merge branch 'ENH_filename_date_parsing' of https://github.com/jat255/paperless into jat255-ENH_filename_date_parsing 2018-12-01 16:57:16 +00:00
Daniel Quinn
7dbb77e57b Add a .editorconfig 2018-12-01 16:56:58 +00:00
Daniel Quinn
f1b3312bcb Merge branch 'jat255-ENH_tag_colour_override' 2018-12-01 16:22:38 +00:00
Daniel Quinn
ea05ab2b06 Restructure colour.js to work within a .ready() 2018-12-01 16:22:19 +00:00
Daniel Quinn
4f4c515629 Add colours to the tags pages 2018-12-01 16:21:58 +00:00
Daniel Quinn
c1f926a40c Merge branch 'ENH_tag_colour_override' of https://github.com/jat255/paperless into jat255-ENH_tag_colour_override 2018-12-01 15:56:37 +00:00
Daniel Quinn
c1d18c1e83 Fix language guesses in tests
It turns out that the Lorem ipsum text in the sample files was confusing the language guesser, causing it to think the file was in Catalan and not English or German.
2018-12-01 15:55:59 +00:00
Joshua Taillon
ba452e0524 move tag colour override to static folder 2018-12-01 09:14:44 -05:00
Daniel Quinn
c5488dcb98 Merge pull request #441 from jat255/patch-1
Update gunicorn commands
2018-11-30 19:45:01 +00:00
Joshua Taillon
d6eefbccee encapsulate in if blocks so no errors on non-tag pages; added support for edit tags page 2018-11-17 21:34:11 -05:00
Joshua Taillon
a813288aaf add example override for tag colour display 2018-11-17 09:18:36 -05:00
Joshua Taillon
63e2fbe0c9 Update paperless-webserver.service
Update `gunicorn` command to use `--pythonpath`
2018-11-16 09:21:07 -05:00
Joshua Taillon
597a7bb391 Update setup.rst
The provided `gunicorn` command did not work for me, failing with the following error:

```
ModuleNotFoundError: No module named '/home/paperless/paperless/src/paperless' 
```

The solution was to provide only `paperless.wsgi` as the argument to `gunicorn`, and provide a flag for `--pythonpath`. After changing it to this, the server started up fine.
2018-11-16 09:20:08 -05:00
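The two commits above boil down to passing only the WSGI module to gunicorn and supplying the source directory separately. A sketch of the resulting systemd unit fragment; the user and install path below are assumptions taken from the error message, not verified against the docs:

```ini
[Service]
User=paperless
Group=paperless
# Pass only the module name to gunicorn; the source directory goes
# in --pythonpath instead of being baked into the module argument.
ExecStart=/usr/local/bin/gunicorn --pythonpath=/home/paperless/paperless/src paperless.wsgi
```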
Joshua Taillon
730daa3d6d Merge branch 'master' of github.com:danielquinn/paperless into ENH_filename_date_parsing 2018-11-15 23:17:59 -05:00
Joshua Taillon
c225281f95 Change the massive regex to match boundaries with _ or - characters (not just word breaks); add line for year first formats like YYYY-MM-DD 2018-11-15 20:38:53 -05:00
Joshua Taillon
e1d8744c66 Add option for parsing of date from filename (and associated tests) 2018-11-15 20:32:15 -05:00
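The filename date parsing above can be sketched with a far smaller regex than the real one. The pattern and function below are illustrative only; they show the `_`/`-` boundary handling and the year-first (YYYY-MM-DD) format mentioned in the commits, not the actual paperless implementation.

```python
import re
from datetime import datetime

# Match a date bounded by "_", "-", or a word break, in either
# year-first (YYYY-MM-DD) or day-first (DD-MM-YYYY) order.
DATE_REGEX = re.compile(
    r"(\b|[_-])(\d{4})[_-]?(\d{2})[_-]?(\d{2})(\b|[_-])"    # year first
    r"|(\b|[_-])(\d{2})[_-]?(\d{2})[_-]?(\d{4})(\b|[_-])"   # day first
)

def date_from_filename(name):
    """Return a datetime parsed from the filename, or None."""
    m = DATE_REGEX.search(name)
    if not m:
        return None
    if m.group(2):  # year-first branch matched
        year, month, day = m.group(2), m.group(3), m.group(4)
    else:           # day-first branch matched
        day, month, year = m.group(7), m.group(8), m.group(9)
    try:
        return datetime(int(year), int(month), int(day))
    except ValueError:
        return None
```
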
Joshua Taillon
4409f65840 Update date tests to be more explicit with settings and allow tests to pass if using a timezone other than UTC 2018-11-15 20:30:23 -05:00
Daniel Quinn
c83dc666a4 I'm going to have to ditch requirements.txt if it can't be reliably generated 2018-11-03 13:42:03 +00:00
Daniel Quinn
9ab50ed09d Fix requirements.txt 2018-11-03 13:29:22 +00:00
Daniel Quinn
e0acb4a40b Update dependencies
This includes a security update for requests.
2018-11-03 12:49:35 +00:00
Daniel Quinn
eca6250c1b Fix the correspondent filters #423 2018-11-03 11:06:55 +00:00
Daniel Quinn
33abec0663 Code cleanup 2018-11-03 11:05:22 +00:00
Daniel Quinn
d825667c9b Allow an infinite number of logs to be deleted. 2018-11-03 10:25:51 +00:00
Daniel Quinn
84511f8418 Merge pull request #432 from deanpcmad/patch-1
Added missing ; to nginx config
2018-10-31 13:12:32 +00:00
Dean Perry
81e488b90d added missing ; to nginx config 2018-10-31 12:39:48 +00:00
Daniel Quinn
bff28113df Merge pull request #425 from mrwacky42/remove_vagrant
Remove Vagrant docs
2018-10-14 09:57:41 +01:00
Sharif Nassar
0b377a76d0 Remove Vagrant docs
* Vagrant does not seem to have any libvirt boxes for Ubuntu any more.
* Vagrant 2 was released a year ago, but vagrant-libvirt only claims
  to support up to Vagrant 1.8.
2018-10-13 11:31:53 -07:00
Daniel Quinn
ec1d5c80ff Add pip install to update process 2018-10-08 10:38:53 +01:00
Daniel Quinn
bd95804fbf Merge pull request #421 from ddddavidmartin/clarify_forgiving_ocr_handling
Clarify forgiving ocr handling
2018-10-08 09:35:57 +00:00
Daniel Quinn
8dc355a66f Merge pull request #422 from erikarvstedt/inotify-linux
requirements.txt: bring back Linux-only restriction for inotify-simple
2018-10-08 09:34:47 +00:00
Daniel Quinn
fbb389553c Merge pull request #419 from ddddavidmartin/let_unpaper_overwrite_temp_files
Let unpaper overwrite temporary files.
2018-10-08 09:32:30 +00:00
Erik Arvstedt
f8cfbb44d2 requirements.txt: bring back Linux-only restriction for inotify-simple
Fixes #418
2018-10-08 11:00:34 +02:00
David Martin
818780a191 Add PAPERLESS_FORGIVING_OCR option to example config.
Having it in the example config makes it clearer that the option exists.
2018-10-08 19:38:38 +11:00
David Martin
b350ec48b7 Mention FORGIVING_OCR config option when language detection fails.
It is not obvious that PAPERLESS_FORGIVING_OCR allows document consumption to proceed even if no language can be detected.
Mentioning it in the actual error message in the log seems like the best
way to make it clear.
2018-10-08 19:37:05 +11:00
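The option the two commits above document would appear in the example configuration roughly like this; the exact accepted value format is an assumption, not taken from the paperless docs:

```ini
# Continue consuming a document even when its language cannot be
# detected, instead of aborting with an error.
PAPERLESS_FORGIVING_OCR="true"
```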
David Martin
f948ee11be Let unpaper overwrite temporary files.
I'm not sure what the circumstances are, but it looks like unpaper can
attempt to write a temporary file that already exists [0]. This then
fails the consumption. As per daedadu's comment simply letting unpaper
overwrite files fixes this.

[0]
unpaper: error: output file '/tmp/paperless/paperless-pjkrcr4l/convert-0000.unpaper.pnm' already present.
See https://web.archive.org/web/20181008081515/https://github.com/danielquinn/paperless/issues/406#issue-360651630
2018-10-08 19:12:11 +11:00
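In practice the fix amounts to passing unpaper's `--overwrite` flag so a re-run does not abort when the output file already exists. An illustrative invocation; the temp-file paths are hypothetical, modelled on the error message above:

```shell
unpaper --overwrite /tmp/paperless/convert-0000.pnm /tmp/paperless/convert-0000.unpaper.pnm
```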
Jonas Winkler
b347e3347d Restored tagging functionality 2018-09-27 20:41:16 +02:00
Jonas Winkler
7257cece30 Code style changes 2018-09-26 10:51:42 +02:00
Jonas Winkler
5b9f38d398 Removed the archive tag, as it wasn't really used anyway. 2018-09-25 21:51:38 +02:00
Jonas Winkler
b31d4779bf Code style changes 2018-09-25 21:12:47 +02:00
Jonas Winkler
60618381f8 Code style adjustments 2018-09-25 16:09:33 +02:00
Jonas Winkler
779ea6a015 Merge branch 'master' into dev 2018-09-25 14:53:21 +02:00
Jonas Winkler
94ede7389d Merge remote-tracking branch 'upstream/master' 2018-09-25 14:47:12 +02:00
Jonas Winkler
03beca7838 Fixed api issue (some parameter name got renamed) 2018-09-16 13:29:56 +02:00
Jonas Winkler
fb1dcb6e08 Merge branch 'fix-document-viewer' into dev 2018-09-14 16:48:37 +02:00
Jonas Winkler
a298cbd4ce Merge branch 'fix-document-viewer' 2018-09-14 16:48:27 +02:00
Jonas Winkler
f1a1e7f1a4 fixed document viewer 2018-09-14 16:48:08 +02:00
Jonas Winkler
8371c2399f Merge branch 'dev' 2018-09-13 14:15:33 +02:00
Jonas Winkler
909586bf25 Code style changed 2018-09-13 14:15:16 +02:00
Jonas Winkler
8d003a6a85 Save and edit next button appears on documents without viewer as well.
Made the new recent correspondents filter optional. Disabled by default.
2018-09-13 13:10:05 +02:00
Jonas Winkler
0209b71404 Merge branch 'dev' 2018-09-13 10:29:10 +02:00
Jonas Winkler
0dc3644cc1 Added missing dependencies 2018-09-12 17:43:13 +02:00
Jonas Winkler
fb1a2ee577 Merge branch 'dev' 2018-09-12 17:20:12 +02:00
Jonas Winkler
7c589f71a4 Fixed a few minor issues. 2018-09-12 16:25:23 +02:00
Jonas Winkler
25a6aa909b removed duplicate code 2018-09-12 13:43:28 +02:00
Jonas Winkler
ef0d37985b Merge branch 'master' into dev 2018-09-12 11:47:35 +02:00
Jonas Winkler
898931cc03 bugfix 2018-09-11 20:45:36 +02:00
Jonas Winkler
17803e7936 fixed settings 2018-09-11 17:30:46 +02:00
Jonas Winkler
e72735c4f0 Merge remote-tracking branch 'upstream/master' 2018-09-11 14:43:59 +02:00
Jonas Winkler
46a5bc00d7 Merge branch 'machine-learning' into dev 2018-09-11 14:36:21 +02:00
Jonas Winkler
d46ee11143 The classifier works with ids now, not names. Minor changes. 2018-09-11 14:30:18 +02:00
Jonas Winkler
d2534a73e5 changed classifier 2018-09-11 00:33:07 +02:00
Jonas Winkler
11adc94e5e mode change 2018-09-06 12:00:01 +02:00
Jonas Winkler
04bf5fc094 fixed merge error 2018-09-06 10:15:15 +02:00
Jonas Winkler
d26f940a91 Merge branch 'dev' into machine-learning 2018-09-06 00:29:41 +02:00
Jonas Winkler
13725ef8ee Merge branch 'master' into dev 2018-09-06 00:28:58 +02:00
Jonas Winkler
6f0ca432c4 Added scikit-learn to requirements 2018-09-06 00:20:44 +02:00
Jonas Winkler
dd8746bac7 fixed the api 2018-09-05 15:29:05 +02:00
Jonas Winkler
8eeded95c4 Merge branch 'dev' into machine-learning 2018-09-05 15:26:39 +02:00
Jonas Winkler
131e1c9dd8 fixed the api 2018-09-05 15:25:14 +02:00
Jonas Winkler
a6b4fc7e81 fixed api 2018-09-05 14:57:37 +02:00
Jonas Winkler
cea880f245 implemented automatic classification field functionality 2018-09-05 14:31:02 +02:00
Jonas Winkler
82bc0e3368 Fixed a few things 2018-09-05 12:43:11 +02:00
Jonas Winkler
70bd05450a removed matching model fields, automatic classifier reloading, added automatic_classification field to matching model 2018-09-04 18:40:26 +02:00
Jonas Winkler
c765ef5eeb Merge remote-tracking branch 'upstream/master' 2018-09-04 16:02:48 +02:00
Jonas Winkler
30134034e2 Fixed documents not being saved after modification 2018-09-04 15:33:51 +02:00
Jonas Winkler
8a1a736340 Merge branch 'document-type' into dev 2018-09-04 14:55:59 +02:00
Jonas Winkler
68652c8c37 Document Type exporting 2018-09-04 14:55:29 +02:00
Jonas Winkler
c091eba26e Implemented the classifier model, including automatic tagging of new documents 2018-09-04 14:39:55 +02:00
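The classifier commits above (training on database data, then auto-tagging new documents) can be illustrated with a toy bag-of-words centroid classifier. This is purely a sketch of the idea, using only the standard library; the real implementation uses scikit-learn and works with tag ids, not this code.

```python
import math
from collections import Counter

def cosine(a, b):
    """Cosine similarity between two bag-of-words Counters."""
    dot = sum(a[w] * b[w] for w in a)
    na = math.sqrt(sum(v * v for v in a.values()))
    nb = math.sqrt(sum(v * v for v in b.values()))
    return dot / (na * nb) if na and nb else 0.0

class TagClassifier:
    """Toy centroid classifier: suggest the tag whose training
    documents look most similar to a new document's text."""

    def __init__(self):
        self.centroids = {}

    def train(self, labelled_docs):
        # labelled_docs: iterable of (tag, text) pairs from the database
        for tag, text in labelled_docs:
            self.centroids.setdefault(tag, Counter()).update(text.lower().split())

    def classify(self, text):
        if not self.centroids:
            return None
        words = Counter(text.lower().split())
        return max(self.centroids, key=lambda t: cosine(words, self.centroids[t]))
```
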
Jonas Winkler
ca315ba76c Added code that trains models based on data from the database 2018-09-03 15:55:41 +02:00
Jonas Winkler
350da81081 Added command to create datasets 2018-09-02 12:47:19 +02:00
Jonas Winkler
4129002086 Added static to ignore 2018-09-02 11:46:45 +02:00
Jonas Winkler
781a1dae71 - added recent correspondents filter
- sortable document_count fields
- added last correspondence field to CorrespondentAdmin
2018-08-28 15:42:39 +02:00
Jonas Winkler
01fed4f49d Removed WebDAV from dev, since it is kind of broken. 2018-08-28 12:12:29 +02:00
Jonas Winkler
d7ab69fed9 Added document type 2018-08-24 13:45:15 +02:00
Jonas Winkler
dfa5ea423f Merge branch 'ui-improvements' into dev 2018-07-16 20:56:49 +02:00
Jonas Winkler
a698a1b66b Different way to get the changelist. 2018-07-16 18:35:01 +02:00
Jonas Winkler
a5129018d2 Merge branch 'ui-improvements' into dev 2018-07-16 18:19:05 +02:00
Jonas Winkler
e3974c68ba bugfix 2018-07-16 18:01:27 +02:00
Jonas Winkler
d72604eb86 Merge branch 'ui-improvements' into dev 2018-07-16 16:09:41 +02:00
Jonas Winkler
f0c94cc65f Added 'save and edit next' functionality 2018-07-16 16:08:51 +02:00
Jonas Winkler
f21debe95d css stuff 2018-07-16 14:39:09 +02:00
Jonas Winkler
033ab72475 Merge branch 'workflow-improvements' into dev 2018-07-15 13:42:00 +02:00
Jonas Winkler
b059602050 Merge branch 'db-config' into dev 2018-07-15 13:41:54 +02:00
Jonas Winkler
2775dfb735 Merge branch 'ui-improvements' into dev 2018-07-15 13:41:49 +02:00
Jonas Winkler
04384c7037 Merge branch 'master' into dev 2018-07-15 13:41:43 +02:00
Jonas Winkler
75beb91791 added options to change database backend 2018-07-15 13:40:38 +02:00
Jonas Winkler
b138f4b52b fixed image width 2018-07-15 13:07:00 +02:00
Jonas Winkler
d108a69f1b added document viewers on document change form for easier editing of metadata, supports pdf, png, jpg 2018-07-14 23:05:28 +02:00
Jonas Winkler
bdaea3915e Merge branch 'master' into ui-improvements 2018-07-13 11:24:19 +02:00
Jonas Winkler
9e71b70d4b fixed the api 2018-07-13 11:20:45 +02:00
Jonas Winkler
960340a5db updated migrations 2018-07-12 11:54:03 +02:00
Jonas Winkler
b3709663f1 Merge branch 'ui-improvements' into dev 2018-07-11 15:07:30 +02:00
Jonas Winkler
9f20175cd3 Merge branch 'workflow-improvements' into dev 2018-07-11 15:05:56 +02:00
Jonas Winkler
adf57b2669 Merge branch 'master' into webdav 2018-07-11 15:02:50 +02:00
Jonas Winkler
f2c32d840e Added setting to enable webdav (default: disabled), cleaned up the code somewhat. 2018-07-11 14:59:47 +02:00
Jonas Winkler
ba9d7c8892 Moved actions to separate file 2018-07-11 13:02:18 +02:00
Jonas Winkler
270b0487ec Merge branch 'master' into workflow-improvements 2018-07-10 15:53:38 +02:00
Jonas Winkler
a63880ed19 Merge remote-tracking branch 'upstream/master' 2018-07-10 15:46:46 +02:00
Jonas Winkler
a40737bd0e Added actions to modify tags and correspondents on multiple documents 2018-07-10 15:39:24 +02:00
Jonas Winkler
c5b315f518 Show document serial number on change list 2018-07-06 18:04:31 +02:00
Jonas Winkler
e143a20f50 automatically update documents whenever a tag or correspondent is changed (this should make the document_retagger and document_correspondent managers somewhat obsolete?) 2018-07-06 13:51:50 +02:00
Jonas Winkler
c3a144f2ca inbox tags, archive tags, archive serial number for documents 2018-07-06 13:25:02 +02:00
Jonas Winkler
38bb1f9672 Some minor changes 2018-07-06 11:53:08 +02:00
Jonas Winkler
22da848be4 Updated WebDAV filtering. Filters resulting in empty results are not available anymore. 2018-07-05 17:21:13 +02:00
Jonas Winkler
a53e30e0a5 Initial support for WebDAV. Lots of stuff is not there yet and most of the stuff which is there is not really tested. But it kind of already works. 2018-07-05 16:18:20 +02:00
Jonas Winkler
7a2bd58ef8 Updated date filter to use the drilldown feature of django 2018-07-04 17:10:56 +02:00
Jonas Winkler
8f6231bd34 Updated to Django 2 2018-07-04 17:03:59 +02:00
66 changed files with 57348 additions and 861 deletions

.editorconfig (new file, 25 lines)

@@ -0,0 +1,25 @@
# EditorConfig: http://EditorConfig.org

root = true

[*]
indent_style = tab
indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true
end_of_line = lf
charset = utf-8
max_line_length = 79

[{*.html,*.css,*.js}]
max_line_length = off

[*.py]
indent_size = 4
indent_style = space

# Tests don't get a line width restriction. It's still a good idea to follow
# the 79 character rule, but in the interests of clarity, tests often need to
# violate it.
[**/test_*.py]
max_line_length = off

.gitignore (vendored, 4 changed lines)

@@ -73,7 +73,6 @@ db.sqlite3
 # Other stuff that doesn't belong
 .virtualenv
 virtualenv
-.vagrant
 docker-compose.yml
 docker-compose.env
@@ -83,3 +82,6 @@ scripts/nuke
 # Static files collected by the collectstatic command
 static/
+
+# Classification Models
+models/

Dockerfile

@@ -13,10 +13,10 @@ ENV PAPERLESS_EXPORT_DIR=/export \
     PAPERLESS_CONSUMPTION_DIR=/consume
-RUN apk update --no-cache && apk add python3 gnupg libmagic bash shadow curl \
+RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow curl \
     sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
     apk add --virtual .build-dependencies \
-    python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
+    python3-dev poppler-dev postgresql-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
     # Install python dependencies
     python3 -m ensurepip && \
     rm -r /usr/lib/python*/ensurepip && \

Pipfile

@@ -25,6 +25,8 @@ python-dateutil = "*"
 python-dotenv = "*"
 python-gnupg = "*"
 pytz = "*"
+sphinx = "*"
+tox = "*"
 pycodestyle = "*"
 pytest = "*"
 pytest-cov = "*"
@@ -35,6 +37,3 @@ pytest-xdist = "*"
 [dev-packages]
 ipython = "*"
-sphinx = "*"
-tox = "*"

Pipfile.lock (generated, 623 changed lines)

@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "6d8bad24aa5d0c102b13b5ae27acba04836cd5a07a4003cb2763de1e0a3406b7"
+            "sha256": "3782f7e6b5461c39c8fd0d0048a4622418f247439113bd3cdc91712fd47036f6"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -14,12 +14,18 @@
         ]
     },
     "default": {
+        "alabaster": {
+            "hashes": [
+                "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
+                "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
+            ],
+            "version": "==0.7.12"
+        },
         "apipkg": {
             "hashes": [
                 "sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6",
                 "sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c"
             ],
-            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
             "version": "==1.5"
         },
         "atomicwrites": {
@@ -27,7 +33,6 @@
                 "sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
                 "sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
             ],
-            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
             "version": "==1.2.1"
         },
         "attrs": {
@@ -37,12 +42,26 @@
             ],
             "version": "==18.2.0"
         },
+        "babel": {
+            "hashes": [
+                "sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669",
+                "sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23"
+            ],
+            "version": "==2.6.0"
+        },
+        "backcall": {
+            "hashes": [
+                "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
+                "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
+            ],
+            "version": "==0.1.0"
+        },
         "certifi": {
             "hashes": [
-                "sha256:376690d6f16d32f9d1fe8932551d80b23e9d393a8578c5633a2ed39a64861638",
-                "sha256:456048c7e371c089d0a77a5212fb37a2c2dce1e24146e3b7e0261736aaeaa22a"
+                "sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c",
+                "sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a"
             ],
-            "version": "==2018.8.24"
+            "version": "==2018.10.15"
         },
         "chardet": {
             "hashes": [
@@ -55,6 +74,7 @@
         "hashes": [
             "sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba",
             "sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed",
+            "sha256:0bf8cbbd71adfff0ef1f3a1531e6402d13b7b01ac50a79c97ca15f030dba6306",
             "sha256:10a46017fef60e16694a30627319f38a2b9b52e90182dddb6e37dcdab0f4bf95",
             "sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640",
             "sha256:23d341cdd4a0371820eb2b0bd6b88f5003a7438bbedb33688cd33b8eae59affd",
@@ -83,18 +103,18 @@
             "sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd",
             "sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d",
             "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
+            "sha256:f05a636b4564104120111800021a92e43397bc12a5c72fed7036be8556e0029e",
             "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
         ],
-        "markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4'",
         "version": "==4.5.1"
         },
         "coveralls": {
             "hashes": [
-                "sha256:9dee67e78ec17b36c52b778247762851c8e19a893c9a14e921a2fc37f05fac22",
-                "sha256:aec5a1f5e34224b9089664a1b62217732381c7de361b6ed1b3c394d7187b352a"
+                "sha256:ab638e88d38916a6cedbf80a9cd8992d5fa55c77ab755e262e00b36792b7cd6d",
+                "sha256:b2388747e2529fa4c669fb1e3e2756e4e07b6ee56c7d9fce05f35ccccc913aa0"
             ],
             "index": "pypi",
-            "version": "==1.5.0"
+            "version": "==1.5.1"
         },
         "dateparser": {
             "hashes": [
@@ -104,13 +124,20 @@
             "index": "pypi",
             "version": "==0.7.0"
         },
+        "decorator": {
+            "hashes": [
+                "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
+                "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
+            ],
+            "version": "==4.3.0"
+        },
         "django": {
             "hashes": [
-                "sha256:0c5b65847d00845ee404bbc0b4a85686f15eb3001ffddda3db4e9baa265bf136",
-                "sha256:68aeea369a8130259354b6ba1fa9babe0c5ee6bced505dea4afcd00f765ae38b"
+                "sha256:25df265e1fdb74f7e7305a1de620a84681bcc9c05e84a3ed97e4a1a63024f18d",
+                "sha256:d6d94554abc82ca37e447c3d28958f5ac39bd7d4adaa285543ae97fb1129fd69"
             ],
             "index": "pypi",
-            "version": "==2.0.8"
+            "version": "==2.0.9"
         },
         "django-cors-headers": {
             "hashes": [
@@ -130,11 +157,11 @@
         },
         "django-extensions": {
             "hashes": [
-                "sha256:1f626353a11479014bfe0d77e76d8f866ebca1bb5d595cb57b776230b9e0eb92",
-                "sha256:f21b898598a1628cb73017fb9672e2c5e624133be9764f0eb138e0abf8a62b62"
+                "sha256:30cb6a8c7d6f75a55edf0c0c4491bd98f8264ae1616ce105f9cecac4387edd07",
+                "sha256:4ad86a7a5e84f1c77db030761ae87a600647250c652030a2b71a16e87f3a3d62"
             ],
             "index": "pypi",
-            "version": "==2.1.2"
+            "version": "==2.1.3"
         },
         "django-filter": {
             "hashes": [
@@ -146,11 +173,11 @@
         },
         "djangorestframework": {
             "hashes": [
-                "sha256:b6714c3e4b0f8d524f193c91ecf5f5450092c2145439ac2769711f7eba89a9d9",
-                "sha256:c375e4f95a3a64fccac412e36fb42ba36881e52313ec021ef410b40f67cddca4"
+                "sha256:607865b0bb1598b153793892101d881466bd5a991de12bd6229abb18b1c86136",
+                "sha256:63f76cbe1e7d12b94c357d7e54401103b2e52aef0f7c1650d6c820ad708776e5"
             ],
             "index": "pypi",
-            "version": "==3.8.2"
+            "version": "==3.9.0"
         },
         "docopt": {
             "hashes": [
@@ -158,12 +185,19 @@
             ],
             "version": "==0.6.2"
         },
+        "docutils": {
+            "hashes": [
+                "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
+                "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274",
+                "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6"
+            ],
+            "version": "==0.14"
+        },
         "execnet": {
             "hashes": [
                 "sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a",
                 "sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83"
             ],
-            "markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
             "version": "==1.5.0"
         },
         "factory-boy": {
@@ -176,11 +210,17 @@
         },
         "faker": {
             "hashes": [
-                "sha256:ea7cfd3aeb1544732d08bd9cfba40c5b78e3a91e17b1a0698ab81bfc5554c628",
-                "sha256:f6d67f04abfb2b4bea7afc7fa6c18cf4c523a67956e455668be9ae42bccc21ad"
+                "sha256:2621643b80a10b91999925cfd20f64d2b36f20bf22136bbdc749bb57d6ffe124",
+                "sha256:5ed822d31bd2d6edf10944d176d30dc9c886afdd381eefb7ba8b7aad86171646"
             ],
-            "markers": "python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.2.*' and python_version >= '2.7'",
-            "version": "==0.9.0"
+            "version": "==0.9.2"
         },
+        "filelock": {
+            "hashes": [
+                "sha256:b8d5ca5ca1c815e1574aee746650ea7301de63d87935b3463d26368b76e31633",
+                "sha256:d610c1bb404daf85976d7a82eb2ada120f04671007266b708606565dd03b5be6"
+            ],
+            "version": "==3.0.10"
+        },
         "filemagic": {
             "hashes": [
@@ -190,12 +230,14 @@
             "version": "==1.6"
         },
         "fuzzywuzzy": {
+            "extras": [
+                "speedup"
+            ],
             "hashes": [
                 "sha256:3759bc6859daa0eecef8c82b45404bdac20c23f23136cf4c18b46b426bbc418f",
                 "sha256:5b36957ccf836e700f4468324fa80ba208990385392e217be077d5cd738ae602"
             ],
             "index": "pypi",
+            "markers": null,
             "version": "==0.15.0"
         },
         "gunicorn": {
@@ -213,6 +255,13 @@
             ],
             "version": "==2.7"
         },
+        "imagesize": {
+            "hashes": [
+                "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
+                "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
+            ],
+            "version": "==1.1.0"
+        },
         "inotify-simple": {
             "hashes": [
                 "sha256:fc2c10dd73278a1027d0663f2db51240af5946390f363a154361406ebdddd8dd"
@@ -220,6 +269,35 @@
             "index": "pypi",
             "version": "==1.1.8"
         },
+        "ipython": {
+            "hashes": [
+                "sha256:a5781d6934a3341a1f9acb4ea5acdc7ea0a0855e689dbe755d070ca51e995435",
+                "sha256:b10a7ddd03657c761fc503495bc36471c8158e3fc948573fb9fe82a7029d8efd"
+            ],
+            "index": "pypi",
+            "version": "==7.1.1"
+        },
+        "ipython-genutils": {
+            "hashes": [
+                "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
+                "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
+            ],
+            "version": "==0.2.0"
+        },
+        "jedi": {
+            "hashes": [
+                "sha256:0191c447165f798e6a730285f2eee783fff81b0d3df261945ecb80983b5c3ca7",
+                "sha256:b7493f73a2febe0dc33d51c99b474547f7f6c0b2c8fb2b21f453eef204c12148"
+            ],
+            "version": "==0.13.1"
+        },
+        "jinja2": {
+            "hashes": [
+                "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
+                "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
+            ],
+            "version": "==2.10"
+        },
         "langdetect": {
             "hashes": [
                 "sha256:91a170d5f0ade380db809b3ba67f08e95fe6c6c8641f96d67a51ff7e98a9bf30"
@@ -227,6 +305,12 @@
             "index": "pypi",
             "version": "==1.0.7"
         },
+        "markupsafe": {
+            "hashes": [
+                "sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
+            ],
+            "version": "==1.0"
+        },
         "more-itertools": {
             "hashes": [
                 "sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092",
@@ -235,64 +319,106 @@
             ],
             "version": "==4.3.0"
         },
+        "packaging": {
+            "hashes": [
+                "sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807",
+                "sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9"
+            ],
+            "version": "==18.0"
+        },
+        "parso": {
+            "hashes": [
+                "sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2",
+                "sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24"
+            ],
+            "version": "==0.3.1"
+        },
         "pdftotext": {
             "hashes": [
-                "sha256:b7312302007e19fc784263a321b41682f01a582af84e14200cef53b3f4e69a50"
+                "sha256:e3ad11efe0aa22cbfc46aa1296b2ea5a52ad208b778288311f2801adef178ccb"
             ],
             "index": "pypi",
-            "version": "==2.1.0"
+            "version": "==2.1.1"
         },
+        "pexpect": {
+            "hashes": [
+                "sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
+                "sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
+            ],
+            "markers": "sys_platform != 'win32'",
+            "version": "==4.6.0"
+        },
+        "pickleshare": {
+            "hashes": [
+                "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
+                "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
+            ],
+            "version": "==0.7.5"
+        },
         "pillow": {
             "hashes": [
-                "sha256:00def5b638994f888d1058e4d17c86dec8e1113c3741a0a8a659039aec59a83a",
-                "sha256:026449b64e559226cdb8e6d8c931b5965d8fc90ec18ebbb0baa04c5b36503c72",
-                "sha256:03dbb224ee196ef30ed2156d41b579143e1efeb422974719a5392fc035e4f574",
-                "sha256:03eb0e04f929c102ae24bc436bf1c0c60a4e63b07ebd388e84d8b219df3e6acd",
-                "sha256:1be66b9a89e367e7d20d6cae419794997921fe105090fafd86ef39e20a3baab2",
-                "sha256:1e977a3ed998a599bda5021fb2c2889060617627d3ae228297a529a082a3cd5c",
-                "sha256:22cf3406d135cfcc13ec6228ade774c8461e125c940e80455f500638429be273",
-                "sha256:24adccf1e834f82718c7fc8e3ec1093738da95144b8b1e44c99d5fc7d3e9c554",
-                "sha256:2a3e362c97a5e6a259ee9cd66553292a1f8928a5bdfa3622fdb1501570834612",
-                "sha256:3832e26ecbc9d8a500821e3a1d3765bda99d04ae29ffbb2efba49f5f788dc934",
-                "sha256:4fd1f0c2dc02aaec729d91c92cd85a2df0289d88e9f68d1e8faba750bb9c4786",
-                "sha256:4fda62030f2c515b6e2e673c57caa55cb04026a81968f3128aae10fc28e5cc27",
-                "sha256:5044d75a68b49ce36a813c82d8201384207112d5d81643937fc758c05302f05b",
-                "sha256:522184556921512ec484cb93bd84e0bab915d0ac5a372d49571c241a7f73db62",
-                "sha256:5914cff11f3e920626da48e564be6818831713a3087586302444b9c70e8552d9",
-                "sha256:6661a7908d68c4a133e03dac8178287aa20a99f841ea90beeb98a233ae3fd710",
-                "sha256:79258a8df3e309a54c7ef2ef4a59bb8e28f7e4a8992a3ad17c24b1889ced44f3",
-                "sha256:7d74c20b8f1c3e99d3f781d3b8ff5abfefdd7363d61e23bdeba9992ff32cc4b4",
-                "sha256:81918afeafc16ba5d9d0d4e9445905f21aac969a4ebb6f2bff4b9886da100f4b",
-                "sha256:8194d913ca1f459377c8a4ed8f9b7ad750068b8e0e3f3f9c6963fcc87a84515f",
-                "sha256:84d5d31200b11b3c76fab853b89ac898bf2d05c8b3da07c1fcc23feb06359d6e",
-                "sha256:989981db57abffb52026b114c9a1f114c7142860a6d30a352d28f8cbf186500b",
-                "sha256:a3d7511d3fad1618a82299aab71a5fceee5c015653a77ffea75ced9ef917e71a",
-                "sha256:b3ef168d4d6fd4fa6685aef7c91400f59f7ab1c0da734541f7031699741fb23f",
-                "sha256:c1c5792b6e74bbf2af0f8e892272c2a6c48efa895903211f11b8342e03129fea",
-                "sha256:c5dcb5a56aebb8a8f2585042b2f5c496d7624f0bcfe248f0cc33ceb2fd8d39e7",
-                "sha256:e2bed4a04e2ca1050bb5f00865cf2f83c0b92fd62454d9244f690fcd842e27a4",
-                "sha256:e87a527c06319428007e8c30511e1f0ce035cb7f14bb4793b003ed532c3b9333",
-                "sha256:f63e420180cbe22ff6e32558b612e75f50616fc111c5e095a4631946c782e109",
-                "sha256:f8b3d413c5a8f84b12cd4c5df1d8e211777c9852c6be3ee9c094b626644d3eab"
+                "sha256:00203f406818c3f45d47bb8fe7e67d3feddb8dcbbd45a289a1de7dd789226360",
+                "sha256:0616f800f348664e694dddb0b0c88d26761dd5e9f34e1ed7b7a7d2da14b40cb7",
+                "sha256:1f7908aab90c92ad85af9d2fec5fc79456a89b3adcc26314d2cde0e238bd789e",
+                "sha256:2ea3517cd5779843de8a759c2349a3cd8d3893e03ab47053b66d5ec6f8bc4f93",
+                "sha256:48a9f0538c91fc136b3a576bee0e7cd174773dc9920b310c21dcb5519722e82c",
+                "sha256:5280ebc42641a1283b7b1f2c20e5b936692198b9dd9995527c18b794850be1a8",
+                "sha256:5e34e4b5764af65551647f5cc67cf5198c1d05621781d5173b342e5e55bf023b",
+                "sha256:63b120421ab85cad909792583f83b6ca3584610c2fe70751e23f606a3c2e87f0",
+                "sha256:696b5e0109fe368d0057f484e2e91717b49a03f1e310f857f133a4acec9f91dd",
+                "sha256:870ed021a42b1b02b5fe4a739ea735f671a84128c0a666c705db2cb9abd528eb",
+                "sha256:916da1c19e4012d06a372127d7140dae894806fad67ef44330e5600d77833581",
+                "sha256:9303a289fa0811e1c6abd9ddebfc770556d7c3311cb2b32eff72164ddc49bc64",
+                "sha256:9577888ecc0ad7d06c3746afaba339c94d62b59da16f7a5d1cff9e491f23dace",
+                "sha256:987e1c94a33c93d9b209315bfda9faa54b8edfce6438a1e93ae866ba20de5956",
+                "sha256:99a3bbdbb844f4fb5d6dd59fac836a40749781c1fa63c563bc216c27aef63f60",
+                "sha256:99db8dc3097ceafbcff9cb2bff384b974795edeb11d167d391a02c7bfeeb6e16",
+                "sha256:a5a96cf49eb580756a44ecf12949e52f211e20bffbf5a95760ac14b1e499cd37",
+                "sha256:aa6ca3eb56704cdc0d876fc6047ffd5ee960caad52452fbee0f99908a141a0ae",
+                "sha256:aade5e66795c94e4a2b2624affeea8979648d1b0ae3fcee17e74e2c647fc4a8a",
+                "sha256:b78905860336c1d292409e3df6ad39cc1f1c7f0964e66844bbc2ebfca434d073",
+                "sha256:b92f521cdc4e4a3041cc343625b699f20b0b5f976793fb45681aac1efda565f8",
+                "sha256:bfde84bbd6ae5f782206d454b67b7ee8f7f818c29b99fd02bf022fd33bab14cb",
+                "sha256:c2b62d3df80e694c0e4a0ed47754c9480521e25642251b3ab1dff050a4e60409",
+                "sha256:c5e2be6c263b64f6f7656e23e18a4a9980cffc671442795682e8c4e4f815dd9f",
+                "sha256:c99aa3c63104e0818ec566f8ff3942fb7c7a8f35f9912cb63fd8e12318b214b2",
+                "sha256:dae06620d3978da346375ebf88b9e2dd7d151335ba668c995aea9ed07af7add4",
+                "sha256:db5499d0710823fa4fb88206050d46544e8f0e0136a9a5f5570b026584c8fd74",
+                "sha256:f36baafd82119c4a114b9518202f2a983819101dcc14b26e43fc12cbefdce00e",
+                "sha256:f52b79c8796d81391ab295b04e520bda6feed54d54931708872e8f9ae9db0ea1",
+                "sha256:ff8cff01582fa1a7e533cb97f628531c4014af4b5f38e33cdcfe5eec29b6d888"
             ],
             "index": "pypi",
-            "version": "==5.2.0"
+            "version": "==5.3.0"
         },
         "pluggy": {
             "hashes": [
-                "sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
+                "sha256:447ba94990e8014ee25ec853339faf7b0fc8050cdc3289d4d71f7f410fb90095",
"sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1" "sha256:bde19360a8ec4dfd8a20dcb811780a30998101f078fc7ded6162f0076f50508f"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", "version": "==0.8.0"
"version": "==0.7.1" },
"prompt-toolkit": {
"hashes": [
"sha256:c1d6aff5252ab2ef391c2fe498ed8c088066f66bc64a8d5c095bbf795d9fec34",
"sha256:d4c47f79b635a0e70b84fdb97ebd9a274203706b1ee5ed44c10da62755cf3ec9",
"sha256:fd17048d8335c1e6d5ee403c3569953ba3eb8555d710bfc548faf0712666ea39"
],
"version": "==2.0.7"
},
"ptyprocess": {
"hashes": [
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
],
"version": "==0.6.0"
}, },
"py": { "py": {
"hashes": [ "hashes": [
"sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1", "sha256:bf92637198836372b520efcba9e020c330123be8ce527e535d185ed4b6f45694",
"sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6" "sha256:e76826342cefe3c3d5f7e8ee4316b80d1dd8a300781612ddbc765c17ba25a6c6"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'", "version": "==1.7.0"
"version": "==1.6.0"
}, },
"pycodestyle": { "pycodestyle": {
"hashes": [ "hashes": [
@@ -302,6 +428,13 @@
"index": "pypi", "index": "pypi",
"version": "==2.4.0" "version": "==2.4.0"
}, },
"pygments": {
"hashes": [
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
],
"version": "==2.2.0"
},
"pyocr": { "pyocr": {
"hashes": [ "hashes": [
"sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f" "sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f"
@@ -309,13 +442,20 @@
"index": "pypi", "index": "pypi",
"version": "==0.5.3" "version": "==0.5.3"
}, },
"pyparsing": {
"hashes": [
"sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b",
"sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592"
],
"version": "==2.3.0"
},
"pytest": { "pytest": {
"hashes": [ "hashes": [
"sha256:453cbbbe5ce6db38717d282b758b917de84802af4288910c12442984bde7b823", "sha256:a9e5e8d7ab9d5b0747f37740276eb362e6a76275d76cebbb52c6049d93b475db",
"sha256:a8a07f84e680482eb51e244370aaf2caa6301ef265f37c2bdefb3dd3b663f99d" "sha256:bf47e8ed20d03764f963f0070ff1c8fda6e2671fc5dd562a4d3b7148ad60f5ca"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.8.0" "version": "==3.9.3"
}, },
"pytest-cov": { "pytest-cov": {
"hashes": [ "hashes": [
@@ -327,11 +467,11 @@
}, },
"pytest-django": { "pytest-django": {
"hashes": [ "hashes": [
"sha256:2d2e0a618d91c280d463e90bcbea9b4e417609157f611a79685b1c561c4c0836", "sha256:49e9ffc856bc6a1bec1c26c5c7b7213dff7cc8bc6b64d624c4d143d04aff0bcf",
"sha256:59683def396923b78d7e191a7086a48193f8d5db869ace79acb38f906522bc7b" "sha256:b379282feaf89069cb790775ab6bbbd2bd2038a68c7ef9b84a41898e0b551081"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.4.2" "version": "==3.4.3"
}, },
"pytest-env": { "pytest-env": {
"hashes": [ "hashes": [
@@ -345,7 +485,6 @@
"sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805", "sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805",
"sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08" "sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==0.2" "version": "==0.2"
}, },
"pytest-sugar": { "pytest-sugar": {
@@ -357,19 +496,19 @@
}, },
"pytest-xdist": { "pytest-xdist": {
"hashes": [ "hashes": [
"sha256:0875deac20f6d96597036bdf63970887a6f36d28289c2f6682faf652dfea687b", "sha256:3bc9dcb6ff47e607d3c710727cd9996fd7ac1466d405c3b40bb495da99b6b669",
"sha256:28e25e79698b2662b648319d3971c0f9ae0e6500f88258ccb9b153c31110ba9b" "sha256:8e188d13ce6614c7a678179a76f46231199ffdfe6163de031c17e62ffa256917"
], ],
"index": "pypi", "index": "pypi",
"version": "==1.23.0" "version": "==1.24.0"
}, },
"python-dateutil": { "python-dateutil": {
"hashes": [ "hashes": [
"sha256:1adb80e7a782c12e52ef9a8182bebeb73f1d7e24e374397af06fb4956c8dc5c0", "sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93",
"sha256:e27001de32f627c22380a688bcc43ce83504a7bc5da472209b4c70f02829f0b8" "sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02"
], ],
"index": "pypi", "index": "pypi",
"version": "==2.7.3" "version": "==2.7.5"
}, },
"python-dotenv": { "python-dotenv": {
"hashes": [ "hashes": [
@@ -391,273 +530,37 @@
"hashes": [ "hashes": [
"sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1" "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"
], ],
"markers": "extra == 'speedup'",
"version": "==0.12.0" "version": "==0.12.0"
}, },
"pytz": { "pytz": {
"hashes": [ "hashes": [
"sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053", "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca",
"sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277" "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6"
], ],
"index": "pypi", "index": "pypi",
"version": "==2018.5" "version": "==2018.7"
}, },
"regex": { "regex": {
"hashes": [ "hashes": [
"sha256:22d7ef8c2df344328a8a3c61edade2ee714e5de9360911d22a9213931c769faa", "sha256:0ef96690c3d2294155b7d44187ca4a151e45c931cb768e106ba464a9fa64c5da",
"sha256:3a699780c6b712c67dc23207b129ccc6a7e1270233f7aadead3ea3f83c893702", "sha256:251683e01a3bcacd9188acf0d4caf7b29a3b963c843159311825613ae144cddb",
"sha256:42f460d349baebd5faec02a0c920988fb0300b24baf898d9c139886565b66b6c", "sha256:3fe15a75fe00f04d1ec16713d55cf1e206077c450267a10b33318756fb8b3f99",
"sha256:43bf3d79940cbdf19adda838d8b26b28b47bec793cda46590b5b25703742f440", "sha256:53a962f9dc28cdf403978a142cb1e054479759ad64d312a999f9f042c25b5c9a",
"sha256:47d6c7f0588ef33464e00023067c4e7cce68e0d6a686a73c7ee15abfdad503d4", "sha256:8bd1da6a93d32336a5e5432886dd8543004f0591c39b83dbfa60705cccdf414d",
"sha256:5b879f59f25ed9b91bc8693a9a994014b431f224f492519ad0255ce6b54b83e5", "sha256:b5423061918f602e9342b54d746ac31c598d328ecaf4ef0618763e960c926fd4",
"sha256:8ba0093c412900f636b0f826c597a0c3ea0e395344bc99894ddefe88b76c9c7e", "sha256:d80ebc65b1f7d0403117f59309c16eac24be6a0bc730b593a79f703462858d94",
"sha256:a4789254a1a0bd7a637036cce0b7ed72d8cc864e93f2e9cfd10ac00ae27bb7b0", "sha256:fd8419979639b7de7fb964a13bce3ac47e6fe33043b83de0398c3067986e5659",
"sha256:b73cea07117dca888b0c3671770b501bef19aac9c45c8ffdb5bea2cca2377b0a", "sha256:ff2f15b2b0b4b58ba8a1de651780a0d3fd54f96ad6b77dceb77695220e5d7b7a"
"sha256:d3eb59fa3e5b5438438ec97acd9dc86f077428e020b015b43987e35bea68ef4c",
"sha256:d51d232b4e2f106deaf286001f563947fee255bc5bd209a696f027e15cf0a1e7",
"sha256:d59b03131a8e35061b47a8f186324a95eaf30d5f6ee9cc0637e7b87d29c7c9b5",
"sha256:dd705df1b47470388fc4630e4df3cbbe7677e2ab80092a1c660cae630a307b2d",
"sha256:e87fffa437a4b00afb17af785da9b01618425d6cd984c677639deb937037d8f2",
"sha256:ed40e0474ab5ab228a8d133759d451b31d3ccdebaff698646e54aff82c3de4f8"
], ],
"version": "==2018.8.29" "version": "==2018.11.2"
}, },
"requests": { "requests": {
"hashes": [ "hashes": [
"sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1", "sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c",
"sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a" "sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279"
], ],
"version": "==2.19.1" "version": "==2.20.0"
},
"six": {
"hashes": [
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
],
"version": "==1.11.0"
},
"termcolor": {
"hashes": [
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
],
"version": "==1.1.0"
},
"text-unidecode": {
"hashes": [
"sha256:5a1375bb2ba7968740508ae38d92e1f889a0832913cb1c447d5e2046061a396d",
"sha256:801e38bd550b943563660a91de8d4b6fa5df60a542be9093f7abf819f86050cc"
],
"version": "==1.2"
},
"tzlocal": {
"hashes": [
"sha256:4ebeb848845ac898da6519b9b31879cf13b6626f7184c496037b818e238f2c4e"
],
"version": "==1.5.1"
},
"urllib3": {
"hashes": [
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
],
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
"version": "==1.23"
}
},
"develop": {
"alabaster": {
"hashes": [
"sha256:674bb3bab080f598371f4443c5008cbfeb1a5e622dd312395d2d82af2c54c456",
"sha256:b63b1f4dc77c074d386752ec4a8a7517600f6c0db8cd42980cae17ab7b3275d7"
],
"version": "==0.7.11"
},
"babel": {
"hashes": [
"sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669",
"sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23"
],
"version": "==2.6.0"
},
"backcall": {
"hashes": [
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
],
"version": "==0.1.0"
},
"certifi": {
"hashes": [
"sha256:376690d6f16d32f9d1fe8932551d80b23e9d393a8578c5633a2ed39a64861638",
"sha256:456048c7e371c089d0a77a5212fb37a2c2dce1e24146e3b7e0261736aaeaa22a"
],
"version": "==2018.8.24"
},
"chardet": {
"hashes": [
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
],
"version": "==3.0.4"
},
"decorator": {
"hashes": [
"sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
"sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
],
"version": "==4.3.0"
},
"docutils": {
"hashes": [
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
"sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274",
"sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6"
],
"version": "==0.14"
},
"idna": {
"hashes": [
"sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
"sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
],
"version": "==2.7"
},
"imagesize": {
"hashes": [
"sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
"sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==1.1.0"
},
"ipython": {
"hashes": [
"sha256:007dcd929c14631f83daff35df0147ea51d1af420da303fd078343878bd5fb62",
"sha256:b0f2ef9eada4a68ef63ee10b6dde4f35c840035c50fd24265f8052c98947d5a4"
],
"index": "pypi",
"version": "==6.5.0"
},
"ipython-genutils": {
"hashes": [
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
],
"version": "==0.2.0"
},
"jedi": {
"hashes": [
"sha256:b409ed0f6913a701ed474a614a3bb46e6953639033e31f769ca7581da5bd1ec1",
"sha256:c254b135fb39ad76e78d4d8f92765ebc9bf92cbc76f49e97ade1d5f5121e1f6f"
],
"version": "==0.12.1"
},
"jinja2": {
"hashes": [
"sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
"sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
],
"version": "==2.10"
},
"markupsafe": {
"hashes": [
"sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
],
"version": "==1.0"
},
"packaging": {
"hashes": [
"sha256:e9215d2d2535d3ae866c3d6efc77d5b24a0192cce0ff20e42896cc0664f889c0",
"sha256:f019b770dd64e585a99714f1fd5e01c7a8f11b45635aa953fd41c689a657375b"
],
"version": "==17.1"
},
"parso": {
"hashes": [
"sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2",
"sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24"
],
"version": "==0.3.1"
},
"pexpect": {
"hashes": [
"sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
"sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
],
"markers": "sys_platform != 'win32'",
"version": "==4.6.0"
},
"pickleshare": {
"hashes": [
"sha256:84a9257227dfdd6fe1b4be1319096c20eb85ff1e82c7932f36efccfe1b09737b",
"sha256:c9a2541f25aeabc070f12f452e1f2a8eae2abd51e1cd19e8430402bdf4c1d8b5"
],
"version": "==0.7.4"
},
"pluggy": {
"hashes": [
"sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
"sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==0.7.1"
},
"prompt-toolkit": {
"hashes": [
"sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381",
"sha256:3f473ae040ddaa52b52f97f6b4a493cfa9f5920c255a12dc56a7d34397a398a4",
"sha256:858588f1983ca497f1cf4ffde01d978a3ea02b01c8a26a8bbc5cd2e66d816917"
],
"version": "==1.0.15"
},
"ptyprocess": {
"hashes": [
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
],
"version": "==0.6.0"
},
"py": {
"hashes": [
"sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
"sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==1.6.0"
},
"pygments": {
"hashes": [
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
],
"version": "==2.2.0"
},
"pyparsing": {
"hashes": [
"sha256:0832bcf47acd283788593e7a0f542407bd9550a55a8a8435214a1960e04bcb04",
"sha256:fee43f17a9c4087e7ed1605bd6df994c6173c1e977d7ade7b651292fab2bd010"
],
"version": "==2.2.0"
},
"pytz": {
"hashes": [
"sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053",
"sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277"
],
"index": "pypi",
"version": "==2018.5"
},
"requests": {
"hashes": [
"sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1",
"sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a"
],
"version": "==2.19.1"
},
"simplegeneric": {
"hashes": [
"sha256:dc972e06094b9af5b855b3df4a646395e43d1c9d0d39ed345b7393560d0b9173"
],
"version": "==0.8.1"
}, },
"six": { "six": {
"hashes": [ "hashes": [
@@ -675,27 +578,46 @@
}, },
"sphinx": { "sphinx": {
"hashes": [ "hashes": [
"sha256:217a7705adcb573da5bbe1e0f5cab4fa0bd89fd9342c9159121746f593c2d5a4", "sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17",
"sha256:a602513f385f1d5785ff1ca420d9c7eb1a1b63381733b2f0ea8188a391314a86" "sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464"
], ],
"index": "pypi", "index": "pypi",
"version": "==1.7.9" "version": "==1.8.1"
}, },
"sphinxcontrib-websupport": { "sphinxcontrib-websupport": {
"hashes": [ "hashes": [
"sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd", "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
"sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9" "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
], ],
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
"version": "==1.1.0" "version": "==1.1.0"
}, },
"termcolor": {
"hashes": [
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
],
"version": "==1.1.0"
},
"text-unidecode": {
"hashes": [
"sha256:5a1375bb2ba7968740508ae38d92e1f889a0832913cb1c447d5e2046061a396d",
"sha256:801e38bd550b943563660a91de8d4b6fa5df60a542be9093f7abf819f86050cc"
],
"version": "==1.2"
},
"toml": {
"hashes": [
"sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
"sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
],
"version": "==0.10.0"
},
"tox": { "tox": {
"hashes": [ "hashes": [
"sha256:37cf240781b662fb790710c6998527e65ca6851eace84d1595ee71f7af4e85f7", "sha256:513e32fdf2f9e2d583c2f248f47ba9886428c949f068ac54a0469cac55df5862",
"sha256:eb61aa5bcce65325538686f09848f04ef679b5cd9b83cc491272099b28739600" "sha256:75fa30e8329b41b664585f5fb837e23ce1d7e6fa1f7811f2be571c990f9d911b"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.2.1" "version": "==3.5.3"
}, },
"traitlets": { "traitlets": {
"hashes": [ "hashes": [
@@ -704,21 +626,25 @@
], ],
"version": "==4.3.2" "version": "==4.3.2"
}, },
"tzlocal": {
"hashes": [
"sha256:4ebeb848845ac898da6519b9b31879cf13b6626f7184c496037b818e238f2c4e"
],
"version": "==1.5.1"
},
"urllib3": { "urllib3": {
"hashes": [ "hashes": [
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf", "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5" "sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22"
], ],
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'", "version": "==1.24.1"
"version": "==1.23"
}, },
"virtualenv": { "virtualenv": {
"hashes": [ "hashes": [
"sha256:2ce32cd126117ce2c539f0134eb89de91a8413a29baac49cbab3eb50e2026669", "sha256:686176c23a538ecc56d27ed9d5217abd34644823d6391cbeb232f42bf722baad",
"sha256:ca07b4c0b54e14a91af9f34d0919790b016923d157afda5efdde55c96718f752" "sha256:f899fafcd92e1150f40c8215328be38ff24b519cd95357fa6e78e006c7638208"
], ],
"markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*'", "version": "==16.1.0"
"version": "==16.0.0"
}, },
"wcwidth": { "wcwidth": {
"hashes": [ "hashes": [
@@ -727,5 +653,6 @@
], ],
"version": "==0.1.7" "version": "==0.1.7"
} }
} },
"develop": {}
} }

Vagrantfile

@@ -1,20 +0,0 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
VAGRANT_API_VERSION = "2"
Vagrant.configure(VAGRANT_API_VERSION) do |config|
config.vm.box = "ubuntu/trusty64"
# Provision using shell
config.vm.host_name = "dev.paperless"
config.vm.synced_folder ".", "/opt/paperless"
config.vm.provision "shell", path: "scripts/vagrant-provision"
# Networking details
config.vm.network "private_network", ip: "172.28.128.4"
config.vm.provider "virtualbox" do |vb|
# Customize the amount of memory on the VM:
vb.memory = "1024"
end
end


@@ -1,6 +1,27 @@
Changelog
#########
2.6.0
=====
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
the problem in `#433`_.
* Fix the ``RecentCorrespondentsFilter`` correspondents filter that was added
in 2.4 to play nice with the defaults. Thanks to `tsia`_ and `Sblop`_ who
pointed this out. `#423`_.
* Updated dependencies to include (among other things) a security patch to
requests.
* Fix text in sample data for tests so that the language guesser stops thinking
that everything is in Catalan because we had *Lorem ipsum* in there.
* Tweaked the gunicorn sample command to use filesystem paths instead of Python
paths. `#441`_
* Added pretty colour boxes next to the hex values in the Tags section, thanks
to a pull request from `Joshua Taillon`_ `#442`_.
* Added a ``.editorconfig`` file to better specify coding style.
* `Joshua Taillon`_ also added some logic to tie Paperless' date guessing logic
into how it parses file names on import. `#440`_
2.5.0
=====
@@ -44,6 +65,7 @@ Changelog
* The ``get_date()`` functionality of the parsers has been consolidated onto
  the ``DocumentParser`` class since much of that code was redundant anyway.
2.4.0
=====
@@ -55,13 +77,13 @@ Changelog
  It's now in the import step that we decide the storage type. This allows you
  to export from an encrypted system and import into an unencrypted one, or
  vice-versa.
-* The migration history has been slightly modified to accomodate PostgreSQL
+* The migration history has been slightly modified to accommodate PostgreSQL
  users. Additionally, you can now tell paperless to use PostgreSQL simply by
  declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to
  connect to your Postgres database without a password unless you also set
  ``PAPERLESS_DBPASS``.
* A bug was found in the REST API filter system that was the result of an
-  update of django-filter some time ago. This has now been patched `#412`_.
+  update of django-filter some time ago. This has now been patched in `#412`_.
  Thanks to `thepill`_ for spotting it!
@@ -570,6 +592,9 @@ bulk of the work on this big change.
.. _thepill: https://github.com/thepill
.. _Andrew Peng: https://github.com/pengc99
.. _euri10: https://github.com/euri10
.. _Ulli: https://github.com/Ulli2k
.. _tsia: https://github.com/tsia
.. _Sblop: https://github.com/Sblop
.. _#20: https://github.com/danielquinn/paperless/issues/20
.. _#44: https://github.com/danielquinn/paperless/issues/44
@@ -664,6 +689,11 @@ bulk of the work on this big change.
.. _#412: https://github.com/danielquinn/paperless/issues/412
.. _#413: https://github.com/danielquinn/paperless/pull/413
.. _#414: https://github.com/danielquinn/paperless/issues/414
.. _#423: https://github.com/danielquinn/paperless/issues/423
.. _#433: https://github.com/danielquinn/paperless/issues/433
.. _#440: https://github.com/danielquinn/paperless/pull/440
.. _#441: https://github.com/danielquinn/paperless/pull/441
.. _#442: https://github.com/danielquinn/paperless/pull/442
.. _pipenv: https://docs.pipenv.org/
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/


@@ -0,0 +1,15 @@
Changelog (jonaswinkler)
########################
1.0.0
=====
* First release based on paperless 2.6.0
* Added: Automatic document classification using neural networks (replaces
regex-based tagging)
* Added: Document types
* Added: Archive serial number allows easy referencing of physical document
copies
* Added: Inbox tags (added automatically to newly consumed documents)
* Added: Document viewer on document edit page
* Database backend is now configurable


@@ -43,6 +43,16 @@ These however wouldn't work:
* ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
* ``Another Company- Letter of Reference.jpg``
Do I have to be so strict about naming?
---------------------------------------
Rather than using the strict document naming rules, one can also set the option
``PAPERLESS_FILENAME_DATE_ORDER`` in ``paperless.conf`` to any date order
that is accepted by dateparser_. Doing so will cause ``paperless`` to default
to any date format that is found in the title, instead of a date pulled from
the document's text, without requiring the strict formatting of the document
filename as described above.
.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
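The effect of a date-order setting can be illustrated with plain ``datetime``
parsing. This is only a sketch of the ambiguity the option resolves (Paperless
itself delegates the parsing to dateparser, as described above):

```python
from datetime import datetime

# "03-04-2018" is ambiguous: a day-first (DMY) order reads it as 3 April,
# a month-first (MDY) order as March 4.  A date-order setting such as
# PAPERLESS_FILENAME_DATE_ORDER tells the parser which reading to prefer.
dmy = datetime.strptime("03-04-2018", "%d-%m-%Y")
mdy = datetime.strptime("03-04-2018", "%m-%d-%Y")

print(dmy.date())  # 2018-04-03
print(mdy.date())  # 2018-03-04
```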
.. _guesswork-content:
@@ -82,11 +92,11 @@ text and matching algorithm. From the help info there:
uses a regex to match the PDF. If you don't know what a regex is, you
probably don't want this option.
-When using the "any" or "all" matching algorithms, you can search for terms that
-consist of multiple words by enclosing them in double quotes. For example, defining
-a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match
-documents that contain either "Bank of America" or "BofA", but will not match
-documents containing "Bank of South America".
+When using the "any" or "all" matching algorithms, you can search for terms
+that consist of multiple words by enclosing them in double quotes. For example,
+defining a match text of ``"Bank of America" BofA`` using the "any" algorithm,
+will match documents that contain either "Bank of America" or "BofA", but will
+not match documents containing "Bank of South America".
Then just save your tag/correspondent and run another document through the
consumer. Once complete, you should see the newly-created document,
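The quoted-phrase behaviour described above can be sketched as follows. The
helper names are hypothetical and this is not Paperless' actual
implementation, just a minimal illustration of the "any" algorithm:

```python
import re

def split_terms(match_text):
    # Split a match string into terms, honouring double-quoted phrases:
    # '"Bank of America" BofA' -> ['Bank of America', 'BofA']
    return [quoted or bare
            for quoted, bare in re.findall(r'"([^"]+)"|(\S+)', match_text)]

def matches_any(match_text, document_text):
    # "any" algorithm: the document matches if at least one term occurs
    # as a whole word or phrase, case-insensitively.
    text = document_text.lower()
    return any(re.search(r"\b" + re.escape(term.lower()) + r"\b", text)
               for term in split_terms(match_text))

print(matches_any('"Bank of America" BofA', "Bank of America statement"))  # True
print(matches_any('"Bank of America" BofA', "Bank of South America"))      # False
```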


@@ -46,3 +46,4 @@ Contents
contributing
scanners
changelog
changelog_jonaswinkler


@@ -82,6 +82,7 @@ rolled in as part of the update:
$ cd /path/to/project
$ git pull
$ pip install -r requirements.txt
$ cd src
$ ./manage.py migrate


@@ -33,7 +33,7 @@ In addition to the above, there are a number of Python requirements, all of
which are listed in a file called ``requirements.txt`` in the project root
directory.
-If you're not working on a virtual environment (like Vagrant or Docker), you
+If you're not working on a virtual environment (like Docker), you
should probably be using a virtualenv, but that's your call. The reasons why
you might choose a virtualenv or not aren't really within the scope of this
document. Needless to say if you don't know what a virtualenv is, you should


@@ -42,18 +42,14 @@ Installation & Configuration
You can go multiple routes with setting up and running Paperless:
* The `bare metal route`_
* The `vagrant route`_
* The `docker route`_
-The `Vagrant route`_ is quick & easy, but means you're running a VM which comes
-with memory consumption, cpu overhead etc. The `docker route`_ offers the same
-simplicity as Vagrant with lower resource consumption.
+The `docker route`_ is quick & easy.
The `bare metal route`_ is a bit more complicated to setup but makes it easier
should you want to contribute some code back.
.. _Vagrant route: setup-installation-vagrant_
.. _docker route: setup-installation-docker_
.. _bare metal route: setup-installation-bare-metal_
.. _Docker Machine: https://docs.docker.com/machine/
@@ -267,54 +263,6 @@ Docker Method
newer ``docker-compose.yml.example`` file
.. _setup-installation-vagrant:
Vagrant Method
++++++++++++++
1. Install `Vagrant`_. How you do that is really between you and your OS.
2. Run ``vagrant up``. An instance will start up for you. When it's ready and
provisioned...
3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
``/etc/paperless.conf`` and set the values for:
* ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be
dumped to be consumed by Paperless.
* ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to
encrypt/decrypt the original document. It's only required if you want
your original files to be encrypted, otherwise, just leave it unset.
* ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming
documents from mail or via the API. If you don't use either, leaving it
blank is just fine.
4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again. This
updates the environment to make use of the changes you made to the config
file.
5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
6. Still inside your vagrant box, create a user for your Paperless instance
with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
create your user.
7. Start the webserver with
``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
You can login with the user/pass you created in #6.
8. In a separate window, run ``vagrant ssh`` again, but this time once inside
your vagrant instance, you should start the consumer script with
``/opt/paperless/src/manage.py document_consumer``.
9. Scan something. Put it in the ``CONSUMPTION_DIR``.
10. Wait a few minutes
11. Visit the document list on your webserver, and it should be there, indexed
and downloadable.
.. caution::
This installation is not secure. Once everything is working head up to
`Making things more permanent`_
.. _Vagrant: https://vagrantup.com/
.. _Paperless server: http://172.28.128.4:8000
.. _setup-permanent:

Making Things a Little more Permanent
@@ -398,7 +346,7 @@ instance listening on localhost port 8000.
location /static {
    autoindex on;
    alias <path-to-paperless-static-directory>;
}
@@ -409,7 +357,7 @@ instance listening on localhost port 8000.
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_pass http://127.0.0.1:8000;
}
}
@@ -418,7 +366,7 @@ The gunicorn server can be started with the command:
.. code-block:: shell

    $ <path-to-paperless-virtual-environment>/bin/gunicorn --pythonpath=<path-to-paperless>/src paperless.wsgi -w 2
.. _setup-permanent-standard-systemd:
@@ -475,7 +423,7 @@ after restarting your system:
respawn limit 10 5

script
    exec <path to paperless virtual environment>/bin/gunicorn --pythonpath=<path to paperless>/src paperless.wsgi -w 2
end script

Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
@@ -513,13 +461,6 @@ second period.
.. _Upstart: http://upstart.ubuntu.com/
Vagrant
~~~~~~~
You may use the Ubuntu explanation above. Replace
``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
.. _setup-permanent-docker:

Docker


@@ -14,9 +14,8 @@ FORGIVING_OCR is enabled``, then you might need to install the
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
matching your document's languages.
As an example, if you are running Paperless from any Ubuntu or Debian
box, and your documents are written in Spanish you may need to run::
    apt-get install -y tesseract-ocr-spa

models/.keep Normal file

overrides/README.md Normal file

@@ -0,0 +1,11 @@
# Customizing Paperless
*See customization
[documentation](https://paperless.readthedocs.io/en/latest/customising.html)
for more detail!*
The example `.css` and `.js` snippets in this folder can be placed into
one of two files in your `PAPERLESS_MEDIADIR` folder: `overrides.js` or
`overrides.css`. Please feel free to submit pull requests to the main
repository with other examples of customizations that you think others may
find useful.


@@ -3,6 +3,16 @@
# As this file contains passwords it should only be readable by the user
# running paperless.
###############################################################################
#### Database Settings ####
###############################################################################
# By default, sqlite is used as the database backend. This can be changed here.
#PAPERLESS_DBENGINE="django.db.backends.postgresql_psycopg2"
#PAPERLESS_DBNAME="paperless"
#PAPERLESS_DBUSER="paperless"
#PAPERLESS_DBPASS="paperless"
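These variables ultimately feed Django's ``DATABASES`` setting. A minimal sketch of that mapping (the sqlite fallback follows the comment above; the ``localhost``/``5432`` defaults and the ``PAPERLESS_DBHOST``/``PAPERLESS_DBPORT`` names are taken from the DBHOST & DBPORT change in this compare and are otherwise assumptions):

```python
import os

# Sketch: build a Django-style DATABASES entry from the PAPERLESS_* variables
# documented above. Falling back to sqlite when no engine is set mirrors the
# "By default, sqlite is used" comment; the concrete defaults are assumptions.
def database_config(env=os.environ):
    engine = env.get("PAPERLESS_DBENGINE")
    if not engine:
        return {"ENGINE": "django.db.backends.sqlite3", "NAME": "db.sqlite3"}
    return {
        "ENGINE": engine,
        "NAME": env.get("PAPERLESS_DBNAME", "paperless"),
        "USER": env.get("PAPERLESS_DBUSER", "paperless"),
        "PASSWORD": env.get("PAPERLESS_DBPASS", ""),
        "HOST": env.get("PAPERLESS_DBHOST", "localhost"),
        "PORT": env.get("PAPERLESS_DBPORT", "5432"),
    }
```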
###############################################################################
#### Paths & Folders ####
@@ -38,6 +48,13 @@ PAPERLESS_CONSUMPTION_DIR=""
#PAPERLESS_STATIC_URL="/static/"
# You can specify where the document classification model file should be
# stored. Make sure that this file is writeable by the user executing the
# management command "document_create_classifier" and that the path exists.
# The default location is /models/model.pickle within the install folder.
#PAPERLESS_MODEL_FILE=/path/to/model/file
# These values are required if you want paperless to check a particular email
# box every 10 minutes and attempt to consume documents from there. If you
# don't define a HOST, mail checking will just be disabled.
@@ -127,6 +144,14 @@ PAPERLESS_DEBUG="false"
# "true", the document will instead be opened in the browser, if possible. # "true", the document will instead be opened in the browser, if possible.
#PAPERLESS_INLINE_DOC="false" #PAPERLESS_INLINE_DOC="false"
# By default, paperless will check the document text for document date information.
# Uncomment the line below to enable checking the document filename for date
# information. The date order can be set to any option as specified in
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
# checked first, and if nothing is found, the document text will be checked
# as normal.
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
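As a rough illustration of the "filename first" behaviour described above, a standard-library-only sketch (the real consumer delegates to the ``dateparser`` package and honours the configured date order, so the fixed ``YYYY-MM-DD`` pattern here is an assumption for illustration):

```python
import datetime
import re

# Sketch: pull the first YYYY-MM-DD-style date out of a filename; when
# nothing is found, the caller would fall back to checking the document
# text, as the comment above describes.
def date_from_filename(filename):
    match = re.search(r"(\d{4})-(\d{2})-(\d{2})", filename)
    if not match:
        return None  # fall back to checking the document text
    year, month, day = (int(g) for g in match.groups())
    try:
        return datetime.date(year, month, day)
    except ValueError:
        return None  # matched digits that are not a real date
```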
#
# The following values use sensible defaults for modern systems, but if you're
# running Paperless on a low-resource device (like a Raspberry Pi), modifying
@@ -188,6 +213,12 @@ PAPERLESS_DEBUG="false"
#PAPERLESS_CONSUMER_LOOP_TIME=10
# By default Paperless stops consuming a document if no language can be
# detected. Set to true to consume documents even if the language detection
# fails.
#PAPERLESS_FORGIVING_OCR="false"
###############################################################################
#### Interface ####
###############################################################################


@@ -1,51 +1,83 @@
-i https://pypi.python.org/simple
alabaster==0.7.12
apipkg==1.5
atomicwrites==1.2.1
attrs==18.2.0
babel==2.6.0
backcall==0.1.0
certifi==2018.10.15
chardet==3.0.4
coverage==4.5.1
coveralls==1.5.1
dateparser==0.7.0
decorator==4.3.0
django-cors-headers==2.4.0
django-crispy-forms==1.7.2
django-extensions==2.1.3
django-filter==2.0.0
django==2.0.9
djangorestframework==3.9.0
docopt==0.6.2
docutils==0.14
execnet==1.5.0
factory-boy==2.11.1
faker==0.9.2
filelock==3.0.10
filemagic==1.6
fuzzywuzzy[speedup]==0.15.0
gunicorn==19.9.0
idna==2.7
imagesize==1.1.0
inotify-simple==1.1.8
ipython-genutils==0.2.0
ipython==7.1.1
jedi==0.13.1
jinja2==2.10
langdetect==1.0.7
markupsafe==1.0
more-itertools==4.3.0
numpy==1.15.1
packaging==18.0
parso==0.3.1
pdftotext==2.1.1
pexpect==4.6.0
pickleshare==0.7.5
pillow==5.3.0
pluggy==0.8.0
psycopg2==2.7.6.1
prompt-toolkit==2.0.7
ptyprocess==0.6.0
py==1.7.0
pycodestyle==2.4.0
pygments==2.2.0
pyocr==0.5.3
pyparsing==2.3.0
pytest-cov==2.6.0
pytest-django==3.4.3
pytest-env==0.6.2
pytest-forked==0.2
pytest-sugar==0.9.1
pytest-xdist==1.24.0
pytest==3.9.3
python-dateutil==2.7.5
python-dotenv==0.9.1
python-gnupg==0.4.3
python-levenshtein==0.12.0 ; extra == 'speedup'
pytz==2018.7
regex==2018.11.2
requests==2.20.0
six==1.11.0
scikit-learn==0.19.2
scipy==1.1.0
snowballstemmer==1.2.1
sphinx==1.8.1
sphinxcontrib-websupport==1.1.0
termcolor==1.1.0
text-unidecode==1.2
toml==0.10.0
tox==3.5.3
traitlets==4.3.2
tzlocal==1.5.1
urllib3==1.24.1
virtualenv==16.1.0
wcwidth==0.1.7


@@ -4,7 +4,7 @@ Description=Paperless webserver
[Service]
User=paperless
Group=paperless
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2
[Install]
WantedBy=multi-user.target


@@ -1,31 +0,0 @@
#!/bin/bash
# Install packages
apt-get update
apt-get build-dep -y python-imaging
apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
# Python dependencies
pip3 install -r /opt/paperless/requirements.txt
# Create the environment file
cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
chmod 0640 /etc/paperless.conf
chown root:vagrant /etc/paperless.conf
# Create the consumption directory
mkdir /home/vagrant/consumption
chown vagrant:vagrant /home/vagrant/consumption
echo "
Now follow the remaining steps in the Vagrant section of the setup
documentation to complete the process:
http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
"

src/documents/actions.py Normal file → Executable file

@@ -4,7 +4,8 @@ from django.contrib.admin.utils import model_ngettext
from django.core.exceptions import PermissionDenied
from django.template.response import TemplateResponse

from documents.classifier import DocumentClassifier
from documents.models import Correspondent, DocumentType, Tag


def select_action(
@@ -17,9 +18,9 @@ def select_action(
    if not modeladmin.has_change_permission(request):
        raise PermissionDenied

    if request.POST.get("post"):
        n = queryset.count()
        selected_object = modelclass.objects.get(id=request.POST.get("obj_id"))
        if n:
            for document in queryset:
                if document_action:
@@ -137,6 +138,57 @@ def remove_correspondent_from_selected(modeladmin, request, queryset):
    )
def set_document_type_on_selected(modeladmin, request, queryset):
    return select_action(
        modeladmin=modeladmin,
        request=request,
        queryset=queryset,
        title="Set document type on multiple documents",
        action="set_document_type_on_selected",
        modelclass=DocumentType,
        success_message="Successfully set document type %(selected_object)s "
                        "on %(count)d %(items)s.",
        queryset_action=lambda qs, document_type: qs.update(
            document_type=document_type)
    )


def remove_document_type_from_selected(modeladmin, request, queryset):
    return simple_action(
        modeladmin=modeladmin,
        request=request,
        queryset=queryset,
        success_message="Successfully removed document type from %(count)d "
                        "%(items)s.",
        queryset_action=lambda qs: qs.update(document_type=None)
    )


def run_document_classifier_on_selected(modeladmin, request, queryset):
    clf = DocumentClassifier()
    try:
        clf.reload()
        return simple_action(
            modeladmin=modeladmin,
            request=request,
            queryset=queryset,
            success_message="Successfully applied document classifier to "
                            "%(count)d %(items)s.",
            document_action=lambda doc: clf.classify_document(
                doc,
                classify_correspondent=True,
                classify_tags=True,
                classify_document_type=True)
        )
    except FileNotFoundError:
        modeladmin.message_user(
            request,
            "Classifier model file not found.",
            messages.ERROR
        )
        return None
add_tag_to_selected.short_description = "Add tag to selected documents"
remove_tag_from_selected.short_description = \
    "Remove tag from selected documents"
@@ -144,3 +196,9 @@ set_correspondent_on_selected.short_description = \
"Set correspondent on selected documents" "Set correspondent on selected documents"
remove_correspondent_from_selected.short_description = \ remove_correspondent_from_selected.short_description = \
"Remove correspondent from selected documents" "Remove correspondent from selected documents"
set_document_type_on_selected.short_description = \
    "Set document type on selected documents"
remove_document_type_from_selected.short_description = \
    "Remove document type from selected documents"
run_document_classifier_on_selected.short_description = \
    "Run document classifier on selected"

src/documents/admin.py Normal file → Executable file

@@ -16,10 +16,13 @@ from documents.actions import (
    add_tag_to_selected,
    remove_correspondent_from_selected,
    remove_tag_from_selected,
    set_correspondent_on_selected,
    set_document_type_on_selected,
    remove_document_type_from_selected,
    run_document_classifier_on_selected
)
from .models import Correspondent, Document, DocumentType, Log, Tag


class FinancialYearFilter(admin.SimpleListFilter):
@@ -61,12 +64,12 @@ class FinancialYearFilter(admin.SimpleListFilter):
            # To keep it simple we use the same string for both
            # query parameter and the display.
            return query, query
        else:
            query = "{0}-{0}".format(date.year)
            display = "{}".format(date.year)
            return query, display

    def lookups(self, request, model_admin):
        if not settings.FY_START or not settings.FY_END:
@@ -88,25 +91,24 @@ class FinancialYearFilter(admin.SimpleListFilter):
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
    """
    If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
    correspondents to documents sent our way over the past ``n`` years.
    """

    def field_choices(self, field, request, model_admin):
        years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
        correspondents = Correspondent.objects.all()

        if years and years > 0:
            self.title = "Correspondent (Recent)"
            days = 365 * years
            correspondents = correspondents.filter(
                documents__created__gte=datetime.now() - timedelta(days=days)
            ).distinct()

        return [(c.id, c.name) for c in correspondents]
class CommonAdmin(admin.ModelAdmin):
@@ -117,13 +119,11 @@ class CorrespondentAdmin(CommonAdmin):
    list_display = (
        "name",
        "automatic_classification",
        "document_count",
        "last_correspondence"
    )
    list_editable = ("automatic_classification",)

    readonly_fields = ("slug",)
@@ -146,15 +146,38 @@ class CorrespondentAdmin(CommonAdmin):
class TagAdmin(CommonAdmin):

    list_display = (
        "name",
        "colour",
        "automatic_classification",
        "document_count")
    list_filter = ("colour",)
    list_editable = ("colour", "automatic_classification")
    readonly_fields = ("slug",)

    class Media:
        js = ("js/colours.js",)

    def get_queryset(self, request):
        qs = super(TagAdmin, self).get_queryset(request)
        qs = qs.annotate(document_count=models.Count("documents"))
        return qs

    def document_count(self, obj):
        return obj.document_count
    document_count.admin_order_field = "document_count"


class DocumentTypeAdmin(CommonAdmin):

    list_display = ("name", "automatic_classification", "document_count")
    list_editable = ("automatic_classification",)
    readonly_fields = ("slug",)

    def get_queryset(self, request):
        qs = super(DocumentTypeAdmin, self).get_queryset(request)
        qs = qs.annotate(document_count=models.Count("documents"))
        return qs
@@ -173,11 +196,11 @@ class DocumentAdmin(CommonAdmin):
    search_fields = ("correspondent__name", "title", "content", "tags__name")
    readonly_fields = ("added", "file_type", "storage_type",)
    list_display = ("title", "created", "added", "thumbnail", "correspondent",
                    "tags_", "archive_serial_number", "document_type")
    list_filter = (
        "document_type",
        "tags",
        ("correspondent", RecentCorrespondentFilter),
        "correspondent",
        FinancialYearFilter
    )
@@ -189,7 +212,10 @@ class DocumentAdmin(CommonAdmin):
        add_tag_to_selected,
        remove_tag_from_selected,
        set_correspondent_on_selected,
        remove_correspondent_from_selected,
        set_document_type_on_selected,
        remove_document_type_from_selected,
        run_document_classifier_on_selected
    ]

    date_hierarchy = "created"
@@ -222,6 +248,9 @@ class DocumentAdmin(CommonAdmin):
                    extra_context=None):
        extra_context = extra_context or {}

        doc = Document.objects.get(id=object_id)
        extra_context["download_url"] = doc.download_url
        extra_context["file_type"] = doc.file_type

        if self.document_queue and object_id:
            if int(object_id) in self.document_queue:
@@ -345,6 +374,7 @@ class LogAdmin(CommonAdmin):
admin.site.register(Correspondent, CorrespondentAdmin)
admin.site.register(Tag, TagAdmin)
admin.site.register(DocumentType, DocumentTypeAdmin)
admin.site.register(Document, DocumentAdmin)
admin.site.register(Log, LogAdmin)


@@ -11,8 +11,8 @@ class DocumentsConfig(AppConfig):
        from .signals import document_consumption_started
        from .signals import document_consumption_finished
        from .signals.handlers import (
            classify_document,
            add_inbox_tags,
            run_pre_consume_script,
            run_post_consume_script,
            cleanup_document_deletion,
@@ -21,8 +21,8 @@ class DocumentsConfig(AppConfig):
        document_consumption_started.connect(run_pre_consume_script)
        document_consumption_finished.connect(classify_document)
        document_consumption_finished.connect(add_inbox_tags)
        document_consumption_finished.connect(set_log_entry)
        document_consumption_finished.connect(run_post_consume_script)

src/documents/classifier.py Executable file

@@ -0,0 +1,240 @@
import logging
import os
import pickle

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer

from documents.models import Correspondent, DocumentType, Tag, Document
from paperless import settings


def preprocess_content(content):
    content = content.lower()
    content = content.strip()
    content = content.replace("\n", " ")
    content = content.replace("\r", " ")
    while content.find("  ") > -1:
        content = content.replace("  ", " ")
    return content


class DocumentClassifier(object):

    def __init__(self):
        self.classifier_version = 0

        self.data_vectorizer = None
        self.tags_binarizer = None
        self.correspondent_binarizer = None
        self.document_type_binarizer = None

        self.tags_classifier = None
        self.correspondent_classifier = None
        self.document_type_classifier = None

    def reload(self):
        if os.path.getmtime(settings.MODEL_FILE) > self.classifier_version:
            logging.getLogger(__name__).info("Reloading classifier models")
            with open(settings.MODEL_FILE, "rb") as f:
                self.data_vectorizer = pickle.load(f)
                self.tags_binarizer = pickle.load(f)
                self.correspondent_binarizer = pickle.load(f)
                self.document_type_binarizer = pickle.load(f)
                self.tags_classifier = pickle.load(f)
                self.correspondent_classifier = pickle.load(f)
                self.document_type_classifier = pickle.load(f)
            self.classifier_version = os.path.getmtime(settings.MODEL_FILE)

    def save_classifier(self):
        with open(settings.MODEL_FILE, "wb") as f:
            pickle.dump(self.data_vectorizer, f)
            pickle.dump(self.tags_binarizer, f)
            pickle.dump(self.correspondent_binarizer, f)
            pickle.dump(self.document_type_binarizer, f)
            pickle.dump(self.tags_classifier, f)
            pickle.dump(self.correspondent_classifier, f)
            pickle.dump(self.document_type_classifier, f)

    def train(self):
        data = list()
        labels_tags = list()
        labels_correspondent = list()
        labels_document_type = list()

        # Step 1: Extract and preprocess training data from the database.
        logging.getLogger(__name__).info("Gathering data from database...")
        for doc in Document.objects.exclude(tags__is_inbox_tag=True):
            data.append(preprocess_content(doc.content))

            y = -1
            if doc.document_type:
                if doc.document_type.automatic_classification:
                    y = doc.document_type.id
            labels_document_type.append(y)

            y = -1
            if doc.correspondent:
                if doc.correspondent.automatic_classification:
                    y = doc.correspondent.id
            labels_correspondent.append(y)

            tags = [tag.id for tag in doc.tags.filter(
                automatic_classification=True
            )]
            labels_tags.append(tags)

        labels_tags_unique = set([tag for tags in labels_tags for tag in tags])
        logging.getLogger(__name__).info(
            "{} documents, {} tag(s), {} correspondent(s), "
            "{} document type(s).".format(
                len(data),
                len(labels_tags_unique),
                len(set(labels_correspondent)),
                len(set(labels_document_type))
            )
        )

        # Step 2: vectorize data
        logging.getLogger(__name__).info("Vectorizing data...")
        self.data_vectorizer = CountVectorizer(
            analyzer="char",
            ngram_range=(3, 5),
            min_df=0.1
        )
        data_vectorized = self.data_vectorizer.fit_transform(data)

        self.tags_binarizer = MultiLabelBinarizer()
        labels_tags_vectorized = self.tags_binarizer.fit_transform(labels_tags)

        self.correspondent_binarizer = LabelBinarizer()
        labels_correspondent_vectorized = \
            self.correspondent_binarizer.fit_transform(labels_correspondent)

        self.document_type_binarizer = LabelBinarizer()
        labels_document_type_vectorized = \
            self.document_type_binarizer.fit_transform(labels_document_type)

        # Step 3: train the classifiers
        if len(self.tags_binarizer.classes_) > 0:
            logging.getLogger(__name__).info("Training tags classifier...")
            self.tags_classifier = MLPClassifier(verbose=True)
            self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
        else:
            self.tags_classifier = None
            logging.getLogger(__name__).info(
                "There are no tags. Not training tags classifier."
            )

        if len(self.correspondent_binarizer.classes_) > 0:
            logging.getLogger(__name__).info(
                "Training correspondent classifier..."
            )
            self.correspondent_classifier = MLPClassifier(verbose=True)
            self.correspondent_classifier.fit(
                data_vectorized,
                labels_correspondent_vectorized
            )
        else:
            self.correspondent_classifier = None
            logging.getLogger(__name__).info(
                "There are no correspondents. Not training correspondent "
                "classifier."
            )

        if len(self.document_type_binarizer.classes_) > 0:
            logging.getLogger(__name__).info(
                "Training document type classifier..."
            )
            self.document_type_classifier = MLPClassifier(verbose=True)
            self.document_type_classifier.fit(
                data_vectorized,
                labels_document_type_vectorized
            )
        else:
            self.document_type_classifier = None
            logging.getLogger(__name__).info(
                "There are no document types. Not training document type "
                "classifier."
            )

    def classify_document(
            self, document, classify_correspondent=False,
            classify_document_type=False, classify_tags=False,
            replace_tags=False):

        X = self.data_vectorizer.transform(
            [preprocess_content(document.content)]
        )

        if classify_correspondent and self.correspondent_classifier:
            self._classify_correspondent(X, document)

        if classify_document_type and self.document_type_classifier:
            self._classify_document_type(X, document)

        if classify_tags and self.tags_classifier:
            self._classify_tags(X, document, replace_tags)

        document.save(update_fields=("correspondent", "document_type"))

    def _classify_correspondent(self, X, document):
        y = self.correspondent_classifier.predict(X)
        correspondent_id = self.correspondent_binarizer.inverse_transform(y)[0]
        try:
            correspondent = None
            if correspondent_id != -1:
                correspondent = Correspondent.objects.get(id=correspondent_id)
                logging.getLogger(__name__).info(
                    "Detected correspondent: {}".format(correspondent.name)
                )
            else:
                logging.getLogger(__name__).info("Detected correspondent: -")
            document.correspondent = correspondent
        except Correspondent.DoesNotExist:
            logging.getLogger(__name__).warning(
                "Detected correspondent with id {} does not exist "
                "anymore! Did you delete it?".format(correspondent_id)
            )

    def _classify_document_type(self, X, document):
        y = self.document_type_classifier.predict(X)
        document_type_id = self.document_type_binarizer.inverse_transform(y)[0]
        try:
            document_type = None
            if document_type_id != -1:
                document_type = DocumentType.objects.get(id=document_type_id)
                logging.getLogger(__name__).info(
                    "Detected document type: {}".format(document_type.name)
                )
            else:
                logging.getLogger(__name__).info("Detected document type: -")
            document.document_type = document_type
        except DocumentType.DoesNotExist:
            logging.getLogger(__name__).warning(
                "Detected document type with id {} does not exist "
                "anymore! Did you delete it?".format(document_type_id)
            )

    def _classify_tags(self, X, document, replace_tags):
        y = self.tags_classifier.predict(X)
        tags_ids = self.tags_binarizer.inverse_transform(y)[0]
        if replace_tags:
            document.tags.clear()
        for tag_id in tags_ids:
            try:
                tag = Tag.objects.get(id=tag_id)
                logging.getLogger(__name__).info(
                    "Detected tag: {}".format(tag.name)
                )
                document.tags.add(tag)
            except Tag.DoesNotExist:
                logging.getLogger(__name__).warning(
                    "Detected tag with id {} does not exist anymore! Did "
                    "you delete it?".format(tag_id)
                )

src/documents/consumer.py Normal file → Executable file

@@ -225,7 +225,7 @@ class Consumer:
            storage_type=self.storage_type
        )

        relevant_tags = set(file_info.tags)

        if relevant_tags:
            tag_names = ", ".join([t.slug for t in relevant_tags])
            self.log("debug", "Tagging with {}".format(tag_names))

src/documents/filters.py Normal file → Executable file

@@ -1,6 +1,6 @@
from django_filters.rest_framework import BooleanFilter, FilterSet

from .models import Correspondent, Document, Tag, DocumentType
CHAR_KWARGS = (
@@ -35,6 +35,19 @@ class TagFilterSet(FilterSet):
        }
class DocumentTypeFilterSet(FilterSet):

    class Meta:
        model = DocumentType
        fields = {
            "name": [
                "startswith", "endswith", "contains",
                "istartswith", "iendswith", "icontains"
            ],
            "slug": ["istartswith", "iendswith", "icontains"]
        }
class DocumentFilterSet(FilterSet):

    tags_empty = BooleanFilter(
@@ -57,4 +70,7 @@ class DocumentFilterSet(FilterSet):
"tags__name": CHAR_KWARGS, "tags__name": CHAR_KWARGS,
"tags__slug": CHAR_KWARGS, "tags__slug": CHAR_KWARGS,
"document_type__name": CHAR_KWARGS,
"document_type__slug": CHAR_KWARGS,
} }
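With these lookups registered, django-filter exposes each one as a query parameter of the form `<field>__<lookup>`. A sketch of building such a request URL (the host, endpoint path and filter values here are invented for illustration, not taken from the project's URL configuration):

```python
from urllib.parse import urlencode

# Sketch: each configured lookup becomes a query parameter named
# <field>__<lookup>. Host, path and values below are hypothetical.
params = {
    "document_type__name__icontains": "invoice",
    "tags__slug": "inbox",
}
url = "http://localhost:8000/api/documents/?" + urlencode(params)
```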


@@ -1,82 +0,0 @@
import sys

from django.core.management.base import BaseCommand

from documents.models import Correspondent, Document

from ...mixins import Renderable


class Command(Renderable, BaseCommand):

    help = """
        Using the current set of correspondent rules, apply said rules to all
        documents in the database, effectively allowing you to back-tag all
        previously indexed documents with correspondent created (or modified)
        after their initial import.
    """.replace("    ", "")

    TOO_MANY_CONTINUE = (
        "Detected {} potential correspondents for {}, so we've opted for {}")
    TOO_MANY_SKIP = (
        "Detected {} potential correspondents for {}, so we're skipping it")
    CHANGE_MESSAGE = (
        'Document {}: "{}" was given the correspondent id {}: "{}"')

    def __init__(self, *args, **kwargs):
        self.verbosity = 0
        BaseCommand.__init__(self, *args, **kwargs)

    def add_arguments(self, parser):
        parser.add_argument(
            "--use-first",
            default=False,
            action="store_true",
            help="By default this command won't try to assign a correspondent "
                 "if more than one matches the document. Use this flag if "
                 "you'd rather it just pick the first one it finds."
        )

    def handle(self, *args, **options):
        self.verbosity = options["verbosity"]

        for document in Document.objects.filter(correspondent__isnull=True):

            potential_correspondents = list(
                Correspondent.match_all(document.content))
            if not potential_correspondents:
                continue

            potential_count = len(potential_correspondents)
            correspondent = potential_correspondents[0]

            if potential_count > 1:
                if not options["use_first"]:
                    print(
                        self.TOO_MANY_SKIP.format(potential_count, document),
                        file=sys.stderr
                    )
                    continue
                print(
                    self.TOO_MANY_CONTINUE.format(
                        potential_count,
                        document,
                        correspondent
                    ),
                    file=sys.stderr
                )

            document.correspondent = correspondent
            document.save(update_fields=("correspondent",))

            print(
                self.CHANGE_MESSAGE.format(
                    document.pk,
                    document.title,
                    correspondent.pk,
                    correspondent.name
                ),
                file=sys.stderr
            )


@@ -0,0 +1,25 @@
import logging

from django.core.management.base import BaseCommand

from documents.classifier import DocumentClassifier
from paperless import settings

from ...mixins import Renderable


class Command(Renderable, BaseCommand):

    help = """
        Trains the classifier on your data and saves the resulting models to a
        file. The document consumer will then automatically use this new model.
    """.replace("    ", "")

    def __init__(self, *args, **kwargs):
        BaseCommand.__init__(self, *args, **kwargs)

    def handle(self, *args, **options):
        clf = DocumentClassifier()
        clf.train()
        logging.getLogger(__name__).info(
            "Saving models to {}...".format(settings.MODEL_FILE)
        )
        clf.save_classifier()


@@ -6,7 +6,7 @@ import shutil

from django.core.management.base import BaseCommand, CommandError
from django.core import serializers
-from documents.models import Document, Correspondent, Tag
+from documents.models import Document, Correspondent, Tag, DocumentType
from paperless.db import GnuPG

from ...mixins import Renderable
@@ -96,6 +96,9 @@ class Command(Renderable, BaseCommand):
        manifest += json.loads(serializers.serialize(
            "json", Tag.objects.all()))

+        manifest += json.loads(serializers.serialize(
+            "json", DocumentType.objects.all()))
+
        with open(os.path.join(self.target, "manifest.json"), "w") as f:
            json.dump(manifest, f, indent=2)

src/documents/management/commands/document_retagger.py | 64 (Normal file → Executable file)

@@ -1,5 +1,8 @@
+import logging
+
from django.core.management.base import BaseCommand

+from documents.classifier import DocumentClassifier
from documents.models import Document, Tag

from ...mixins import Renderable
@@ -8,25 +11,66 @@ from ...mixins import Renderable
class Command(Renderable, BaseCommand):

    help = """
-        Using the current set of tagging rules, apply said rules to all
-        documents in the database, effectively allowing you to back-tag all
-        previously indexed documents with tags created (or modified) after
-        their initial import.
+        Using the current classification model, assigns correspondents, tags
+        and document types to all documents, effectively allowing you to
+        back-tag all previously indexed documents with metadata created (or
+        modified) after their initial import.
    """.replace("    ", "")

    def __init__(self, *args, **kwargs):
        self.verbosity = 0
        BaseCommand.__init__(self, *args, **kwargs)

+    def add_arguments(self, parser):
+        parser.add_argument(
+            "-c", "--correspondent",
+            action="store_true"
+        )
+        parser.add_argument(
+            "-T", "--tags",
+            action="store_true"
+        )
+        parser.add_argument(
+            "-t", "--type",
+            action="store_true"
+        )
+        parser.add_argument(
+            "-i", "--inbox-only",
+            action="store_true"
+        )
+        parser.add_argument(
+            "-r", "--replace-tags",
+            action="store_true"
+        )
+
    def handle(self, *args, **options):

        self.verbosity = options["verbosity"]

-        for document in Document.objects.all():
+        if options["inbox_only"]:
+            queryset = Document.objects.filter(tags__is_inbox_tag=True)
+        else:
+            queryset = Document.objects.all()
+        documents = queryset.distinct()

-            tags = Tag.objects.exclude(
-                pk__in=document.tags.values_list("pk", flat=True))
+        logging.getLogger(__name__).info("Loading classifier")
+        clf = DocumentClassifier()
+        try:
+            clf.reload()
+        except FileNotFoundError:
+            logging.getLogger(__name__).fatal("Cannot classify documents, "
+                                              "classifier model file was not "
+                                              "found.")
+            return

-            for tag in Tag.match_all(document.content, tags):
-                print('Tagging {} with "{}"'.format(document, tag))
-                document.tags.add(tag)
+        for document in documents:
+            logging.getLogger(__name__).info(
+                "Processing document {}".format(document.title)
+            )
+            clf.classify_document(
+                document,
+                classify_document_type=options["type"],
+                classify_tags=options["tags"],
+                classify_correspondent=options["correspondent"],
+                replace_tags=options["replace_tags"]
+            )
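The new retagger flags can be exercised in isolation. A minimal argparse sketch mirroring the options added above, standalone and outside Django's management-command machinery (so this is an illustration of the flag parsing, not the command itself):

```python
import argparse

# Mirror of the document_retagger options shown in the diff above.
parser = argparse.ArgumentParser(prog="document_retagger")
parser.add_argument("-c", "--correspondent", action="store_true")
parser.add_argument("-T", "--tags", action="store_true")
parser.add_argument("-t", "--type", action="store_true")
parser.add_argument("-i", "--inbox-only", action="store_true")
parser.add_argument("-r", "--replace-tags", action="store_true")

# e.g. re-tag only inbox documents, assigning tags but nothing else
args = parser.parse_args(["-T", "-i"])
print(args.tags, args.inbox_only, args.correspondent)  # True True False
```

Note that argparse converts `--inbox-only` to the attribute `inbox_only`, which is why `handle()` reads `options["inbox_only"]`.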


@@ -11,7 +11,7 @@ def re_slug_all_the_things(apps, schema_editor):
    """
    Tag = apps.get_model("documents", "Tag")
-    Correspondent = apps.get_model("documents", "Tag")
+    Correspondent = apps.get_model("documents", "Correspondent")

    for klass in (Tag, Correspondent):
        for instance in klass.objects.all():


@@ -0,0 +1,23 @@
# Generated by Django 2.0.7 on 2018-07-12 09:52

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('documents', '0022_auto_20181007_1420'),
    ]

    operations = [
        migrations.AddField(
            model_name='document',
            name='archive_serial_number',
            field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
        ),
        migrations.AddField(
            model_name='tag',
            name='is_inbox_tag',
            field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
        ),
    ]


@@ -0,0 +1,33 @@
# Generated by Django 2.0.7 on 2018-08-23 11:55

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

    dependencies = [
        ('documents', '1001_workflow_improvements'),
    ]

    operations = [
        migrations.CreateModel(
            name='DocumentType',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=128, unique=True)),
                ('slug', models.SlugField(blank=True)),
                ('match', models.CharField(blank=True, max_length=256)),
                ('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.')),
                ('is_insensitive', models.BooleanField(default=True)),
            ],
            options={
                'abstract': False,
            },
        ),
        migrations.AddField(
            model_name='document',
            name='document_type',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'),
        ),
    ]


@@ -0,0 +1,77 @@
# Generated by Django 2.0.8 on 2018-09-04 14:25

from django.db import migrations, models


def transfer_automatic_classification(apps, schema_editor):
    for model_name in ["Tag", "Correspondent", "DocumentType"]:
        model_class = apps.get_model("documents", model_name)
        for o in model_class.objects.all():
            o.automatic_classification = o.match is not None and len(o.match) > 0
            o.save()


def reverse_automatic_classification(apps, schema_editor):
    pass


class Migration(migrations.Migration):

    dependencies = [
        ('documents', '1002_auto_20180823_1155'),
    ]

    operations = [
        migrations.AddField(
            model_name='correspondent',
            name='automatic_classification',
            field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
        ),
        migrations.AddField(
            model_name='documenttype',
            name='automatic_classification',
            field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
        ),
        migrations.AddField(
            model_name='tag',
            name='automatic_classification',
            field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
        ),
        migrations.RunPython(transfer_automatic_classification, reverse_automatic_classification),
        migrations.RemoveField(
            model_name='correspondent',
            name='is_insensitive',
        ),
        migrations.RemoveField(
            model_name='correspondent',
            name='match',
        ),
        migrations.RemoveField(
            model_name='correspondent',
            name='matching_algorithm',
        ),
        migrations.RemoveField(
            model_name='documenttype',
            name='is_insensitive',
        ),
        migrations.RemoveField(
            model_name='documenttype',
            name='match',
        ),
        migrations.RemoveField(
            model_name='documenttype',
            name='matching_algorithm',
        ),
        migrations.RemoveField(
            model_name='tag',
            name='is_insensitive',
        ),
        migrations.RemoveField(
            model_name='tag',
            name='match',
        ),
        migrations.RemoveField(
            model_name='tag',
            name='matching_algorithm',
        ),
    ]
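The data migration's `transfer_automatic_classification` boils down to a single predicate: anything that had a match string set becomes automatically classified. A standalone sketch of that rule, outside Django:

```python
def derives_automatic_classification(match):
    # Mirrors the migration above: objects that had any match string set
    # get automatic_classification=True, everything else False.
    return match is not None and len(match) > 0

print(derives_automatic_classification("invoice"))  # True
print(derives_automatic_classification(""))         # False
```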


@@ -0,0 +1,36 @@
# Generated by Django 2.0.8 on 2018-10-07 14:20

from django.db import migrations, models
from django.utils.text import slugify


def re_slug_all_the_things(apps, schema_editor):
    """
    Rewrite all slug values to make sure they're actually slugs before we brand
    them as uneditable.
    """
    DocumentType = apps.get_model("documents", "DocumentType")

    for instance in DocumentType.objects.all():
        DocumentType.objects.filter(
            pk=instance.pk
        ).update(
            slug=slugify(instance.slug)
        )


class Migration(migrations.Migration):

    dependencies = [
        ('documents', '1003_auto_20180904_1425'),
    ]

    operations = [
        migrations.AlterField(
            model_name='documenttype',
            name='slug',
            field=models.SlugField(blank=True, editable=False),
        ),
        migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
    ]
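This migration just re-runs Django's `slugify` over every stored slug. To illustrate the normalisation it performs, here is a minimal stand-in for `django.utils.text.slugify` — an assumption covering ASCII input only, not Django's full Unicode handling:

```python
import re

def slugify(value):
    # Minimal stand-in for django.utils.text.slugify (ASCII-only sketch):
    # drop non-alphanumerics, lowercase, collapse whitespace/hyphens to "-".
    value = re.sub(r"[^\w\s-]", "", value).strip().lower()
    return re.sub(r"[-\s]+", "-", value)

print(slugify("Tax Documents 2018!"))  # tax-documents-2018
```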

src/documents/mixins.py | 0 (Normal file → Executable file)

src/documents/models.py | 142 (Normal file → Executable file)

@@ -24,43 +24,15 @@
class MatchingModel(models.Model):

-    MATCH_ANY = 1
-    MATCH_ALL = 2
-    MATCH_LITERAL = 3
-    MATCH_REGEX = 4
-    MATCH_FUZZY = 5
-    MATCHING_ALGORITHMS = (
-        (MATCH_ANY, "Any"),
-        (MATCH_ALL, "All"),
-        (MATCH_LITERAL, "Literal"),
-        (MATCH_REGEX, "Regular Expression"),
-        (MATCH_FUZZY, "Fuzzy Match"),
-    )
-
    name = models.CharField(max_length=128, unique=True)
    slug = models.SlugField(blank=True, editable=False)

-    match = models.CharField(max_length=256, blank=True)
-    matching_algorithm = models.PositiveIntegerField(
-        choices=MATCHING_ALGORITHMS,
-        default=MATCH_ANY,
-        help_text=(
-            "Which algorithm you want to use when matching text to the OCR'd "
-            "PDF. Here, \"any\" looks for any occurrence of any word "
-            "provided in the PDF, while \"all\" requires that every word "
-            "provided appear in the PDF, albeit not in the order provided. A "
-            "\"literal\" match means that the text you enter must appear in "
-            "the PDF exactly as you've entered it, and \"regular expression\" "
-            "uses a regex to match the PDF. (If you don't know what a regex "
-            "is, you probably don't want this option.) Finally, a \"fuzzy "
-            "match\" looks for words or phrases that are mostly—but not "
-            "exactly—the same, which can be useful for matching against "
-            "documents containing imperfections that foil accurate OCR."
-        )
-    )
-    is_insensitive = models.BooleanField(default=True)
+    automatic_classification = models.BooleanField(
+        default=False,
+        help_text="Automatically assign to newly added documents based on "
+                  "current usage in your document collection."
+    )

    class Meta:
        abstract = True
        ordering = ("name",)
@@ -68,86 +40,8 @@ class MatchingModel(models.Model):
    def __str__(self):
        return self.name

-    @property
-    def conditions(self):
-        return "{}: \"{}\" ({})".format(
-            self.name, self.match, self.get_matching_algorithm_display())
-
-    @classmethod
-    def match_all(cls, text, tags=None):
-
-        if tags is None:
-            tags = cls.objects.all()
-
-        text = text.lower()
-        for tag in tags:
-            if tag.matches(text):
-                yield tag
-
-    def matches(self, text):
-
-        search_kwargs = {}
-
-        # Check that match is not empty
-        if self.match.strip() == "":
-            return False
-
-        if self.is_insensitive:
-            search_kwargs = {"flags": re.IGNORECASE}
-
-        if self.matching_algorithm == self.MATCH_ALL:
-            for word in self._split_match():
-                search_result = re.search(
-                    r"\b{}\b".format(word), text, **search_kwargs)
-                if not search_result:
-                    return False
-            return True
-
-        if self.matching_algorithm == self.MATCH_ANY:
-            for word in self._split_match():
-                if re.search(r"\b{}\b".format(word), text, **search_kwargs):
-                    return True
-            return False
-
-        if self.matching_algorithm == self.MATCH_LITERAL:
-            return bool(re.search(
-                r"\b{}\b".format(self.match), text, **search_kwargs))
-
-        if self.matching_algorithm == self.MATCH_REGEX:
-            return bool(re.search(
-                re.compile(self.match, **search_kwargs), text))
-
-        if self.matching_algorithm == self.MATCH_FUZZY:
-            match = re.sub(r'[^\w\s]', '', self.match)
-            text = re.sub(r'[^\w\s]', '', text)
-            if self.is_insensitive:
-                match = match.lower()
-                text = text.lower()
-
-            return True if fuzz.partial_ratio(match, text) >= 90 else False
-
-        raise NotImplementedError("Unsupported matching algorithm")
-
-    def _split_match(self):
-        """
-        Splits the match to individual keywords, getting rid of unnecessary
-        spaces and grouping quoted words together.
-
-        Example:
-          ' some random words "with   quotes  " and   spaces'
-            ==>
-          ["some", "random", "words", "with+quotes", "and", "spaces"]
-        """
-        findterms = re.compile(r'"([^"]+)"|(\S+)').findall
-        normspace = re.compile(r"\s+").sub
-        return [
-            normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
-            for t in findterms(self.match)
-        ]
-
    def save(self, *args, **kwargs):

-        self.match = self.match.lower()
        self.slug = slugify(self.name)

        models.Model.save(self, *args, **kwargs)
@@ -183,6 +77,17 @@ class Tag(MatchingModel):
    colour = models.PositiveIntegerField(choices=COLOURS, default=1)

+    is_inbox_tag = models.BooleanField(
+        default=False,
+        help_text="Marks this tag as an inbox tag: All newly consumed "
+                  "documents will be tagged with inbox tags."
+    )
+
+
+class DocumentType(MatchingModel):
+
+    pass
+

class Document(models.Model):
@@ -214,6 +119,14 @@ class Document(models.Model):
    title = models.CharField(max_length=128, blank=True, db_index=True)

+    document_type = models.ForeignKey(
+        DocumentType,
+        blank=True,
+        null=True,
+        related_name="documents",
+        on_delete=models.SET_NULL
+    )
+
    content = models.TextField(
        db_index=True,
        blank=True,
@@ -254,6 +167,15 @@ class Document(models.Model):
    added = models.DateTimeField(
        default=timezone.now, editable=False, db_index=True)

+    archive_serial_number = models.IntegerField(
+        blank=True,
+        null=True,
+        unique=True,
+        db_index=True,
+        help_text="The position of this document in your physical document "
+                  "archive."
+    )
+
    class Meta:
        ordering = ("correspondent", "title")
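The removed `_split_match` helper is self-contained and its docstring example can be verified in isolation. A sketch of the same quoting/whitespace behaviour as a plain function, outside the model class:

```python
import re

def split_match(match):
    """Split a match string into keyword patterns, grouping quoted phrases
    and turning internal whitespace into the regex \\s+ (as in the removed
    MatchingModel._split_match)."""
    findterms = re.compile(r'"([^"]+)"|(\S+)').findall
    normspace = re.compile(r"\s+").sub
    return [
        normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
        for t in findterms(match)
    ]

print(split_match('  some random words "with   quotes  " and   spaces'))
# ['some', 'random', 'words', 'with\\s+quotes', 'and', 'spaces']
```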


@@ -14,14 +14,18 @@ from django.utils import timezone
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
+# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
+# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
+# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
# - MONTH ZZZZ, with ZZZZ being 4 digits
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
DATE_REGEX = re.compile(
-    r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
-    r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
-    r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
-    r'\b([^\W\d_]{3,9} [0-9]{4})\b'
+    r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' +  # NOQA: E501
+    r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' +  # NOQA: E501
+    r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' +  # NOQA: E501
+    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
+    r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
)
@@ -37,6 +41,7 @@ class DocumentParser:
    SCRATCH = settings.SCRATCH_DIR
    DATE_ORDER = settings.DATE_ORDER
+    FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
    OPTIPNG = settings.OPTIPNG_BINARY

    def __init__(self, path):
@@ -75,30 +80,60 @@ class DocumentParser:
        Returns the date of the document.
        """

+        def __parser(ds, date_order):
+            """
+            Call dateparser.parse with a particular date ordering
+            """
+            return dateparser.parse(
+                ds,
+                settings={
+                    "DATE_ORDER": date_order,
+                    "PREFER_DAY_OF_MONTH": "first",
+                    "RETURN_AS_TIMEZONE_AWARE": True
+                }
+            )
+
        date = None
        date_string = None

+        next_year = timezone.now().year + 5  # Arbitrary 5 year future limit
+        title = os.path.basename(self.document_path)
+
+        # if filename date parsing is enabled, search there first:
+        if self.FILENAME_DATE_ORDER:
+            self.log("info", "Checking document title for date")
+            for m in re.finditer(DATE_REGEX, title):
+                date_string = m.group(0)
+
+                try:
+                    date = __parser(date_string, self.FILENAME_DATE_ORDER)
+                except TypeError:
+                    # Skip all matches that do not parse to a proper date
+                    continue
+
+                if date is not None and next_year > date.year > 1900:
+                    self.log(
+                        "info",
+                        "Detected document date {} based on string {} "
+                        "from document title"
+                        "".format(date.isoformat(), date_string)
+                    )
+                    return date
+
        try:
+            # getting text after checking filename will save time if only
+            # looking at the filename instead of the whole text
            text = self.get_text()
        except ParseError:
            return None

-        next_year = timezone.now().year + 5  # Arbitrary 5 year future limit
-
-        # Iterate through all regex matches and try to parse the date
+        # Iterate through all regex matches in text and try to parse the date
        for m in re.finditer(DATE_REGEX, text):
            date_string = m.group(0)

            try:
-                date = dateparser.parse(
-                    date_string,
-                    settings={
-                        "DATE_ORDER": self.DATE_ORDER,
-                        "PREFER_DAY_OF_MONTH": "first",
-                        "RETURN_AS_TIMEZONE_AWARE": True
-                    }
-                )
+                date = __parser(date_string, self.DATE_ORDER)
            except TypeError:
                # Skip all matches that do not parse to a proper date
                continue
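The point of the new boundary groups around each DATE_REGEX branch is to let dates be recognised inside underscore- or hyphen-delimited filenames, where `\b` alone fails. A rough standalone check using one simplified branch — note the committed pattern uses `(?!=...)`, while this sketch uses a lookbehind `(?<=[_-])`, which expresses the apparent intent; it is an approximation, not the exact DATE_REGEX above:

```python
import re

# Simplified ISO-style pattern modelled on the new ZZZZ-XX-YY branch; the
# real DATE_REGEX also handles dotted, slashed and textual-month forms.
pattern = re.compile(
    r'(?:\b|(?<=[_-]))(\d{4})-(\d{1,2})-(\d{1,2})(?:\b|(?=[_-]))'
)

m = pattern.search("scan_2018-12-01_invoice.pdf")
print(m.groups())  # ('2018', '12', '01')
```

Without the lookarounds, `\b` would not fire between `_` and `2` (both are word characters), so the date buried in the filename would be missed.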


@@ -1,13 +1,20 @@
from rest_framework import serializers

-from .models import Correspondent, Tag, Document, Log
+from .models import Correspondent, Tag, Document, Log, DocumentType


class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):

    class Meta:
        model = Correspondent
-        fields = ("id", "slug", "name")
+        fields = ("id", "slug", "name", "automatic_classification")
+
+
+class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
+
+    class Meta:
+        model = DocumentType
+        fields = ("id", "slug", "name", "automatic_classification")


class TagSerializer(serializers.HyperlinkedModelSerializer):
@@ -15,7 +22,7 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
    class Meta:
        model = Tag
        fields = (
-            "id", "slug", "name", "colour", "match", "matching_algorithm")
+            "id", "slug", "name", "colour", "automatic_classification")


class CorrespondentField(serializers.HyperlinkedRelatedField):
@@ -28,17 +35,25 @@ class TagsField(serializers.HyperlinkedRelatedField):
        return Tag.objects.all()


+class DocumentTypeField(serializers.HyperlinkedRelatedField):
+    def get_queryset(self):
+        return DocumentType.objects.all()
+
+
class DocumentSerializer(serializers.ModelSerializer):

    correspondent = CorrespondentField(
        view_name="drf:correspondent-detail", allow_null=True)
    tags = TagsField(view_name="drf:tag-detail", many=True)
+    document_type = DocumentTypeField(
+        view_name="drf:documenttype-detail", allow_null=True)

    class Meta:
        model = Document
        fields = (
            "id",
            "correspondent",
+            "document_type",
            "title",
            "content",
            "file_type",

src/documents/signals/handlers.py | 59 (Normal file → Executable file)

@@ -8,57 +8,36 @@ from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.utils import timezone

-from ..models import Correspondent, Document, Tag
+from documents.classifier import DocumentClassifier
+from ..models import Document, Tag


def logger(message, group):
    logging.getLogger(__name__).debug(message, extra={"group": group})


-def set_correspondent(sender, document=None, logging_group=None, **kwargs):
-
-    # No sense in assigning a correspondent when one is already set.
-    if document.correspondent:
-        return
-
-    # No matching correspondents, so no need to continue
-    potential_correspondents = list(Correspondent.match_all(document.content))
-    if not potential_correspondents:
-        return
-
-    potential_count = len(potential_correspondents)
-    selected = potential_correspondents[0]
-    if potential_count > 1:
-        message = "Detected {} potential correspondents, so we've opted for {}"
-        logger(
-            message.format(potential_count, selected),
-            logging_group
-        )
-
-    logger(
-        'Assigning correspondent "{}" to "{}" '.format(selected, document),
-        logging_group
-    )
-
-    document.correspondent = selected
-    document.save(update_fields=("correspondent",))
-
-
-def set_tags(sender, document=None, logging_group=None, **kwargs):
-
-    current_tags = set(document.tags.all())
-    relevant_tags = set(Tag.match_all(document.content)) - current_tags
-
-    if not relevant_tags:
-        return
-
-    message = 'Tagging "{}" with "{}"'
-    logger(
-        message.format(document, ", ".join([t.slug for t in relevant_tags])),
-        logging_group
-    )
-
-    document.tags.add(*relevant_tags)
+classifier = DocumentClassifier()
+
+
+def classify_document(sender, document=None, logging_group=None, **kwargs):
+    global classifier
+    try:
+        classifier.reload()
+        classifier.classify_document(
+            document,
+            classify_correspondent=True,
+            classify_tags=True,
+            classify_document_type=True
+        )
+    except FileNotFoundError:
+        logging.getLogger(__name__).fatal(
+            "Cannot classify document, classifier model file was not found."
+        )
+
+
+def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
+    inbox_tags = Tag.objects.filter(is_inbox_tag=True)
+    document.tags.add(*inbox_tags)


def run_pre_consume_script(sender, filename, **kwargs):

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

src/documents/static/documents/js/pdf.worker.js | 41660 (vendored, Executable file)

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long


@@ -0,0 +1,66 @@
// The following jQuery snippet will add a small square next to the selection
// drop-down on the `Add tag` page that will update to show the selected tag
// color as the drop-down value is changed.

django.jQuery(document).ready(function(){
    if (django.jQuery("#id_colour").length) {
        let colour;
        let colour_num;

        colour_num = django.jQuery("#id_colour").val() - 1;
        colour = django.jQuery('#id_colour')[0][colour_num].text;

        django.jQuery('#id_colour').after('<div class="colour_square"></div>');
        django.jQuery('.colour_square').css({
            'float': 'left',
            'width': '20px',
            'height': '20px',
            'margin': '5px',
            'border': '1px solid rgba(0, 0, 0, .2)',
            'background': colour
        });

        django.jQuery('#id_colour').change(function () {
            colour_num = django.jQuery("#id_colour").val() - 1;
            colour = django.jQuery('#id_colour')[0][colour_num].text;
            django.jQuery('.colour_square').css({'background': colour});
        });
    } else if (django.jQuery("select[id*='colour']").length) {
        django.jQuery('select[id*="-colour"]').each(function (index, element) {
            let id;
            let loop_colour_num;
            let loop_colour;

            id = "colour_square_" + index;
            django.jQuery(element).after('<div class="colour_square" id="' + id + '"></div>');

            loop_colour_num = django.jQuery(element).val() - 1;
            loop_colour = django.jQuery(element)[0][loop_colour_num].text;

            django.jQuery("<style type='text/css'>\
                .colour_square{ \
                    float: left; \
                    width: 20px; \
                    height: 20px; \
                    margin: 5px; \
                    border: 1px solid rgba(0,0,0,.2); \
                } </style>").appendTo("head");

            django.jQuery('#' + id).css({'background': loop_colour});
            console.log(id, loop_colour_num, loop_colour);

            django.jQuery(element).change(function () {
                loop_colour_num = django.jQuery(element).val() - 1;
                loop_colour = django.jQuery(element)[0][loop_colour_num].text;
                django.jQuery('#' + id).css({'background': loop_colour});
                console.log('#' + id, loop_colour)
            });
        })
    }
});

src/documents/static/paperless.css | 13 (Normal file → Executable file)

@@ -20,4 +20,17 @@ td a.tag {
#result_list td textarea {
    width: 90%;
    height: 5em;
}
+
+#change_form_twocolumn_parent {
+    display: flex;
+}
+#change_form_form_parent {
+    flex: 50%;
+    margin-right: 10px;
+}
+#change_form_viewer_parent {
+    flex: 50%;
+    margin-left: 10px;
+    text-align: center;
+}


@@ -4,6 +4,27 @@
{{ block.super }}

+{% if file_type in "pdf jpg png" %}
+  <div id="change_form_twocolumn_parent">
+    <div id="change_form_form_parent"></div>
+    <div id="change_form_viewer_parent">
+      {% if file_type == "pdf" %}
+        {% include "admin/documents/document/viewers/viewer_pdf.html" %}
+      {% endif %}
+      {% if file_type in "jpg png" %}
+        {% include "admin/documents/document/viewers/viewer_image.html" %}
+      {% endif %}
+    </div>
+  </div>
+  <script>
+    django.jQuery("#change_form_form_parent").append(django.jQuery("#document_form"));
+    django.jQuery("#content-main").append(django.jQuery("#change_form_twocolumn_parent"));
+  </script>
+{% endif %}
+
{% if next_object %}
<script type="text/javascript">//<![CDATA[
(function($){


@@ -24,7 +24,8 @@
    border: 1px solid #cccccc;
    border-radius: 2%;
    overflow: hidden;
-    height: 300px;
+    height: 350px;
+    position: relative;
}
.result .header {
    padding: 5px;
@@ -60,6 +61,11 @@
.result a.tag {
    color: #ffffff;
}
+.result .documentType {
+    padding: 5px;
+    background-color: #eeeeee;
+    text-align: center;
+}
.result .date {
    padding: 5px;
}
@@ -79,6 +85,15 @@
.result .image img {
    width: 100%;
}
+.result .footer {
+    position: absolute;
+    bottom: 0;
+    right: 0;
+    border-left: 1px solid #cccccc;
+    border-top: 1px solid #cccccc;
+    padding: 4px 10px 4px 10px;
+    background: white;
+}

.grid {
    margin-right: 260px;
@@ -152,7 +167,9 @@
{# 4: Image #}
{# 5: Correspondent #}
{# 6: Tags #}
-{# 7: Document edit url #}
+{# 7: Archive serial number #}
+{# 8: Document type #}
+{# 9: Document edit url #}
<div class="box">
  <div class="result">
    <div class="header">
@@ -166,7 +183,7 @@
        selection would not be possible with mouse click + drag. Instead,
        the underlying link would be dragged.
      {% endcomment %}
-      <div class="headerLink" onclick="location.href='{{ result.7 }}';"></div>
+      <div class="headerLink" onclick="location.href='{{ result.9 }}';"></div>
      <div class="checkbox">{{ result.0 }}</div>
      <div class="info">
        {{ result.5 }}
@@ -174,10 +191,14 @@
      {{ result.1 }}
      <div style="clear: both;"></div>
    </div>
+    {% if '>-<' not in result.8 %}<div class="documentType">{{ result.8 }}</div>{% endif %}
    <div class="tags">{{ result.6 }}</div>
    <div class="date">{{ result.2 }}</div>
    <div style="clear: both;"></div>
    <div class="image">{{ result.4 }}</div>
+    {# Only show the archive serial number if it is set on the document. #}
+    {# checking for >-< (i.e., will a dash be displayed) doesn't feel like a very good solution to me. #}
+    {% if '>-<' not in result.7 %}<div class="footer">#{{ result.7 }}</div>{% endif %}
  </div>
</div>
{% endfor %}


@@ -0,0 +1 @@
<img src="{{download_url}}" style="max-width: 100%">


@@ -0,0 +1,130 @@
{% load static %}
<div>
<input id="prev" value="Previous" class="default" type="button">
<input id="next" value="Next" class="default" type="button">
&nbsp; &nbsp;
<span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
&nbsp; &nbsp;
<input id="zoomin" value="+" class="default" type="button">
<input id="zoomout" value="-" class="default" type="button">
</div>
<div style="width: 100%; overflow: auto;">
<canvas id="the-canvas"></canvas>
</div>
<script type="text/javascript" src="{% static 'documents/js/pdf.js' %}"></script>
<script type="text/javascript" src="{% static 'documents/js/pdf.worker.js' %}"></script>
{# Load and display PDF document #}
<script>
var pdfjsLib = window['pdfjs-dist/build/pdf'];
var pdfDoc = null,
pageNum = 1,
pageRendering = false,
pageNumPending = null,
scale = 1.0,
canvas = document.getElementById('the-canvas'),
ctx = canvas.getContext('2d');
/**
* Get page info from document, resize canvas accordingly, and render page.
* @param num Page number.
*/
function renderPage(num) {
pageRendering = true;
// Using promise to fetch the page
pdfDoc.getPage(num).then(function(page) {
var viewport = page.getViewport(scale);
canvas.height = viewport.height;
canvas.width = viewport.width;
// Render PDF page into canvas context
var renderContext = {
canvasContext: ctx,
viewport: viewport
};
var renderTask = page.render(renderContext);
// Wait for rendering to finish
renderTask.promise.then(function () {
pageRendering = false;
if (pageNumPending !== null) {
// New page rendering is pending
renderPage(pageNumPending);
pageNumPending = null;
}
});
});
// Update page counters
document.getElementById('page_num').textContent = num;
}
/**
* If another page rendering is in progress, waits until the rendering is
* finished. Otherwise, executes rendering immediately.
*/
function queueRenderPage(num) {
if (pageRendering) {
pageNumPending = num;
} else {
renderPage(num);
}
}
/**
* Displays previous page.
*/
function onPrevPage() {
if (pageNum <= 1) {
return;
}
pageNum--;
queueRenderPage(pageNum);
}
document.getElementById('prev').addEventListener('click', onPrevPage);
/**
* Displays next page.
*/
function onNextPage() {
if (pageNum >= pdfDoc.numPages) {
return;
}
pageNum++;
queueRenderPage(pageNum);
}
document.getElementById('next').addEventListener('click', onNextPage);
/**
* Zooms in and re-renders the current page.
*/
function onZoomIn() {
scale *= 1.2;
queueRenderPage(pageNum);
}
document.getElementById('zoomin').addEventListener('click', onZoomIn);
/**
* Zooms out and re-renders the current page.
*/
function onZoomOut() {
scale /= 1.2;
queueRenderPage(pageNum);
}
document.getElementById('zoomout').addEventListener('click', onZoomOut);
/**
* Asynchronously downloads PDF.
*/
pdfjsLib.getDocument("{{download_url}}").then(function (pdfDoc_) {
pdfDoc = pdfDoc_;
document.getElementById('page_count').textContent = pdfDoc.numPages;
// Initial/first page rendering
renderPage(pageNum);
});
</script>
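The `pageRendering`/`pageNumPending` pair above implements a one-slot render queue: while a render is in flight, only the most recent page request is remembered and rendered once the current one finishes. A synchronous sketch of that logic, with the PDF.js canvas work stubbed out (the `rendered` array is a stand-in for actual drawing, not part of the viewer):

```javascript
let pageRendering = false;
let pageNumPending = null;
const rendered = [];

function renderPage(num) {
  pageRendering = true;
  rendered.push(num);  // stand-in for the real canvas render
  // In the real viewer this runs in the renderTask.promise callback;
  // here rendering is synchronous, so the flag clears immediately.
  pageRendering = false;
  if (pageNumPending !== null) {
    const next = pageNumPending;
    pageNumPending = null;
    renderPage(next);
  }
}

function queueRenderPage(num) {
  if (pageRendering) {
    pageNumPending = num;  // keep only the latest request
  } else {
    renderPage(num);
  }
}
```

Because the pending slot holds a single value, rapid clicks on Next collapse into one extra render instead of a backlog.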

src/documents/views.py (24 changed lines, Normal file → Executable file)

@@ -20,14 +20,21 @@ from rest_framework.viewsets import (
     ReadOnlyModelViewSet
 )
-from .filters import CorrespondentFilterSet, DocumentFilterSet, TagFilterSet
+from .filters import (
+    CorrespondentFilterSet,
+    DocumentFilterSet,
+    TagFilterSet,
+    DocumentTypeFilterSet
+)
 from .forms import UploadForm
-from .models import Correspondent, Document, Log, Tag
+from .models import Correspondent, Document, Log, Tag, DocumentType
 from .serialisers import (
     CorrespondentSerializer,
     DocumentSerializer,
     LogSerializer,
-    TagSerializer
+    TagSerializer,
+    DocumentTypeSerializer
 )
@@ -116,6 +123,17 @@ class TagViewSet(ModelViewSet):
     ordering_fields = ("name", "slug")
+class DocumentTypeViewSet(ModelViewSet):
+    model = DocumentType
+    queryset = DocumentType.objects.all()
+    serializer_class = DocumentTypeSerializer
+    pagination_class = StandardPagination
+    permission_classes = (IsAuthenticated,)
+    filter_backends = (DjangoFilterBackend, OrderingFilter)
+    filter_class = DocumentTypeFilterSet
+    ordering_fields = ("name", "slug")
 class DocumentViewSet(RetrieveModelMixin,
                       UpdateModelMixin,
                       DestroyModelMixin,

src/manage.py (0 changed lines, Executable file → Normal file)

src/paperless/settings.py (29 changed lines, Normal file → Executable file)

@@ -58,7 +58,7 @@ if _allowed_hosts:
     ALLOWED_HOSTS = _allowed_hosts.split(",")
 FORCE_SCRIPT_NAME = os.getenv("PAPERLESS_FORCE_SCRIPT_NAME")
 # Application definition
 INSTALLED_APPS = [
@@ -144,14 +144,18 @@ DATABASES = {
     }
 }
-if os.getenv("PAPERLESS_DBUSER"):
+if os.getenv("PAPERLESS_DBENGINE"):
     DATABASES["default"] = {
-        "ENGINE": "django.db.backends.postgresql_psycopg2",
+        "ENGINE": os.getenv("PAPERLESS_DBENGINE"),
         "NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
         "USER": os.getenv("PAPERLESS_DBUSER"),
     }
     if os.getenv("PAPERLESS_DBPASS"):
         DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
+    if os.getenv("PAPERLESS_DBHOST"):
+        DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
+    if os.getenv("PAPERLESS_DBPORT"):
+        DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
 # Password validation
@@ -199,6 +203,24 @@ STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
 MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
+# Other
+# Disable Django's artificial limit on the number of form fields to submit at
+# once. This is a protection against overloading the server, but since this is
+# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
+# of log entries outweigh the benefits of such a safeguard.
+DATA_UPLOAD_MAX_NUMBER_FIELDS = None
+# Document classification models location
+MODEL_FILE = os.getenv(
+    "PAPERLESS_MODEL_FILE", os.path.join(
+        BASE_DIR, "..", "models", "model.pickle"
+    )
+)
 # Paperless-specific stuff
 # You shouldn't have to edit any of these values. Rather, you can set these
 # values in /etc/paperless.conf instead.
@@ -296,6 +318,7 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
 # Specify the default date order (for autodetected dates)
 DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
+FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
 # Specify for how many years a correspondent is considered recent. Recent
 # correspondents will be shown in a separate "Recent correspondents" filter as

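The `DATABASES` wiring above layers several optional environment variables on top of each other; the same precedence rules can be checked in isolation. A minimal sketch (`build_database_config` is a hypothetical helper for illustration, not part of Paperless):

```python
def build_database_config(env):
    """Mirror the settings.py logic above: return a non-SQLite config only
    when PAPERLESS_DBENGINE is set, with NAME defaulting to "paperless" and
    the optional PASSWORD/HOST/PORT keys added only when present, so the
    Django defaults (local socket, default port) still apply otherwise."""
    config = {}
    if env.get("PAPERLESS_DBENGINE"):
        config = {
            "ENGINE": env["PAPERLESS_DBENGINE"],
            "NAME": env.get("PAPERLESS_DBNAME", "paperless"),
            "USER": env.get("PAPERLESS_DBUSER"),
        }
        for var, key in (("PAPERLESS_DBPASS", "PASSWORD"),
                         ("PAPERLESS_DBHOST", "HOST"),
                         ("PAPERLESS_DBPORT", "PORT")):
            if env.get(var):
                config[key] = env[var]
    return config
```

Keeping HOST and PORT conditional is what makes the new remote-database support (#445) backwards compatible with socket-based local setups.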
src/paperless/urls.py (4 changed lines, Normal file → Executable file)

@@ -12,12 +12,14 @@ from documents.views import (
     FetchView,
     LogViewSet,
     PushView,
-    TagViewSet
+    TagViewSet,
+    DocumentTypeViewSet
 )
 from reminders.views import ReminderViewSet
 router = DefaultRouter()
 router.register(r"correspondents", CorrespondentViewSet)
+router.register(r"document_types", DocumentTypeViewSet)
 router.register(r"documents", DocumentViewSet)
 router.register(r"logs", LogViewSet)
 router.register(r"reminders", ReminderViewSet)


@@ -1 +1 @@
-__version__ = (2, 5, 0)
+__version__ = (1, 0, 0)


@@ -153,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
                 )
                 raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
                 return raw_text
-            raise OCRError("Language detection failed")
+            error_msg = ("Language detection failed. Set "
+                         "PAPERLESS_FORGIVING_OCR in config file to continue "
+                         "anyway.")
+            raise OCRError(error_msg)
         if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
             raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
@@ -218,7 +221,8 @@ def run_convert(*args):
 def run_unpaper(args):
     unpaper, pnm = args
-    command_args = unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm")
+    command_args = (unpaper, "--overwrite", pnm,
+                    pnm.replace(".pnm", ".unpaper.pnm"))
     if not subprocess.Popen(command_args).wait() == 0:
         raise ParseError("Unpaper failed at {}".format(command_args))

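The `run_unpaper` change above adds `--overwrite` and derives the output name from the input. The argument construction can be exercised without spawning unpaper itself; a sketch assuming the tuple is factored into a helper (`build_unpaper_args` is hypothetical):

```python
def build_unpaper_args(unpaper, pnm):
    # Mirrors the diff above: pass --overwrite so a re-run does not fail
    # when the .unpaper.pnm output from a previous attempt already exists,
    # and derive the output filename from the input .pnm path.
    return (unpaper, "--overwrite", pnm,
            pnm.replace(".pnm", ".unpaper.pnm"))
```

The tuple is passed straight to `subprocess.Popen`, so keeping the arguments as separate elements (rather than one shell string) avoids any quoting issues.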

@@ -5,9 +5,10 @@ from unittest import mock
 from uuid import uuid4
 from dateutil import tz
-from django.test import TestCase
+from django.test import TestCase, override_settings
 from ..parsers import RasterisedDocumentParser
+from django.conf import settings
 class TestDate(TestCase):
@@ -59,9 +60,13 @@ class TestDate(TestCase):
         input_file = os.path.join(self.SAMPLE_FILES, "")
         document = RasterisedDocumentParser(input_file)
         document._text = "lorem ipsum 13.02.2018 lorem ipsum"
+        date = document.get_date()
         self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2018, 2, 13, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -72,10 +77,16 @@ class TestDate(TestCase):
         input_file = os.path.join(self.SAMPLE_FILES, "")
         document = RasterisedDocumentParser(input_file)
         document._text = (
-            "lorem ipsum 130218, 2018, 20180213 and 13.02.2018 lorem ipsum")
+            "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
+            "ipsum"
+        )
+        date = document.get_date()
         self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2018, 2, 13, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -110,9 +121,13 @@ class TestDate(TestCase):
             "März 2019\n"
             "lorem ipsum"
         )
+        date = document.get_date()
         self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2019, 3, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -122,19 +137,25 @@ class TestDate(TestCase):
     def test_date_format_8(self):
         input_file = os.path.join(self.SAMPLE_FILES, "")
         document = RasterisedDocumentParser(input_file)
-        document._text = ("lorem ipsum\n"
-                          "Wohnort\n"
-                          "3100\n"
-                          "IBAN\n"
-                          "AT87 4534\n"
-                          "1234\n"
-                          "1234 5678\n"
-                          "BIC\n"
-                          "lorem ipsum\n"
-                          "März 2020")
-        self.assertEqual(document.get_date(),
-                         datetime.datetime(2020, 3, 1, 0, 0,
-                                           tzinfo=tz.tzutc()))
+        document._text = (
+            "lorem ipsum\n"
+            "Wohnort\n"
+            "3100\n"
+            "IBAN\n"
+            "AT87 4534\n"
+            "1234\n"
+            "1234 5678\n"
+            "BIC\n"
+            "lorem ipsum\n"
+            "März 2020"
+        )
+        self.assertEqual(
+            document.get_date(),
+            datetime.datetime(
+                2020, 3, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )
     @mock.patch(
         "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
@@ -143,13 +164,19 @@ class TestDate(TestCase):
     def test_date_format_9(self):
         input_file = os.path.join(self.SAMPLE_FILES, "")
         document = RasterisedDocumentParser(input_file)
-        document._text = ("lorem ipsum\n"
-                          "27. Nullmonth 2020\n"
-                          "März 2020\n"
-                          "lorem ipsum")
-        self.assertEqual(document.get_date(),
-                         datetime.datetime(2020, 3, 1, 0, 0,
-                                           tzinfo=tz.tzutc()))
+        document._text = (
+            "lorem ipsum\n"
+            "27. Nullmonth 2020\n"
+            "März 2020\n"
+            "lorem ipsum"
+        )
+        self.assertEqual(
+            document.get_date(),
+            datetime.datetime(
+                2020, 3, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )
     @mock.patch(
         "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
@@ -158,11 +185,16 @@ class TestDate(TestCase):
     def test_get_text_1_pdf(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
+        date = document.get_date()
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(
-            document.get_date(),
-            datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
+            date,
+            datetime.datetime(
+                2018, 4, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -172,11 +204,15 @@ class TestDate(TestCase):
     def test_get_text_1_png(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), False)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 4, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -186,11 +222,15 @@ class TestDate(TestCase):
     def test_get_text_2_pdf(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2013, 2, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -200,67 +240,91 @@ class TestDate(TestCase):
     def test_get_text_2_png(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), False)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2013, 2, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
         "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
         SCRATCH
     )
+    @override_settings(OCR_LANGUAGE="deu")
     def test_get_text_3_pdf(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 10, 5, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
         "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
         SCRATCH
     )
+    @override_settings(OCR_LANGUAGE="deu")
     def test_get_text_3_png(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), False)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 10, 5, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
         "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
         SCRATCH
     )
+    @override_settings(OCR_LANGUAGE="eng")
     def test_get_text_4_pdf(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 10, 5, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
         "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
         SCRATCH
     )
+    @override_settings(OCR_LANGUAGE="eng")
     def test_get_text_4_png(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), False)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 10, 5, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -270,11 +334,15 @@ class TestDate(TestCase):
     def test_get_text_5_pdf(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 12, 17, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -284,11 +352,15 @@ class TestDate(TestCase):
     def test_get_text_5_png(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), False)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 12, 17, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -303,7 +375,10 @@ class TestDate(TestCase):
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 12, 17, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -318,7 +393,10 @@ class TestDate(TestCase):
         self.assertEqual(document._is_ocred(), False)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 12, 17, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -328,6 +406,7 @@ class TestDate(TestCase):
     def test_get_text_6_pdf_eu(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(document.get_date(), None)
@@ -339,6 +418,7 @@ class TestDate(TestCase):
     def test_get_text_6_png_eu(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), False)
         self.assertEqual(document.get_date(), None)
@@ -350,11 +430,15 @@ class TestDate(TestCase):
     def test_get_text_7_pdf(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2018, 4, 1, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -364,11 +448,15 @@ class TestDate(TestCase):
     def test_get_text_8_pdf(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2017, 12, 31, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
     @mock.patch(
@@ -378,11 +466,100 @@ class TestDate(TestCase):
     def test_get_text_9_pdf(self):
         input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
         document = RasterisedDocumentParser(input_file)
+        document.DATE_ORDER = 'DMY'
         document.get_text()
         self.assertEqual(document._is_ocred(), True)
         self.assertEqual(
             document.get_date(),
-            datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
+            datetime.datetime(
+                2017, 12, 31, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH
+    )
+    def test_filename_date_1_pdf(self):
+        input_file = os.path.join(
+            self.SAMPLE_FILES,
+            "tests_date_in_filename_2018-03-20_1.pdf"
+        )
+        document = RasterisedDocumentParser(input_file)
+        document.FILENAME_DATE_ORDER = 'YMD'
+        document.get_text()
+        date = document.get_date()
+        self.assertEqual(document._is_ocred(), True)
+        self.assertEqual(
+            date,
+            datetime.datetime(
+                2018, 3, 20, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH
+    )
+    def test_filename_date_1_png(self):
+        input_file = os.path.join(
+            self.SAMPLE_FILES,
+            "tests_date_in_filename_2018-03-20_1.png"
+        )
+        document = RasterisedDocumentParser(input_file)
+        document.FILENAME_DATE_ORDER = 'YMD'
+        date = document.get_date()
+        self.assertEqual(document._is_ocred(), False)
+        self.assertEqual(
+            date,
+            datetime.datetime(
+                2018, 3, 20, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH
+    )
+    def test_filename_date_2_pdf(self):
+        input_file = os.path.join(
+            self.SAMPLE_FILES,
+            "2013-12-11_tests_date_in_filename_2.pdf"
+        )
+        document = RasterisedDocumentParser(input_file)
+        document.FILENAME_DATE_ORDER = 'YMD'
+        date = document.get_date()
+        self.assertEqual(document._is_ocred(), True)
+        self.assertEqual(
+            date,
+            datetime.datetime(
+                2013, 12, 11, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
+        )
+    @mock.patch(
+        "paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
+        SCRATCH
+    )
+    def test_filename_date_2_png(self):
+        input_file = os.path.join(
+            self.SAMPLE_FILES,
+            "2013-12-11_tests_date_in_filename_2.png"
+        )
+        document = RasterisedDocumentParser(input_file)
+        document.FILENAME_DATE_ORDER = 'YMD'
+        date = document.get_date()
+        self.assertEqual(document._is_ocred(), False)
+        self.assertEqual(
+            date,
+            datetime.datetime(
+                2013, 12, 11, 0, 0,
+                tzinfo=tz.gettz(settings.TIME_ZONE)
+            )
         )
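The filename-date tests above assume a `YMD` date embedded in the file name, e.g. `tests_date_in_filename_2018-03-20_1.pdf`. A minimal sketch of that extraction using only the standard library (`parse_filename_date` is a hypothetical helper; the real implementation is driven by `PAPERLESS_FILENAME_DATE_ORDER` and presumably handles more formats than the single dash-separated pattern shown here):

```python
import datetime
import re


def parse_filename_date(filename, filename_date_order="YMD"):
    # Look for a dash-separated YYYY-MM-DD date anywhere in the name.
    # Only the "YMD" order is sketched; other orders return None here.
    if filename_date_order != "YMD":
        return None
    match = re.search(r"(\d{4})-(\d{2})-(\d{2})", filename)
    if not match:
        return None
    year, month, day = (int(g) for g in match.groups())
    return datetime.datetime(year, month, day)
```

A date found this way takes precedence over OCR text, which is why the PNG variants of the tests can assert a date even though `_is_ocred()` is False.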
     @mock.patch(