mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-12-21 10:31:20 +00:00
Compare commits
130 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
872d657361 | ||
|
|
ea58c66fd4 | ||
|
|
bcd9220021 | ||
|
|
766109ae4e | ||
|
|
3c2a1a8c13 | ||
|
|
1c7047bbb8 | ||
|
|
96dafe8c43 | ||
|
|
d6896daece | ||
|
|
d12f0642f2 | ||
|
|
e3a616ebc3 | ||
|
|
f898ec792f | ||
|
|
f45b6762f2 | ||
|
|
d544f269e0 | ||
|
|
650db75c2b | ||
|
|
7dbb77e57b | ||
|
|
f1b3312bcb | ||
|
|
ea05ab2b06 | ||
|
|
4f4c515629 | ||
|
|
c1f926a40c | ||
|
|
c1d18c1e83 | ||
|
|
ba452e0524 | ||
|
|
c5488dcb98 | ||
|
|
d6eefbccee | ||
|
|
a813288aaf | ||
|
|
63e2fbe0c9 | ||
|
|
597a7bb391 | ||
|
|
730daa3d6d | ||
|
|
c225281f95 | ||
|
|
e1d8744c66 | ||
|
|
4409f65840 | ||
|
|
c83dc666a4 | ||
|
|
9ab50ed09d | ||
|
|
e0acb4a40b | ||
|
|
eca6250c1b | ||
|
|
33abec0663 | ||
|
|
d825667c9b | ||
|
|
84511f8418 | ||
|
|
81e488b90d | ||
|
|
bff28113df | ||
|
|
0b377a76d0 | ||
|
|
ec1d5c80ff | ||
|
|
bd95804fbf | ||
|
|
8dc355a66f | ||
|
|
fbb389553c | ||
|
|
f8cfbb44d2 | ||
|
|
818780a191 | ||
|
|
b350ec48b7 | ||
|
|
f948ee11be | ||
|
|
b347e3347d | ||
|
|
7257cece30 | ||
|
|
5b9f38d398 | ||
|
|
b31d4779bf | ||
|
|
60618381f8 | ||
|
|
779ea6a015 | ||
|
|
94ede7389d | ||
|
|
03beca7838 | ||
|
|
fb1dcb6e08 | ||
|
|
a298cbd4ce | ||
|
|
f1a1e7f1a4 | ||
|
|
8371c2399f | ||
|
|
909586bf25 | ||
|
|
8d003a6a85 | ||
|
|
0209b71404 | ||
|
|
0dc3644cc1 | ||
|
|
fb1a2ee577 | ||
|
|
7c589f71a4 | ||
|
|
25a6aa909b | ||
|
|
ef0d37985b | ||
|
|
898931cc03 | ||
|
|
17803e7936 | ||
|
|
e72735c4f0 | ||
|
|
46a5bc00d7 | ||
|
|
d46ee11143 | ||
|
|
d2534a73e5 | ||
|
|
11adc94e5e | ||
|
|
04bf5fc094 | ||
|
|
d26f940a91 | ||
|
|
13725ef8ee | ||
|
|
6f0ca432c4 | ||
|
|
dd8746bac7 | ||
|
|
8eeded95c4 | ||
|
|
131e1c9dd8 | ||
|
|
a6b4fc7e81 | ||
|
|
cea880f245 | ||
|
|
82bc0e3368 | ||
|
|
70bd05450a | ||
|
|
c765ef5eeb | ||
|
|
30134034e2 | ||
|
|
8a1a736340 | ||
|
|
68652c8c37 | ||
|
|
c091eba26e | ||
|
|
ca315ba76c | ||
|
|
350da81081 | ||
|
|
4129002086 | ||
|
|
781a1dae71 | ||
|
|
01fed4f49d | ||
|
|
d7ab69fed9 | ||
|
|
dfa5ea423f | ||
|
|
a698a1b66b | ||
|
|
a5129018d2 | ||
|
|
e3974c68ba | ||
|
|
d72604eb86 | ||
|
|
f0c94cc65f | ||
|
|
f21debe95d | ||
|
|
033ab72475 | ||
|
|
b059602050 | ||
|
|
2775dfb735 | ||
|
|
04384c7037 | ||
|
|
75beb91791 | ||
|
|
b138f4b52b | ||
|
|
d108a69f1b | ||
|
|
bdaea3915e | ||
|
|
9e71b70d4b | ||
|
|
960340a5db | ||
|
|
b3709663f1 | ||
|
|
9f20175cd3 | ||
|
|
adf57b2669 | ||
|
|
f2c32d840e | ||
|
|
ba9d7c8892 | ||
|
|
270b0487ec | ||
|
|
a63880ed19 | ||
|
|
a40737bd0e | ||
|
|
c5b315f518 | ||
|
|
e143a20f50 | ||
|
|
c3a144f2ca | ||
|
|
38bb1f9672 | ||
|
|
22da848be4 | ||
|
|
a53e30e0a5 | ||
|
|
7a2bd58ef8 | ||
|
|
8f6231bd34 |
25
.editorconfig
Normal file
25
.editorconfig
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# EditorConfig: http://EditorConfig.org
|
||||||
|
|
||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
indent_style = tab
|
||||||
|
indent_size = 2
|
||||||
|
insert_final_newline = true
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
end_of_line = lf
|
||||||
|
charset = utf-8
|
||||||
|
max_line_length = 79
|
||||||
|
|
||||||
|
[{*.html,*.css,*.js}]
|
||||||
|
max_line_length = off
|
||||||
|
|
||||||
|
[*.py]
|
||||||
|
indent_size = 4
|
||||||
|
indent_style = space
|
||||||
|
|
||||||
|
# Tests don't get a line width restriction. It's still a good idea to follow
|
||||||
|
# the 79 character rule, but in the interests of clarity, tests often need to
|
||||||
|
# violate it.
|
||||||
|
[**/test_*.py]
|
||||||
|
max_line_length = off
|
||||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -73,7 +73,6 @@ db.sqlite3
|
|||||||
# Other stuff that doesn't belong
|
# Other stuff that doesn't belong
|
||||||
.virtualenv
|
.virtualenv
|
||||||
virtualenv
|
virtualenv
|
||||||
.vagrant
|
|
||||||
docker-compose.yml
|
docker-compose.yml
|
||||||
docker-compose.env
|
docker-compose.env
|
||||||
|
|
||||||
@@ -83,3 +82,6 @@ scripts/nuke
|
|||||||
|
|
||||||
# Static files collected by the collectstatic command
|
# Static files collected by the collectstatic command
|
||||||
static/
|
static/
|
||||||
|
|
||||||
|
# Classification Models
|
||||||
|
models/
|
||||||
|
|||||||
@@ -13,10 +13,10 @@ ENV PAPERLESS_EXPORT_DIR=/export \
|
|||||||
PAPERLESS_CONSUMPTION_DIR=/consume
|
PAPERLESS_CONSUMPTION_DIR=/consume
|
||||||
|
|
||||||
|
|
||||||
RUN apk update --no-cache && apk add python3 gnupg libmagic bash shadow curl \
|
RUN apk update --no-cache && apk add python3 gnupg libmagic libpq bash shadow curl \
|
||||||
sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
|
sudo poppler tesseract-ocr imagemagick ghostscript unpaper optipng && \
|
||||||
apk add --virtual .build-dependencies \
|
apk add --virtual .build-dependencies \
|
||||||
python3-dev poppler-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
python3-dev poppler-dev postgresql-dev gcc g++ musl-dev zlib-dev jpeg-dev && \
|
||||||
# Install python dependencies
|
# Install python dependencies
|
||||||
python3 -m ensurepip && \
|
python3 -m ensurepip && \
|
||||||
rm -r /usr/lib/python*/ensurepip && \
|
rm -r /usr/lib/python*/ensurepip && \
|
||||||
|
|||||||
5
Pipfile
5
Pipfile
@@ -25,6 +25,8 @@ python-dateutil = "*"
|
|||||||
python-dotenv = "*"
|
python-dotenv = "*"
|
||||||
python-gnupg = "*"
|
python-gnupg = "*"
|
||||||
pytz = "*"
|
pytz = "*"
|
||||||
|
sphinx = "*"
|
||||||
|
tox = "*"
|
||||||
pycodestyle = "*"
|
pycodestyle = "*"
|
||||||
pytest = "*"
|
pytest = "*"
|
||||||
pytest-cov = "*"
|
pytest-cov = "*"
|
||||||
@@ -35,6 +37,3 @@ pytest-xdist = "*"
|
|||||||
|
|
||||||
[dev-packages]
|
[dev-packages]
|
||||||
ipython = "*"
|
ipython = "*"
|
||||||
sphinx = "*"
|
|
||||||
tox = "*"
|
|
||||||
|
|
||||||
|
|||||||
623
Pipfile.lock
generated
623
Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"_meta": {
|
"_meta": {
|
||||||
"hash": {
|
"hash": {
|
||||||
"sha256": "6d8bad24aa5d0c102b13b5ae27acba04836cd5a07a4003cb2763de1e0a3406b7"
|
"sha256": "3782f7e6b5461c39c8fd0d0048a4622418f247439113bd3cdc91712fd47036f6"
|
||||||
},
|
},
|
||||||
"pipfile-spec": 6,
|
"pipfile-spec": 6,
|
||||||
"requires": {},
|
"requires": {},
|
||||||
@@ -14,12 +14,18 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"default": {
|
"default": {
|
||||||
|
"alabaster": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
|
||||||
|
"sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
|
||||||
|
],
|
||||||
|
"version": "==0.7.12"
|
||||||
|
},
|
||||||
"apipkg": {
|
"apipkg": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6",
|
"sha256:37228cda29411948b422fae072f57e31d3396d2ee1c9783775980ee9c9990af6",
|
||||||
"sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c"
|
"sha256:58587dd4dc3daefad0487f6d9ae32b4542b185e1c36db6993290e7c41ca2b47c"
|
||||||
],
|
],
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
|
||||||
"version": "==1.5"
|
"version": "==1.5"
|
||||||
},
|
},
|
||||||
"atomicwrites": {
|
"atomicwrites": {
|
||||||
@@ -27,7 +33,6 @@
|
|||||||
"sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
|
"sha256:0312ad34fcad8fac3704d441f7b317e50af620823353ec657a53e981f92920c0",
|
||||||
"sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
|
"sha256:ec9ae8adaae229e4f8446952d204a3e4b5fdd2d099f9be3aaf556120135fb3ee"
|
||||||
],
|
],
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
|
||||||
"version": "==1.2.1"
|
"version": "==1.2.1"
|
||||||
},
|
},
|
||||||
"attrs": {
|
"attrs": {
|
||||||
@@ -37,12 +42,26 @@
|
|||||||
],
|
],
|
||||||
"version": "==18.2.0"
|
"version": "==18.2.0"
|
||||||
},
|
},
|
||||||
|
"babel": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669",
|
||||||
|
"sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23"
|
||||||
|
],
|
||||||
|
"version": "==2.6.0"
|
||||||
|
},
|
||||||
|
"backcall": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
|
||||||
|
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
|
||||||
|
],
|
||||||
|
"version": "==0.1.0"
|
||||||
|
},
|
||||||
"certifi": {
|
"certifi": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:376690d6f16d32f9d1fe8932551d80b23e9d393a8578c5633a2ed39a64861638",
|
"sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c",
|
||||||
"sha256:456048c7e371c089d0a77a5212fb37a2c2dce1e24146e3b7e0261736aaeaa22a"
|
"sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a"
|
||||||
],
|
],
|
||||||
"version": "==2018.8.24"
|
"version": "==2018.10.15"
|
||||||
},
|
},
|
||||||
"chardet": {
|
"chardet": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -55,6 +74,7 @@
|
|||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba",
|
"sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba",
|
||||||
"sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed",
|
"sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed",
|
||||||
|
"sha256:0bf8cbbd71adfff0ef1f3a1531e6402d13b7b01ac50a79c97ca15f030dba6306",
|
||||||
"sha256:10a46017fef60e16694a30627319f38a2b9b52e90182dddb6e37dcdab0f4bf95",
|
"sha256:10a46017fef60e16694a30627319f38a2b9b52e90182dddb6e37dcdab0f4bf95",
|
||||||
"sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640",
|
"sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640",
|
||||||
"sha256:23d341cdd4a0371820eb2b0bd6b88f5003a7438bbedb33688cd33b8eae59affd",
|
"sha256:23d341cdd4a0371820eb2b0bd6b88f5003a7438bbedb33688cd33b8eae59affd",
|
||||||
@@ -83,18 +103,18 @@
|
|||||||
"sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd",
|
"sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd",
|
||||||
"sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d",
|
"sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d",
|
||||||
"sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
|
"sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6",
|
||||||
|
"sha256:f05a636b4564104120111800021a92e43397bc12a5c72fed7036be8556e0029e",
|
||||||
"sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
|
"sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4'",
|
|
||||||
"version": "==4.5.1"
|
"version": "==4.5.1"
|
||||||
},
|
},
|
||||||
"coveralls": {
|
"coveralls": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:9dee67e78ec17b36c52b778247762851c8e19a893c9a14e921a2fc37f05fac22",
|
"sha256:ab638e88d38916a6cedbf80a9cd8992d5fa55c77ab755e262e00b36792b7cd6d",
|
||||||
"sha256:aec5a1f5e34224b9089664a1b62217732381c7de361b6ed1b3c394d7187b352a"
|
"sha256:b2388747e2529fa4c669fb1e3e2756e4e07b6ee56c7d9fce05f35ccccc913aa0"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==1.5.0"
|
"version": "==1.5.1"
|
||||||
},
|
},
|
||||||
"dateparser": {
|
"dateparser": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -104,13 +124,20 @@
|
|||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==0.7.0"
|
"version": "==0.7.0"
|
||||||
},
|
},
|
||||||
|
"decorator": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
|
||||||
|
"sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
|
||||||
|
],
|
||||||
|
"version": "==4.3.0"
|
||||||
|
},
|
||||||
"django": {
|
"django": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:0c5b65847d00845ee404bbc0b4a85686f15eb3001ffddda3db4e9baa265bf136",
|
"sha256:25df265e1fdb74f7e7305a1de620a84681bcc9c05e84a3ed97e4a1a63024f18d",
|
||||||
"sha256:68aeea369a8130259354b6ba1fa9babe0c5ee6bced505dea4afcd00f765ae38b"
|
"sha256:d6d94554abc82ca37e447c3d28958f5ac39bd7d4adaa285543ae97fb1129fd69"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==2.0.8"
|
"version": "==2.0.9"
|
||||||
},
|
},
|
||||||
"django-cors-headers": {
|
"django-cors-headers": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -130,11 +157,11 @@
|
|||||||
},
|
},
|
||||||
"django-extensions": {
|
"django-extensions": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:1f626353a11479014bfe0d77e76d8f866ebca1bb5d595cb57b776230b9e0eb92",
|
"sha256:30cb6a8c7d6f75a55edf0c0c4491bd98f8264ae1616ce105f9cecac4387edd07",
|
||||||
"sha256:f21b898598a1628cb73017fb9672e2c5e624133be9764f0eb138e0abf8a62b62"
|
"sha256:4ad86a7a5e84f1c77db030761ae87a600647250c652030a2b71a16e87f3a3d62"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==2.1.2"
|
"version": "==2.1.3"
|
||||||
},
|
},
|
||||||
"django-filter": {
|
"django-filter": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -146,11 +173,11 @@
|
|||||||
},
|
},
|
||||||
"djangorestframework": {
|
"djangorestframework": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:b6714c3e4b0f8d524f193c91ecf5f5450092c2145439ac2769711f7eba89a9d9",
|
"sha256:607865b0bb1598b153793892101d881466bd5a991de12bd6229abb18b1c86136",
|
||||||
"sha256:c375e4f95a3a64fccac412e36fb42ba36881e52313ec021ef410b40f67cddca4"
|
"sha256:63f76cbe1e7d12b94c357d7e54401103b2e52aef0f7c1650d6c820ad708776e5"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==3.8.2"
|
"version": "==3.9.0"
|
||||||
},
|
},
|
||||||
"docopt": {
|
"docopt": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -158,12 +185,19 @@
|
|||||||
],
|
],
|
||||||
"version": "==0.6.2"
|
"version": "==0.6.2"
|
||||||
},
|
},
|
||||||
|
"docutils": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
|
||||||
|
"sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274",
|
||||||
|
"sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6"
|
||||||
|
],
|
||||||
|
"version": "==0.14"
|
||||||
|
},
|
||||||
"execnet": {
|
"execnet": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a",
|
"sha256:a7a84d5fa07a089186a329528f127c9d73b9de57f1a1131b82bb5320ee651f6a",
|
||||||
"sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83"
|
"sha256:fc155a6b553c66c838d1a22dba1dc9f5f505c43285a878c6f74a79c024750b83"
|
||||||
],
|
],
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
|
||||||
"version": "==1.5.0"
|
"version": "==1.5.0"
|
||||||
},
|
},
|
||||||
"factory-boy": {
|
"factory-boy": {
|
||||||
@@ -176,11 +210,17 @@
|
|||||||
},
|
},
|
||||||
"faker": {
|
"faker": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:ea7cfd3aeb1544732d08bd9cfba40c5b78e3a91e17b1a0698ab81bfc5554c628",
|
"sha256:2621643b80a10b91999925cfd20f64d2b36f20bf22136bbdc749bb57d6ffe124",
|
||||||
"sha256:f6d67f04abfb2b4bea7afc7fa6c18cf4c523a67956e455668be9ae42bccc21ad"
|
"sha256:5ed822d31bd2d6edf10944d176d30dc9c886afdd381eefb7ba8b7aad86171646"
|
||||||
],
|
],
|
||||||
"markers": "python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.2.*' and python_version >= '2.7'",
|
"version": "==0.9.2"
|
||||||
"version": "==0.9.0"
|
},
|
||||||
|
"filelock": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:b8d5ca5ca1c815e1574aee746650ea7301de63d87935b3463d26368b76e31633",
|
||||||
|
"sha256:d610c1bb404daf85976d7a82eb2ada120f04671007266b708606565dd03b5be6"
|
||||||
|
],
|
||||||
|
"version": "==3.0.10"
|
||||||
},
|
},
|
||||||
"filemagic": {
|
"filemagic": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -190,12 +230,14 @@
|
|||||||
"version": "==1.6"
|
"version": "==1.6"
|
||||||
},
|
},
|
||||||
"fuzzywuzzy": {
|
"fuzzywuzzy": {
|
||||||
|
"extras": [
|
||||||
|
"speedup"
|
||||||
|
],
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:3759bc6859daa0eecef8c82b45404bdac20c23f23136cf4c18b46b426bbc418f",
|
"sha256:3759bc6859daa0eecef8c82b45404bdac20c23f23136cf4c18b46b426bbc418f",
|
||||||
"sha256:5b36957ccf836e700f4468324fa80ba208990385392e217be077d5cd738ae602"
|
"sha256:5b36957ccf836e700f4468324fa80ba208990385392e217be077d5cd738ae602"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"markers": null,
|
|
||||||
"version": "==0.15.0"
|
"version": "==0.15.0"
|
||||||
},
|
},
|
||||||
"gunicorn": {
|
"gunicorn": {
|
||||||
@@ -213,6 +255,13 @@
|
|||||||
],
|
],
|
||||||
"version": "==2.7"
|
"version": "==2.7"
|
||||||
},
|
},
|
||||||
|
"imagesize": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
|
||||||
|
"sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
|
||||||
|
],
|
||||||
|
"version": "==1.1.0"
|
||||||
|
},
|
||||||
"inotify-simple": {
|
"inotify-simple": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:fc2c10dd73278a1027d0663f2db51240af5946390f363a154361406ebdddd8dd"
|
"sha256:fc2c10dd73278a1027d0663f2db51240af5946390f363a154361406ebdddd8dd"
|
||||||
@@ -220,6 +269,35 @@
|
|||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==1.1.8"
|
"version": "==1.1.8"
|
||||||
},
|
},
|
||||||
|
"ipython": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:a5781d6934a3341a1f9acb4ea5acdc7ea0a0855e689dbe755d070ca51e995435",
|
||||||
|
"sha256:b10a7ddd03657c761fc503495bc36471c8158e3fc948573fb9fe82a7029d8efd"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==7.1.1"
|
||||||
|
},
|
||||||
|
"ipython-genutils": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
|
||||||
|
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
|
||||||
|
],
|
||||||
|
"version": "==0.2.0"
|
||||||
|
},
|
||||||
|
"jedi": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:0191c447165f798e6a730285f2eee783fff81b0d3df261945ecb80983b5c3ca7",
|
||||||
|
"sha256:b7493f73a2febe0dc33d51c99b474547f7f6c0b2c8fb2b21f453eef204c12148"
|
||||||
|
],
|
||||||
|
"version": "==0.13.1"
|
||||||
|
},
|
||||||
|
"jinja2": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
|
||||||
|
"sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
|
||||||
|
],
|
||||||
|
"version": "==2.10"
|
||||||
|
},
|
||||||
"langdetect": {
|
"langdetect": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:91a170d5f0ade380db809b3ba67f08e95fe6c6c8641f96d67a51ff7e98a9bf30"
|
"sha256:91a170d5f0ade380db809b3ba67f08e95fe6c6c8641f96d67a51ff7e98a9bf30"
|
||||||
@@ -227,6 +305,12 @@
|
|||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==1.0.7"
|
"version": "==1.0.7"
|
||||||
},
|
},
|
||||||
|
"markupsafe": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
|
||||||
|
],
|
||||||
|
"version": "==1.0"
|
||||||
|
},
|
||||||
"more-itertools": {
|
"more-itertools": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092",
|
"sha256:c187a73da93e7a8acc0001572aebc7e3c69daf7bf6881a2cea10650bd4420092",
|
||||||
@@ -235,64 +319,106 @@
|
|||||||
],
|
],
|
||||||
"version": "==4.3.0"
|
"version": "==4.3.0"
|
||||||
},
|
},
|
||||||
|
"packaging": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807",
|
||||||
|
"sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9"
|
||||||
|
],
|
||||||
|
"version": "==18.0"
|
||||||
|
},
|
||||||
|
"parso": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2",
|
||||||
|
"sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24"
|
||||||
|
],
|
||||||
|
"version": "==0.3.1"
|
||||||
|
},
|
||||||
"pdftotext": {
|
"pdftotext": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:b7312302007e19fc784263a321b41682f01a582af84e14200cef53b3f4e69a50"
|
"sha256:e3ad11efe0aa22cbfc46aa1296b2ea5a52ad208b778288311f2801adef178ccb"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==2.1.0"
|
"version": "==2.1.1"
|
||||||
|
},
|
||||||
|
"pexpect": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
|
||||||
|
"sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
|
||||||
|
],
|
||||||
|
"markers": "sys_platform != 'win32'",
|
||||||
|
"version": "==4.6.0"
|
||||||
|
},
|
||||||
|
"pickleshare": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
|
||||||
|
"sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
|
||||||
|
],
|
||||||
|
"version": "==0.7.5"
|
||||||
},
|
},
|
||||||
"pillow": {
|
"pillow": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:00def5b638994f888d1058e4d17c86dec8e1113c3741a0a8a659039aec59a83a",
|
"sha256:00203f406818c3f45d47bb8fe7e67d3feddb8dcbbd45a289a1de7dd789226360",
|
||||||
"sha256:026449b64e559226cdb8e6d8c931b5965d8fc90ec18ebbb0baa04c5b36503c72",
|
"sha256:0616f800f348664e694dddb0b0c88d26761dd5e9f34e1ed7b7a7d2da14b40cb7",
|
||||||
"sha256:03dbb224ee196ef30ed2156d41b579143e1efeb422974719a5392fc035e4f574",
|
"sha256:1f7908aab90c92ad85af9d2fec5fc79456a89b3adcc26314d2cde0e238bd789e",
|
||||||
"sha256:03eb0e04f929c102ae24bc436bf1c0c60a4e63b07ebd388e84d8b219df3e6acd",
|
"sha256:2ea3517cd5779843de8a759c2349a3cd8d3893e03ab47053b66d5ec6f8bc4f93",
|
||||||
"sha256:1be66b9a89e367e7d20d6cae419794997921fe105090fafd86ef39e20a3baab2",
|
"sha256:48a9f0538c91fc136b3a576bee0e7cd174773dc9920b310c21dcb5519722e82c",
|
||||||
"sha256:1e977a3ed998a599bda5021fb2c2889060617627d3ae228297a529a082a3cd5c",
|
"sha256:5280ebc42641a1283b7b1f2c20e5b936692198b9dd9995527c18b794850be1a8",
|
||||||
"sha256:22cf3406d135cfcc13ec6228ade774c8461e125c940e80455f500638429be273",
|
"sha256:5e34e4b5764af65551647f5cc67cf5198c1d05621781d5173b342e5e55bf023b",
|
||||||
"sha256:24adccf1e834f82718c7fc8e3ec1093738da95144b8b1e44c99d5fc7d3e9c554",
|
"sha256:63b120421ab85cad909792583f83b6ca3584610c2fe70751e23f606a3c2e87f0",
|
||||||
"sha256:2a3e362c97a5e6a259ee9cd66553292a1f8928a5bdfa3622fdb1501570834612",
|
"sha256:696b5e0109fe368d0057f484e2e91717b49a03f1e310f857f133a4acec9f91dd",
|
||||||
"sha256:3832e26ecbc9d8a500821e3a1d3765bda99d04ae29ffbb2efba49f5f788dc934",
|
"sha256:870ed021a42b1b02b5fe4a739ea735f671a84128c0a666c705db2cb9abd528eb",
|
||||||
"sha256:4fd1f0c2dc02aaec729d91c92cd85a2df0289d88e9f68d1e8faba750bb9c4786",
|
"sha256:916da1c19e4012d06a372127d7140dae894806fad67ef44330e5600d77833581",
|
||||||
"sha256:4fda62030f2c515b6e2e673c57caa55cb04026a81968f3128aae10fc28e5cc27",
|
"sha256:9303a289fa0811e1c6abd9ddebfc770556d7c3311cb2b32eff72164ddc49bc64",
|
||||||
"sha256:5044d75a68b49ce36a813c82d8201384207112d5d81643937fc758c05302f05b",
|
"sha256:9577888ecc0ad7d06c3746afaba339c94d62b59da16f7a5d1cff9e491f23dace",
|
||||||
"sha256:522184556921512ec484cb93bd84e0bab915d0ac5a372d49571c241a7f73db62",
|
"sha256:987e1c94a33c93d9b209315bfda9faa54b8edfce6438a1e93ae866ba20de5956",
|
||||||
"sha256:5914cff11f3e920626da48e564be6818831713a3087586302444b9c70e8552d9",
|
"sha256:99a3bbdbb844f4fb5d6dd59fac836a40749781c1fa63c563bc216c27aef63f60",
|
||||||
"sha256:6661a7908d68c4a133e03dac8178287aa20a99f841ea90beeb98a233ae3fd710",
|
"sha256:99db8dc3097ceafbcff9cb2bff384b974795edeb11d167d391a02c7bfeeb6e16",
|
||||||
"sha256:79258a8df3e309a54c7ef2ef4a59bb8e28f7e4a8992a3ad17c24b1889ced44f3",
|
"sha256:a5a96cf49eb580756a44ecf12949e52f211e20bffbf5a95760ac14b1e499cd37",
|
||||||
"sha256:7d74c20b8f1c3e99d3f781d3b8ff5abfefdd7363d61e23bdeba9992ff32cc4b4",
|
"sha256:aa6ca3eb56704cdc0d876fc6047ffd5ee960caad52452fbee0f99908a141a0ae",
|
||||||
"sha256:81918afeafc16ba5d9d0d4e9445905f21aac969a4ebb6f2bff4b9886da100f4b",
|
"sha256:aade5e66795c94e4a2b2624affeea8979648d1b0ae3fcee17e74e2c647fc4a8a",
|
||||||
"sha256:8194d913ca1f459377c8a4ed8f9b7ad750068b8e0e3f3f9c6963fcc87a84515f",
|
"sha256:b78905860336c1d292409e3df6ad39cc1f1c7f0964e66844bbc2ebfca434d073",
|
||||||
"sha256:84d5d31200b11b3c76fab853b89ac898bf2d05c8b3da07c1fcc23feb06359d6e",
|
"sha256:b92f521cdc4e4a3041cc343625b699f20b0b5f976793fb45681aac1efda565f8",
|
||||||
"sha256:989981db57abffb52026b114c9a1f114c7142860a6d30a352d28f8cbf186500b",
|
"sha256:bfde84bbd6ae5f782206d454b67b7ee8f7f818c29b99fd02bf022fd33bab14cb",
|
||||||
"sha256:a3d7511d3fad1618a82299aab71a5fceee5c015653a77ffea75ced9ef917e71a",
|
"sha256:c2b62d3df80e694c0e4a0ed47754c9480521e25642251b3ab1dff050a4e60409",
|
||||||
"sha256:b3ef168d4d6fd4fa6685aef7c91400f59f7ab1c0da734541f7031699741fb23f",
|
"sha256:c5e2be6c263b64f6f7656e23e18a4a9980cffc671442795682e8c4e4f815dd9f",
|
||||||
"sha256:c1c5792b6e74bbf2af0f8e892272c2a6c48efa895903211f11b8342e03129fea",
|
"sha256:c99aa3c63104e0818ec566f8ff3942fb7c7a8f35f9912cb63fd8e12318b214b2",
|
||||||
"sha256:c5dcb5a56aebb8a8f2585042b2f5c496d7624f0bcfe248f0cc33ceb2fd8d39e7",
|
"sha256:dae06620d3978da346375ebf88b9e2dd7d151335ba668c995aea9ed07af7add4",
|
||||||
"sha256:e2bed4a04e2ca1050bb5f00865cf2f83c0b92fd62454d9244f690fcd842e27a4",
|
"sha256:db5499d0710823fa4fb88206050d46544e8f0e0136a9a5f5570b026584c8fd74",
|
||||||
"sha256:e87a527c06319428007e8c30511e1f0ce035cb7f14bb4793b003ed532c3b9333",
|
"sha256:f36baafd82119c4a114b9518202f2a983819101dcc14b26e43fc12cbefdce00e",
|
||||||
"sha256:f63e420180cbe22ff6e32558b612e75f50616fc111c5e095a4631946c782e109",
|
"sha256:f52b79c8796d81391ab295b04e520bda6feed54d54931708872e8f9ae9db0ea1",
|
||||||
"sha256:f8b3d413c5a8f84b12cd4c5df1d8e211777c9852c6be3ee9c094b626644d3eab"
|
"sha256:ff8cff01582fa1a7e533cb97f628531c4014af4b5f38e33cdcfe5eec29b6d888"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==5.2.0"
|
"version": "==5.3.0"
|
||||||
},
|
},
|
||||||
"pluggy": {
|
"pluggy": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
|
"sha256:447ba94990e8014ee25ec853339faf7b0fc8050cdc3289d4d71f7f410fb90095",
|
||||||
"sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
|
"sha256:bde19360a8ec4dfd8a20dcb811780a30998101f078fc7ded6162f0076f50508f"
|
||||||
],
|
],
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
"version": "==0.8.0"
|
||||||
"version": "==0.7.1"
|
},
|
||||||
|
"prompt-toolkit": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:c1d6aff5252ab2ef391c2fe498ed8c088066f66bc64a8d5c095bbf795d9fec34",
|
||||||
|
"sha256:d4c47f79b635a0e70b84fdb97ebd9a274203706b1ee5ed44c10da62755cf3ec9",
|
||||||
|
"sha256:fd17048d8335c1e6d5ee403c3569953ba3eb8555d710bfc548faf0712666ea39"
|
||||||
|
],
|
||||||
|
"version": "==2.0.7"
|
||||||
|
},
|
||||||
|
"ptyprocess": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
|
||||||
|
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
|
||||||
|
],
|
||||||
|
"version": "==0.6.0"
|
||||||
},
|
},
|
||||||
"py": {
|
"py": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
|
"sha256:bf92637198836372b520efcba9e020c330123be8ce527e535d185ed4b6f45694",
|
||||||
"sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
|
"sha256:e76826342cefe3c3d5f7e8ee4316b80d1dd8a300781612ddbc765c17ba25a6c6"
|
||||||
],
|
],
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
"version": "==1.7.0"
|
||||||
"version": "==1.6.0"
|
|
||||||
},
|
},
|
||||||
"pycodestyle": {
|
"pycodestyle": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -302,6 +428,13 @@
|
|||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==2.4.0"
|
"version": "==2.4.0"
|
||||||
},
|
},
|
||||||
|
"pygments": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
|
||||||
|
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
|
||||||
|
],
|
||||||
|
"version": "==2.2.0"
|
||||||
|
},
|
||||||
"pyocr": {
|
"pyocr": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f"
|
"sha256:b6ba6263fd92da56627dff6d263d991a2246aacd117d1788f11b93f419ca395f"
|
||||||
@@ -309,13 +442,20 @@
|
|||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==0.5.3"
|
"version": "==0.5.3"
|
||||||
},
|
},
|
||||||
|
"pyparsing": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b",
|
||||||
|
"sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592"
|
||||||
|
],
|
||||||
|
"version": "==2.3.0"
|
||||||
|
},
|
||||||
"pytest": {
|
"pytest": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:453cbbbe5ce6db38717d282b758b917de84802af4288910c12442984bde7b823",
|
"sha256:a9e5e8d7ab9d5b0747f37740276eb362e6a76275d76cebbb52c6049d93b475db",
|
||||||
"sha256:a8a07f84e680482eb51e244370aaf2caa6301ef265f37c2bdefb3dd3b663f99d"
|
"sha256:bf47e8ed20d03764f963f0070ff1c8fda6e2671fc5dd562a4d3b7148ad60f5ca"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==3.8.0"
|
"version": "==3.9.3"
|
||||||
},
|
},
|
||||||
"pytest-cov": {
|
"pytest-cov": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -327,11 +467,11 @@
|
|||||||
},
|
},
|
||||||
"pytest-django": {
|
"pytest-django": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:2d2e0a618d91c280d463e90bcbea9b4e417609157f611a79685b1c561c4c0836",
|
"sha256:49e9ffc856bc6a1bec1c26c5c7b7213dff7cc8bc6b64d624c4d143d04aff0bcf",
|
||||||
"sha256:59683def396923b78d7e191a7086a48193f8d5db869ace79acb38f906522bc7b"
|
"sha256:b379282feaf89069cb790775ab6bbbd2bd2038a68c7ef9b84a41898e0b551081"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==3.4.2"
|
"version": "==3.4.3"
|
||||||
},
|
},
|
||||||
"pytest-env": {
|
"pytest-env": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -345,7 +485,6 @@
|
|||||||
"sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805",
|
"sha256:e4500cd0509ec4a26535f7d4112a8cc0f17d3a41c29ffd4eab479d2a55b30805",
|
||||||
"sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08"
|
"sha256:f275cb48a73fc61a6710726348e1da6d68a978f0ec0c54ece5a5fae5977e5a08"
|
||||||
],
|
],
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
|
||||||
"version": "==0.2"
|
"version": "==0.2"
|
||||||
},
|
},
|
||||||
"pytest-sugar": {
|
"pytest-sugar": {
|
||||||
@@ -357,19 +496,19 @@
|
|||||||
},
|
},
|
||||||
"pytest-xdist": {
|
"pytest-xdist": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:0875deac20f6d96597036bdf63970887a6f36d28289c2f6682faf652dfea687b",
|
"sha256:3bc9dcb6ff47e607d3c710727cd9996fd7ac1466d405c3b40bb495da99b6b669",
|
||||||
"sha256:28e25e79698b2662b648319d3971c0f9ae0e6500f88258ccb9b153c31110ba9b"
|
"sha256:8e188d13ce6614c7a678179a76f46231199ffdfe6163de031c17e62ffa256917"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==1.23.0"
|
"version": "==1.24.0"
|
||||||
},
|
},
|
||||||
"python-dateutil": {
|
"python-dateutil": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:1adb80e7a782c12e52ef9a8182bebeb73f1d7e24e374397af06fb4956c8dc5c0",
|
"sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93",
|
||||||
"sha256:e27001de32f627c22380a688bcc43ce83504a7bc5da472209b4c70f02829f0b8"
|
"sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==2.7.3"
|
"version": "==2.7.5"
|
||||||
},
|
},
|
||||||
"python-dotenv": {
|
"python-dotenv": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -391,273 +530,37 @@
|
|||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"
|
"sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"
|
||||||
],
|
],
|
||||||
|
"markers": "extra == 'speedup'",
|
||||||
"version": "==0.12.0"
|
"version": "==0.12.0"
|
||||||
},
|
},
|
||||||
"pytz": {
|
"pytz": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053",
|
"sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca",
|
||||||
"sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277"
|
"sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==2018.5"
|
"version": "==2018.7"
|
||||||
},
|
},
|
||||||
"regex": {
|
"regex": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:22d7ef8c2df344328a8a3c61edade2ee714e5de9360911d22a9213931c769faa",
|
"sha256:0ef96690c3d2294155b7d44187ca4a151e45c931cb768e106ba464a9fa64c5da",
|
||||||
"sha256:3a699780c6b712c67dc23207b129ccc6a7e1270233f7aadead3ea3f83c893702",
|
"sha256:251683e01a3bcacd9188acf0d4caf7b29a3b963c843159311825613ae144cddb",
|
||||||
"sha256:42f460d349baebd5faec02a0c920988fb0300b24baf898d9c139886565b66b6c",
|
"sha256:3fe15a75fe00f04d1ec16713d55cf1e206077c450267a10b33318756fb8b3f99",
|
||||||
"sha256:43bf3d79940cbdf19adda838d8b26b28b47bec793cda46590b5b25703742f440",
|
"sha256:53a962f9dc28cdf403978a142cb1e054479759ad64d312a999f9f042c25b5c9a",
|
||||||
"sha256:47d6c7f0588ef33464e00023067c4e7cce68e0d6a686a73c7ee15abfdad503d4",
|
"sha256:8bd1da6a93d32336a5e5432886dd8543004f0591c39b83dbfa60705cccdf414d",
|
||||||
"sha256:5b879f59f25ed9b91bc8693a9a994014b431f224f492519ad0255ce6b54b83e5",
|
"sha256:b5423061918f602e9342b54d746ac31c598d328ecaf4ef0618763e960c926fd4",
|
||||||
"sha256:8ba0093c412900f636b0f826c597a0c3ea0e395344bc99894ddefe88b76c9c7e",
|
"sha256:d80ebc65b1f7d0403117f59309c16eac24be6a0bc730b593a79f703462858d94",
|
||||||
"sha256:a4789254a1a0bd7a637036cce0b7ed72d8cc864e93f2e9cfd10ac00ae27bb7b0",
|
"sha256:fd8419979639b7de7fb964a13bce3ac47e6fe33043b83de0398c3067986e5659",
|
||||||
"sha256:b73cea07117dca888b0c3671770b501bef19aac9c45c8ffdb5bea2cca2377b0a",
|
"sha256:ff2f15b2b0b4b58ba8a1de651780a0d3fd54f96ad6b77dceb77695220e5d7b7a"
|
||||||
"sha256:d3eb59fa3e5b5438438ec97acd9dc86f077428e020b015b43987e35bea68ef4c",
|
|
||||||
"sha256:d51d232b4e2f106deaf286001f563947fee255bc5bd209a696f027e15cf0a1e7",
|
|
||||||
"sha256:d59b03131a8e35061b47a8f186324a95eaf30d5f6ee9cc0637e7b87d29c7c9b5",
|
|
||||||
"sha256:dd705df1b47470388fc4630e4df3cbbe7677e2ab80092a1c660cae630a307b2d",
|
|
||||||
"sha256:e87fffa437a4b00afb17af785da9b01618425d6cd984c677639deb937037d8f2",
|
|
||||||
"sha256:ed40e0474ab5ab228a8d133759d451b31d3ccdebaff698646e54aff82c3de4f8"
|
|
||||||
],
|
],
|
||||||
"version": "==2018.8.29"
|
"version": "==2018.11.2"
|
||||||
},
|
},
|
||||||
"requests": {
|
"requests": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1",
|
"sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c",
|
||||||
"sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a"
|
"sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279"
|
||||||
],
|
],
|
||||||
"version": "==2.19.1"
|
"version": "==2.20.0"
|
||||||
},
|
|
||||||
"six": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
|
|
||||||
"sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
|
|
||||||
],
|
|
||||||
"version": "==1.11.0"
|
|
||||||
},
|
|
||||||
"termcolor": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
|
|
||||||
],
|
|
||||||
"version": "==1.1.0"
|
|
||||||
},
|
|
||||||
"text-unidecode": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:5a1375bb2ba7968740508ae38d92e1f889a0832913cb1c447d5e2046061a396d",
|
|
||||||
"sha256:801e38bd550b943563660a91de8d4b6fa5df60a542be9093f7abf819f86050cc"
|
|
||||||
],
|
|
||||||
"version": "==1.2"
|
|
||||||
},
|
|
||||||
"tzlocal": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:4ebeb848845ac898da6519b9b31879cf13b6626f7184c496037b818e238f2c4e"
|
|
||||||
],
|
|
||||||
"version": "==1.5.1"
|
|
||||||
},
|
|
||||||
"urllib3": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
|
|
||||||
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
|
|
||||||
],
|
|
||||||
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
|
|
||||||
"version": "==1.23"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"develop": {
|
|
||||||
"alabaster": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:674bb3bab080f598371f4443c5008cbfeb1a5e622dd312395d2d82af2c54c456",
|
|
||||||
"sha256:b63b1f4dc77c074d386752ec4a8a7517600f6c0db8cd42980cae17ab7b3275d7"
|
|
||||||
],
|
|
||||||
"version": "==0.7.11"
|
|
||||||
},
|
|
||||||
"babel": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669",
|
|
||||||
"sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23"
|
|
||||||
],
|
|
||||||
"version": "==2.6.0"
|
|
||||||
},
|
|
||||||
"backcall": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
|
|
||||||
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
|
|
||||||
],
|
|
||||||
"version": "==0.1.0"
|
|
||||||
},
|
|
||||||
"certifi": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:376690d6f16d32f9d1fe8932551d80b23e9d393a8578c5633a2ed39a64861638",
|
|
||||||
"sha256:456048c7e371c089d0a77a5212fb37a2c2dce1e24146e3b7e0261736aaeaa22a"
|
|
||||||
],
|
|
||||||
"version": "==2018.8.24"
|
|
||||||
},
|
|
||||||
"chardet": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
|
|
||||||
"sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
|
|
||||||
],
|
|
||||||
"version": "==3.0.4"
|
|
||||||
},
|
|
||||||
"decorator": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82",
|
|
||||||
"sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c"
|
|
||||||
],
|
|
||||||
"version": "==4.3.0"
|
|
||||||
},
|
|
||||||
"docutils": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
|
|
||||||
"sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274",
|
|
||||||
"sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6"
|
|
||||||
],
|
|
||||||
"version": "==0.14"
|
|
||||||
},
|
|
||||||
"idna": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e",
|
|
||||||
"sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"
|
|
||||||
],
|
|
||||||
"version": "==2.7"
|
|
||||||
},
|
|
||||||
"imagesize": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
|
|
||||||
"sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
|
|
||||||
],
|
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
|
||||||
"version": "==1.1.0"
|
|
||||||
},
|
|
||||||
"ipython": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:007dcd929c14631f83daff35df0147ea51d1af420da303fd078343878bd5fb62",
|
|
||||||
"sha256:b0f2ef9eada4a68ef63ee10b6dde4f35c840035c50fd24265f8052c98947d5a4"
|
|
||||||
],
|
|
||||||
"index": "pypi",
|
|
||||||
"version": "==6.5.0"
|
|
||||||
},
|
|
||||||
"ipython-genutils": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
|
|
||||||
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
|
|
||||||
],
|
|
||||||
"version": "==0.2.0"
|
|
||||||
},
|
|
||||||
"jedi": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:b409ed0f6913a701ed474a614a3bb46e6953639033e31f769ca7581da5bd1ec1",
|
|
||||||
"sha256:c254b135fb39ad76e78d4d8f92765ebc9bf92cbc76f49e97ade1d5f5121e1f6f"
|
|
||||||
],
|
|
||||||
"version": "==0.12.1"
|
|
||||||
},
|
|
||||||
"jinja2": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
|
|
||||||
"sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
|
|
||||||
],
|
|
||||||
"version": "==2.10"
|
|
||||||
},
|
|
||||||
"markupsafe": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
|
|
||||||
],
|
|
||||||
"version": "==1.0"
|
|
||||||
},
|
|
||||||
"packaging": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:e9215d2d2535d3ae866c3d6efc77d5b24a0192cce0ff20e42896cc0664f889c0",
|
|
||||||
"sha256:f019b770dd64e585a99714f1fd5e01c7a8f11b45635aa953fd41c689a657375b"
|
|
||||||
],
|
|
||||||
"version": "==17.1"
|
|
||||||
},
|
|
||||||
"parso": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2",
|
|
||||||
"sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24"
|
|
||||||
],
|
|
||||||
"version": "==0.3.1"
|
|
||||||
},
|
|
||||||
"pexpect": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
|
|
||||||
"sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
|
|
||||||
],
|
|
||||||
"markers": "sys_platform != 'win32'",
|
|
||||||
"version": "==4.6.0"
|
|
||||||
},
|
|
||||||
"pickleshare": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:84a9257227dfdd6fe1b4be1319096c20eb85ff1e82c7932f36efccfe1b09737b",
|
|
||||||
"sha256:c9a2541f25aeabc070f12f452e1f2a8eae2abd51e1cd19e8430402bdf4c1d8b5"
|
|
||||||
],
|
|
||||||
"version": "==0.7.4"
|
|
||||||
},
|
|
||||||
"pluggy": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:6e3836e39f4d36ae72840833db137f7b7d35105079aee6ec4a62d9f80d594dd1",
|
|
||||||
"sha256:95eb8364a4708392bae89035f45341871286a333f749c3141c20573d2b3876e1"
|
|
||||||
],
|
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
|
||||||
"version": "==0.7.1"
|
|
||||||
},
|
|
||||||
"prompt-toolkit": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381",
|
|
||||||
"sha256:3f473ae040ddaa52b52f97f6b4a493cfa9f5920c255a12dc56a7d34397a398a4",
|
|
||||||
"sha256:858588f1983ca497f1cf4ffde01d978a3ea02b01c8a26a8bbc5cd2e66d816917"
|
|
||||||
],
|
|
||||||
"version": "==1.0.15"
|
|
||||||
},
|
|
||||||
"ptyprocess": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
|
|
||||||
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
|
|
||||||
],
|
|
||||||
"version": "==0.6.0"
|
|
||||||
},
|
|
||||||
"py": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:06a30435d058473046be836d3fc4f27167fd84c45b99704f2fb5509ef61f9af1",
|
|
||||||
"sha256:50402e9d1c9005d759426988a492e0edaadb7f4e68bcddfea586bc7432d009c6"
|
|
||||||
],
|
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
|
||||||
"version": "==1.6.0"
|
|
||||||
},
|
|
||||||
"pygments": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
|
|
||||||
"sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
|
|
||||||
],
|
|
||||||
"version": "==2.2.0"
|
|
||||||
},
|
|
||||||
"pyparsing": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:0832bcf47acd283788593e7a0f542407bd9550a55a8a8435214a1960e04bcb04",
|
|
||||||
"sha256:fee43f17a9c4087e7ed1605bd6df994c6173c1e977d7ade7b651292fab2bd010"
|
|
||||||
],
|
|
||||||
"version": "==2.2.0"
|
|
||||||
},
|
|
||||||
"pytz": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:a061aa0a9e06881eb8b3b2b43f05b9439d6583c206d0a6c340ff72a7b6669053",
|
|
||||||
"sha256:ffb9ef1de172603304d9d2819af6f5ece76f2e85ec10692a524dd876e72bf277"
|
|
||||||
],
|
|
||||||
"index": "pypi",
|
|
||||||
"version": "==2018.5"
|
|
||||||
},
|
|
||||||
"requests": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1",
|
|
||||||
"sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a"
|
|
||||||
],
|
|
||||||
"version": "==2.19.1"
|
|
||||||
},
|
|
||||||
"simplegeneric": {
|
|
||||||
"hashes": [
|
|
||||||
"sha256:dc972e06094b9af5b855b3df4a646395e43d1c9d0d39ed345b7393560d0b9173"
|
|
||||||
],
|
|
||||||
"version": "==0.8.1"
|
|
||||||
},
|
},
|
||||||
"six": {
|
"six": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -675,27 +578,46 @@
|
|||||||
},
|
},
|
||||||
"sphinx": {
|
"sphinx": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:217a7705adcb573da5bbe1e0f5cab4fa0bd89fd9342c9159121746f593c2d5a4",
|
"sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17",
|
||||||
"sha256:a602513f385f1d5785ff1ca420d9c7eb1a1b63381733b2f0ea8188a391314a86"
|
"sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==1.7.9"
|
"version": "==1.8.1"
|
||||||
},
|
},
|
||||||
"sphinxcontrib-websupport": {
|
"sphinxcontrib-websupport": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
|
"sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
|
||||||
"sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
|
"sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
|
||||||
],
|
],
|
||||||
"markers": "python_version != '3.2.*' and python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.1.*' and python_version != '3.3.*'",
|
|
||||||
"version": "==1.1.0"
|
"version": "==1.1.0"
|
||||||
},
|
},
|
||||||
|
"termcolor": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
|
||||||
|
],
|
||||||
|
"version": "==1.1.0"
|
||||||
|
},
|
||||||
|
"text-unidecode": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:5a1375bb2ba7968740508ae38d92e1f889a0832913cb1c447d5e2046061a396d",
|
||||||
|
"sha256:801e38bd550b943563660a91de8d4b6fa5df60a542be9093f7abf819f86050cc"
|
||||||
|
],
|
||||||
|
"version": "==1.2"
|
||||||
|
},
|
||||||
|
"toml": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
|
||||||
|
"sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
|
||||||
|
],
|
||||||
|
"version": "==0.10.0"
|
||||||
|
},
|
||||||
"tox": {
|
"tox": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:37cf240781b662fb790710c6998527e65ca6851eace84d1595ee71f7af4e85f7",
|
"sha256:513e32fdf2f9e2d583c2f248f47ba9886428c949f068ac54a0469cac55df5862",
|
||||||
"sha256:eb61aa5bcce65325538686f09848f04ef679b5cd9b83cc491272099b28739600"
|
"sha256:75fa30e8329b41b664585f5fb837e23ce1d7e6fa1f7811f2be571c990f9d911b"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==3.2.1"
|
"version": "==3.5.3"
|
||||||
},
|
},
|
||||||
"traitlets": {
|
"traitlets": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -704,21 +626,25 @@
|
|||||||
],
|
],
|
||||||
"version": "==4.3.2"
|
"version": "==4.3.2"
|
||||||
},
|
},
|
||||||
|
"tzlocal": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:4ebeb848845ac898da6519b9b31879cf13b6626f7184c496037b818e238f2c4e"
|
||||||
|
],
|
||||||
|
"version": "==1.5.1"
|
||||||
|
},
|
||||||
"urllib3": {
|
"urllib3": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf",
|
"sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
|
||||||
"sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5"
|
"sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.6' and python_version != '3.2.*' and python_version != '3.0.*' and python_version != '3.1.*' and python_version < '4' and python_version != '3.3.*'",
|
"version": "==1.24.1"
|
||||||
"version": "==1.23"
|
|
||||||
},
|
},
|
||||||
"virtualenv": {
|
"virtualenv": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:2ce32cd126117ce2c539f0134eb89de91a8413a29baac49cbab3eb50e2026669",
|
"sha256:686176c23a538ecc56d27ed9d5217abd34644823d6391cbeb232f42bf722baad",
|
||||||
"sha256:ca07b4c0b54e14a91af9f34d0919790b016923d157afda5efdde55c96718f752"
|
"sha256:f899fafcd92e1150f40c8215328be38ff24b519cd95357fa6e78e006c7638208"
|
||||||
],
|
],
|
||||||
"markers": "python_version >= '2.7' and python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*'",
|
"version": "==16.1.0"
|
||||||
"version": "==16.0.0"
|
|
||||||
},
|
},
|
||||||
"wcwidth": {
|
"wcwidth": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -727,5 +653,6 @@
|
|||||||
],
|
],
|
||||||
"version": "==0.1.7"
|
"version": "==0.1.7"
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"develop": {}
|
||||||
}
|
}
|
||||||
|
|||||||
20
Vagrantfile
vendored
20
Vagrantfile
vendored
@@ -1,20 +0,0 @@
|
|||||||
# -*- mode: ruby -*-
|
|
||||||
# vi: set ft=ruby :
|
|
||||||
|
|
||||||
VAGRANT_API_VERSION = "2"
|
|
||||||
Vagrant.configure(VAGRANT_API_VERSION) do |config|
|
|
||||||
config.vm.box = "ubuntu/trusty64"
|
|
||||||
|
|
||||||
# Provision using shell
|
|
||||||
config.vm.host_name = "dev.paperless"
|
|
||||||
config.vm.synced_folder ".", "/opt/paperless"
|
|
||||||
config.vm.provision "shell", path: "scripts/vagrant-provision"
|
|
||||||
|
|
||||||
# Networking details
|
|
||||||
config.vm.network "private_network", ip: "172.28.128.4"
|
|
||||||
|
|
||||||
config.vm.provider "virtualbox" do |vb|
|
|
||||||
# Customize the amount of memory on the VM:
|
|
||||||
vb.memory = "1024"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
@@ -1,6 +1,27 @@
|
|||||||
Changelog
|
Changelog
|
||||||
#########
|
#########
|
||||||
|
|
||||||
|
2.6.0
|
||||||
|
=====
|
||||||
|
|
||||||
|
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
|
||||||
|
the problem in `#433`_.
|
||||||
|
* Fix the ``RecentCorrespondentsFilter`` correspondents filter that was added
|
||||||
|
in 2.4 to play nice with the defaults. Thanks to `tsia`_ and `Sblop`_ who
|
||||||
|
pointed this out. `#423`_.
|
||||||
|
* Updated dependencies to include (among other things) a security patch to
|
||||||
|
requests.
|
||||||
|
* Fix text in sample data for tests so that the language guesser stops thinking
|
||||||
|
that everything is in Catalan because we had *Lorem ipsum* in there.
|
||||||
|
* Tweaked the gunicorn sample command to use filesystem paths instead of Python
|
||||||
|
paths. `#441`_
|
||||||
|
* Added pretty colour boxes next to the hex values in the Tags section, thanks
|
||||||
|
to a pull request from `Joshua Taillon`_ `#442`_.
|
||||||
|
* Added a ``.editorconfig`` file to better specify coding style.
|
||||||
|
* `Joshua Taillon`_ also added some logic to tie Paperless' date guessing logic
|
||||||
|
into how it parses file names on import. `#440`_
|
||||||
|
|
||||||
|
|
||||||
2.5.0
|
2.5.0
|
||||||
=====
|
=====
|
||||||
|
|
||||||
@@ -44,6 +65,7 @@ Changelog
|
|||||||
* The ``get_date()`` functionality of the parsers has been consolidated onto
|
* The ``get_date()`` functionality of the parsers has been consolidated onto
|
||||||
the ``DocumentParser`` class since much of that code was redundant anyway.
|
the ``DocumentParser`` class since much of that code was redundant anyway.
|
||||||
|
|
||||||
|
|
||||||
2.4.0
|
2.4.0
|
||||||
=====
|
=====
|
||||||
|
|
||||||
@@ -55,13 +77,13 @@ Changelog
|
|||||||
It's now in the import step that we decide the storage type. This allows you
|
It's now in the import step that we decide the storage type. This allows you
|
||||||
to export from an encrypted system and import into an unencrypted one, or
|
to export from an encrypted system and import into an unencrypted one, or
|
||||||
vice-versa.
|
vice-versa.
|
||||||
* The migration history has been slightly modified to accomodate PostgreSQL
|
* The migration history has been slightly modified to accommodate PostgreSQL
|
||||||
users. Additionally, you can now tell paperless to use PostgreSQL simply by
|
users. Additionally, you can now tell paperless to use PostgreSQL simply by
|
||||||
declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to
|
declaring ``PAPERLESS_DBUSER`` in your environment. This will attempt to
|
||||||
connect to your Postgres database without a password unless you also set
|
connect to your Postgres database without a password unless you also set
|
||||||
``PAPERLESS_DBPASS``.
|
``PAPERLESS_DBPASS``.
|
||||||
* A bug was found in the REST API filter system that was the result of an
|
* A bug was found in the REST API filter system that was the result of an
|
||||||
update of django-filter some time ago. This has now been patched `#412`_.
|
update of django-filter some time ago. This has now been patched in `#412`_.
|
||||||
Thanks to `thepill`_ for spotting it!
|
Thanks to `thepill`_ for spotting it!
|
||||||
|
|
||||||
|
|
||||||
@@ -570,6 +592,9 @@ bulk of the work on this big change.
|
|||||||
.. _thepill: https://github.com/thepill
|
.. _thepill: https://github.com/thepill
|
||||||
.. _Andrew Peng: https://github.com/pengc99
|
.. _Andrew Peng: https://github.com/pengc99
|
||||||
.. _euri10: https://github.com/euri10
|
.. _euri10: https://github.com/euri10
|
||||||
|
.. _Ulli: https://github.com/Ulli2k
|
||||||
|
.. _tsia: https://github.com/tsia
|
||||||
|
.. _Sblop: https://github.com/Sblop
|
||||||
|
|
||||||
.. _#20: https://github.com/danielquinn/paperless/issues/20
|
.. _#20: https://github.com/danielquinn/paperless/issues/20
|
||||||
.. _#44: https://github.com/danielquinn/paperless/issues/44
|
.. _#44: https://github.com/danielquinn/paperless/issues/44
|
||||||
@@ -664,6 +689,11 @@ bulk of the work on this big change.
|
|||||||
.. _#412: https://github.com/danielquinn/paperless/issues/412
|
.. _#412: https://github.com/danielquinn/paperless/issues/412
|
||||||
.. _#413: https://github.com/danielquinn/paperless/pull/413
|
.. _#413: https://github.com/danielquinn/paperless/pull/413
|
||||||
.. _#414: https://github.com/danielquinn/paperless/issues/414
|
.. _#414: https://github.com/danielquinn/paperless/issues/414
|
||||||
|
.. _#423: https://github.com/danielquinn/paperless/issues/423
|
||||||
|
.. _#433: https://github.com/danielquinn/paperless/issues/433
|
||||||
|
.. _#440: https://github.com/danielquinn/paperless/pull/440
|
||||||
|
.. _#441: https://github.com/danielquinn/paperless/pull/441
|
||||||
|
.. _#442: https://github.com/danielquinn/paperless/pull/442
|
||||||
|
|
||||||
.. _pipenv: https://docs.pipenv.org/
|
.. _pipenv: https://docs.pipenv.org/
|
||||||
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
.. _a new home on Docker Hub: https://hub.docker.com/r/danielquinn/paperless/
|
||||||
|
|||||||
15
docs/changelog_jonaswinkler.rst
Normal file
15
docs/changelog_jonaswinkler.rst
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
Changelog (jonaswinkler)
|
||||||
|
########################
|
||||||
|
|
||||||
|
1.0.0
|
||||||
|
=====
|
||||||
|
|
||||||
|
* First release based on paperless 2.6.0
|
||||||
|
* Added: Automatic document classification using neural networks (replaces
|
||||||
|
regex-based tagging)
|
||||||
|
* Added: Document types
|
||||||
|
* Added: Archive serial number allows easy referencing of physical document
|
||||||
|
copies
|
||||||
|
* Added: Inbox tags (added automatically to newly consumed documents)
|
||||||
|
* Added: Document viewer on document edit page
|
||||||
|
* Database backend is now configurable
|
||||||
@@ -43,6 +43,16 @@ These however wouldn't work:
|
|||||||
* ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
|
* ``Some Company Name, Invoice 2016-01-01, money, invoices.pdf``
|
||||||
* ``Another Company- Letter of Reference.jpg``
|
* ``Another Company- Letter of Reference.jpg``
|
||||||
|
|
||||||
|
Do I have to be so strict about naming?
|
||||||
|
---------------------------------------
|
||||||
|
Rather than using the strict document naming rules, one can also set the option
|
||||||
|
``PAPERLESS_FILENAME_DATE_ORDER`` in ``paperless.conf`` to any date order
|
||||||
|
that is accepted by dateparser_. Doing so will cause ``paperless`` to default
|
||||||
|
to any date format that is found in the title, instead of a date pulled from
|
||||||
|
the document's text, without requiring the strict formatting of the document
|
||||||
|
filename as described above.
|
||||||
|
|
||||||
|
.. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
|
||||||
|
|
||||||
.. _guesswork-content:
|
.. _guesswork-content:
|
||||||
|
|
||||||
@@ -82,11 +92,11 @@ text and matching algorithm. From the help info there:
|
|||||||
uses a regex to match the PDF. If you don't know what a regex is, you
|
uses a regex to match the PDF. If you don't know what a regex is, you
|
||||||
probably don't want this option.
|
probably don't want this option.
|
||||||
|
|
||||||
When using the "any" or "all" matching algorithms, you can search for terms that
|
When using the "any" or "all" matching algorithms, you can search for terms
|
||||||
consist of multiple words by enclosing them in double quotes. For example, defining
|
that consist of multiple words by enclosing them in double quotes. For example,
|
||||||
a match text of ``"Bank of America" BofA`` using the "any" algorithm, will match
|
defining a match text of ``"Bank of America" BofA`` using the "any" algorithm,
|
||||||
documents that contain either "Bank of America" or "BofA", but will not match
|
will match documents that contain either "Bank of America" or "BofA", but will
|
||||||
documents containing "Bank of South America".
|
not match documents containing "Bank of South America".
|
||||||
|
|
||||||
Then just save your tag/correspondent and run another document through the
|
Then just save your tag/correspondent and run another document through the
|
||||||
consumer. Once complete, you should see the newly-created document,
|
consumer. Once complete, you should see the newly-created document,
|
||||||
|
|||||||
@@ -46,3 +46,4 @@ Contents
|
|||||||
contributing
|
contributing
|
||||||
scanners
|
scanners
|
||||||
changelog
|
changelog
|
||||||
|
changelog_jonaswinkler
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ rolled in as part of the update:
|
|||||||
|
|
||||||
$ cd /path/to/project
|
$ cd /path/to/project
|
||||||
$ git pull
|
$ git pull
|
||||||
|
$ pip install -r requirements.txt
|
||||||
$ cd src
|
$ cd src
|
||||||
$ ./manage.py migrate
|
$ ./manage.py migrate
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ In addition to the above, there are a number of Python requirements, all of
|
|||||||
which are listed in a file called ``requirements.txt`` in the project root
|
which are listed in a file called ``requirements.txt`` in the project root
|
||||||
directory.
|
directory.
|
||||||
|
|
||||||
If you're not working on a virtual environment (like Vagrant or Docker), you
|
If you're not working on a virtual environment (like Docker), you
|
||||||
should probably be using a virtualenv, but that's your call. The reasons why
|
should probably be using a virtualenv, but that's your call. The reasons why
|
||||||
you might choose a virtualenv or not aren't really within the scope of this
|
you might choose a virtualenv or not aren't really within the scope of this
|
||||||
document. Needless to say if you don't know what a virtualenv is, you should
|
document. Needless to say if you don't know what a virtualenv is, you should
|
||||||
|
|||||||
@@ -42,18 +42,14 @@ Installation & Configuration
|
|||||||
You can go multiple routes with setting up and running Paperless:
|
You can go multiple routes with setting up and running Paperless:
|
||||||
|
|
||||||
* The `bare metal route`_
|
* The `bare metal route`_
|
||||||
* The `vagrant route`_
|
|
||||||
* The `docker route`_
|
* The `docker route`_
|
||||||
|
|
||||||
|
|
||||||
The `Vagrant route`_ is quick & easy, but means you're running a VM which comes
|
The `docker route`_ is quick & easy.
|
||||||
with memory consumption, cpu overhead etc. The `docker route`_ offers the same
|
|
||||||
simplicity as Vagrant with lower resource consumption.
|
|
||||||
|
|
||||||
The `bare metal route`_ is a bit more complicated to setup but makes it easier
|
The `bare metal route`_ is a bit more complicated to setup but makes it easier
|
||||||
should you want to contribute some code back.
|
should you want to contribute some code back.
|
||||||
|
|
||||||
.. _Vagrant route: setup-installation-vagrant_
|
|
||||||
.. _docker route: setup-installation-docker_
|
.. _docker route: setup-installation-docker_
|
||||||
.. _bare metal route: setup-installation-bare-metal_
|
.. _bare metal route: setup-installation-bare-metal_
|
||||||
.. _Docker Machine: https://docs.docker.com/machine/
|
.. _Docker Machine: https://docs.docker.com/machine/
|
||||||
@@ -267,54 +263,6 @@ Docker Method
|
|||||||
newer ``docker-compose.yml.example`` file
|
newer ``docker-compose.yml.example`` file
|
||||||
|
|
||||||
|
|
||||||
.. _setup-installation-vagrant:
|
|
||||||
|
|
||||||
Vagrant Method
|
|
||||||
++++++++++++++
|
|
||||||
|
|
||||||
1. Install `Vagrant`_. How you do that is really between you and your OS.
|
|
||||||
2. Run ``vagrant up``. An instance will start up for you. When it's ready and
|
|
||||||
provisioned...
|
|
||||||
3. Run ``vagrant ssh`` and once inside your new vagrant box, edit
|
|
||||||
``/etc/paperless.conf`` and set the values for:
|
|
||||||
|
|
||||||
* ``PAPERLESS_CONSUMPTION_DIR``: This is where your documents will be
|
|
||||||
dumped to be consumed by Paperless.
|
|
||||||
* ``PAPERLESS_PASSPHRASE``: This is the passphrase Paperless uses to
|
|
||||||
encrypt/decrypt the original document. It's only required if you want
|
|
||||||
your original files to be encrypted, otherwise, just leave it unset.
|
|
||||||
* ``PAPERLESS_EMAIL_SECRET``: this is the "magic word" used when consuming
|
|
||||||
documents from mail or via the API. If you don't use either, leaving it
|
|
||||||
blank is just fine.
|
|
||||||
|
|
||||||
4. Exit the vagrant box and re-enter it with ``vagrant ssh`` again. This
|
|
||||||
updates the environment to make use of the changes you made to the config
|
|
||||||
file.
|
|
||||||
5. Initialise the database with ``/opt/paperless/src/manage.py migrate``.
|
|
||||||
6. Still inside your vagrant box, create a user for your Paperless instance
|
|
||||||
with ``/opt/paperless/src/manage.py createsuperuser``. Follow the prompts to
|
|
||||||
create your user.
|
|
||||||
7. Start the webserver with
|
|
||||||
``/opt/paperless/src/manage.py runserver 0.0.0.0:8000``. You should now be
|
|
||||||
able to visit your (empty) `Paperless webserver`_ at ``172.28.128.4:8000``.
|
|
||||||
You can login with the user/pass you created in #6.
|
|
||||||
8. In a separate window, run ``vagrant ssh`` again, but this time once inside
|
|
||||||
your vagrant instance, you should start the consumer script with
|
|
||||||
``/opt/paperless/src/manage.py document_consumer``.
|
|
||||||
9. Scan something. Put it in the ``CONSUMPTION_DIR``.
|
|
||||||
10. Wait a few minutes
|
|
||||||
11. Visit the document list on your webserver, and it should be there, indexed
|
|
||||||
and downloadable.
|
|
||||||
|
|
||||||
.. caution::
|
|
||||||
|
|
||||||
This installation is not secure. Once everything is working head up to
|
|
||||||
`Making things more permanent`_
|
|
||||||
|
|
||||||
.. _Vagrant: https://vagrantup.com/
|
|
||||||
.. _Paperless server: http://172.28.128.4:8000
|
|
||||||
|
|
||||||
|
|
||||||
.. _setup-permanent:
|
.. _setup-permanent:
|
||||||
|
|
||||||
Making Things a Little more Permanent
|
Making Things a Little more Permanent
|
||||||
@@ -398,7 +346,7 @@ instance listening on localhost port 8000.
|
|||||||
location /static {
|
location /static {
|
||||||
|
|
||||||
autoindex on;
|
autoindex on;
|
||||||
alias <path-to-paperless-static-directory>
|
alias <path-to-paperless-static-directory>;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -409,7 +357,7 @@ instance listening on localhost port 8000.
|
|||||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
proxy_set_header X-Forwarded-Proto $scheme;
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
|
||||||
proxy_pass http://127.0.0.1:8000
|
proxy_pass http://127.0.0.1:8000;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -418,7 +366,7 @@ The gunicorn server can be started with the command:
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
$ <path-to-paperless-virtual-environment>/bin/gunicorn <path-to-paperless>/src/paperless.wsgi -w 2
|
$ <path-to-paperless-virtual-environment>/bin/gunicorn --pythonpath=<path-to-paperless>/src paperless.wsgi -w 2
|
||||||
|
|
||||||
|
|
||||||
.. _setup-permanent-standard-systemd:
|
.. _setup-permanent-standard-systemd:
|
||||||
@@ -475,7 +423,7 @@ after restarting your system:
|
|||||||
respawn limit 10 5
|
respawn limit 10 5
|
||||||
|
|
||||||
script
|
script
|
||||||
exec <path to paperless virtual environment>/bin/gunicorn <path to parperless>/src/paperless.wsgi -w 2
|
exec <path to paperless virtual environment>/bin/gunicorn --pythonpath=<path to parperless>/src paperless.wsgi -w 2
|
||||||
end script
|
end script
|
||||||
|
|
||||||
Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
|
Note that you'll need to replace ``/srv/paperless/src/manage.py`` with the
|
||||||
@@ -513,13 +461,6 @@ second period.
|
|||||||
.. _Upstart: http://upstart.ubuntu.com/
|
.. _Upstart: http://upstart.ubuntu.com/
|
||||||
|
|
||||||
|
|
||||||
Vagrant
|
|
||||||
~~~~~~~
|
|
||||||
|
|
||||||
You may use the Ubuntu explanation above. Replace
|
|
||||||
``(local-filesystems and net-device-up IFACE=eth0)`` with ``vagrant-mounted``.
|
|
||||||
|
|
||||||
|
|
||||||
.. _setup-permanent-docker:
|
.. _setup-permanent-docker:
|
||||||
|
|
||||||
Docker
|
Docker
|
||||||
|
|||||||
@@ -14,9 +14,8 @@ FORGIVING_OCR is enabled``, then you might need to install the
|
|||||||
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
|
`Tesseract language files <http://packages.ubuntu.com/search?keywords=tesseract-ocr>`_
|
||||||
marching your document's languages.
|
marching your document's languages.
|
||||||
|
|
||||||
As an example, if you are running Paperless from the Vagrant setup provided
|
As an example, if you are running Paperless from any Ubuntu or Debian
|
||||||
(or from any Ubuntu or Debian box), and your documents are written in Spanish
|
box, and your documents are written in Spanish you may need to run::
|
||||||
you may need to run::
|
|
||||||
|
|
||||||
apt-get install -y tesseract-ocr-spa
|
apt-get install -y tesseract-ocr-spa
|
||||||
|
|
||||||
|
|||||||
0
models/.keep
Normal file
0
models/.keep
Normal file
11
overrides/README.md
Normal file
11
overrides/README.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Customizing Paperless
|
||||||
|
|
||||||
|
*See customization
|
||||||
|
[documentation](https://paperless.readthedocs.io/en/latest/customising.html)
|
||||||
|
for more detail!*
|
||||||
|
|
||||||
|
The example `.css` and `.js` snippets in this folder can be placed into
|
||||||
|
one of two files in your ``PAPERLESS_MEDIADIR`` folder: `overrides.js` or
|
||||||
|
`overrides.css`. Please feel free to submit pull requests to the main
|
||||||
|
repository with other examples of customizations that you think others may
|
||||||
|
find useful.
|
||||||
@@ -3,6 +3,16 @@
|
|||||||
# As this file contains passwords it should only be readable by the user
|
# As this file contains passwords it should only be readable by the user
|
||||||
# running paperless.
|
# running paperless.
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
#### Database Settings ####
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# By default, sqlite is used as the database backend. This can be changed here.
|
||||||
|
#PAPERLESS_DBENGINE="django.db.backends.postgresql_psycopg2"
|
||||||
|
#PAPERLESS_DBNAME="paperless"
|
||||||
|
#PAPERLESS_DBUSER="paperless"
|
||||||
|
#PAPERLESS_DBPASS="paperless"
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
#### Paths & Folders ####
|
#### Paths & Folders ####
|
||||||
@@ -38,6 +48,13 @@ PAPERLESS_CONSUMPTION_DIR=""
|
|||||||
#PAPERLESS_STATIC_URL="/static/"
|
#PAPERLESS_STATIC_URL="/static/"
|
||||||
|
|
||||||
|
|
||||||
|
# You can specify where the document classification model file should be
|
||||||
|
# stored. Make sure that this file is writeable by the user executing the
|
||||||
|
# management command "document_create_classifier" and that the path exists.
|
||||||
|
# The default location is /models/model.pickle wwithin the install folder.
|
||||||
|
#PAPERLESS_MODEL_FILE=/path/to/model/file
|
||||||
|
|
||||||
|
|
||||||
# These values are required if you want paperless to check a particular email
|
# These values are required if you want paperless to check a particular email
|
||||||
# box every 10 minutes and attempt to consume documents from there. If you
|
# box every 10 minutes and attempt to consume documents from there. If you
|
||||||
# don't define a HOST, mail checking will just be disabled.
|
# don't define a HOST, mail checking will just be disabled.
|
||||||
@@ -127,6 +144,14 @@ PAPERLESS_DEBUG="false"
|
|||||||
# "true", the document will instead be opened in the browser, if possible.
|
# "true", the document will instead be opened in the browser, if possible.
|
||||||
#PAPERLESS_INLINE_DOC="false"
|
#PAPERLESS_INLINE_DOC="false"
|
||||||
|
|
||||||
|
# By default, paperless will check the document text for document date information.
|
||||||
|
# Uncomment the line below to enable checking the document filename for date
|
||||||
|
# information. The date order can be set to any option as specified in
|
||||||
|
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
|
||||||
|
# checked first, and if nothing is found, the document text will be checked
|
||||||
|
# as normal.
|
||||||
|
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
|
||||||
|
|
||||||
#
|
#
|
||||||
# The following values use sensible defaults for modern systems, but if you're
|
# The following values use sensible defaults for modern systems, but if you're
|
||||||
# running Paperless on a low-resource device (like a Raspberry Pi), modifying
|
# running Paperless on a low-resource device (like a Raspberry Pi), modifying
|
||||||
@@ -188,6 +213,12 @@ PAPERLESS_DEBUG="false"
|
|||||||
#PAPERLESS_CONSUMER_LOOP_TIME=10
|
#PAPERLESS_CONSUMER_LOOP_TIME=10
|
||||||
|
|
||||||
|
|
||||||
|
# By default Paperless stops consuming a document if no language can be
|
||||||
|
# detected. Set to true to consume documents even if the language detection
|
||||||
|
# fails.
|
||||||
|
#PAPERLESS_FORGIVING_OCR="false"
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
#### Interface ####
|
#### Interface ####
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|||||||
@@ -1,51 +1,83 @@
|
|||||||
-i https://pypi.python.org/simple
|
-i https://pypi.python.org/simple
|
||||||
apipkg==1.5; python_version != '3.3.*'
|
alabaster==0.7.12
|
||||||
atomicwrites==1.2.1; python_version != '3.3.*'
|
apipkg==1.5
|
||||||
|
atomicwrites==1.2.1
|
||||||
attrs==18.2.0
|
attrs==18.2.0
|
||||||
certifi==2018.8.24
|
babel==2.6.0
|
||||||
|
backcall==0.1.0
|
||||||
|
certifi==2018.10.15
|
||||||
chardet==3.0.4
|
chardet==3.0.4
|
||||||
coverage==4.5.1; python_version < '4'
|
coverage==4.5.1
|
||||||
coveralls==1.5.0
|
coveralls==1.5.1
|
||||||
dateparser==0.7.0
|
dateparser==0.7.0
|
||||||
|
decorator==4.3.0
|
||||||
django-cors-headers==2.4.0
|
django-cors-headers==2.4.0
|
||||||
django-crispy-forms==1.7.2
|
django-crispy-forms==1.7.2
|
||||||
django-extensions==2.1.2
|
django-extensions==2.1.3
|
||||||
django-filter==2.0.0
|
django-filter==2.0.0
|
||||||
django==2.0.8
|
django==2.0.9
|
||||||
djangorestframework==3.8.2
|
djangorestframework==3.9.0
|
||||||
docopt==0.6.2
|
docopt==0.6.2
|
||||||
execnet==1.5.0; python_version != '3.3.*'
|
docutils==0.14
|
||||||
|
execnet==1.5.0
|
||||||
factory-boy==2.11.1
|
factory-boy==2.11.1
|
||||||
faker==0.9.0; python_version >= '2.7'
|
faker==0.9.2
|
||||||
|
filelock==3.0.10
|
||||||
filemagic==1.6
|
filemagic==1.6
|
||||||
fuzzywuzzy==0.15.0
|
fuzzywuzzy[speedup]==0.15.0
|
||||||
gunicorn==19.9.0
|
gunicorn==19.9.0
|
||||||
idna==2.7
|
idna==2.7
|
||||||
|
imagesize==1.1.0
|
||||||
inotify-simple==1.1.8
|
inotify-simple==1.1.8
|
||||||
|
ipython-genutils==0.2.0
|
||||||
|
ipython==7.1.1
|
||||||
|
jedi==0.13.1
|
||||||
|
jinja2==2.10
|
||||||
langdetect==1.0.7
|
langdetect==1.0.7
|
||||||
|
markupsafe==1.0
|
||||||
more-itertools==4.3.0
|
more-itertools==4.3.0
|
||||||
pdftotext==2.1.0
|
numpy==1.15.1
|
||||||
pillow==5.2.0
|
packaging==18.0
|
||||||
pluggy==0.7.1; python_version != '3.3.*'
|
parso==0.3.1
|
||||||
py==1.6.0; python_version != '3.3.*'
|
pdftotext==2.1.1
|
||||||
|
pexpect==4.6.0
|
||||||
|
pickleshare==0.7.5
|
||||||
|
pillow==5.3.0
|
||||||
|
pluggy==0.8.0
|
||||||
|
psycopg2==2.7.6.1
|
||||||
|
prompt-toolkit==2.0.7
|
||||||
|
ptyprocess==0.6.0
|
||||||
|
py==1.7.0
|
||||||
pycodestyle==2.4.0
|
pycodestyle==2.4.0
|
||||||
|
pygments==2.2.0
|
||||||
pyocr==0.5.3
|
pyocr==0.5.3
|
||||||
|
pyparsing==2.3.0
|
||||||
pytest-cov==2.6.0
|
pytest-cov==2.6.0
|
||||||
pytest-django==3.4.2
|
pytest-django==3.4.3
|
||||||
pytest-env==0.6.2
|
pytest-env==0.6.2
|
||||||
pytest-forked==0.2; python_version != '3.3.*'
|
pytest-forked==0.2
|
||||||
pytest-sugar==0.9.1
|
pytest-sugar==0.9.1
|
||||||
pytest-xdist==1.23.0
|
pytest-xdist==1.24.0
|
||||||
pytest==3.8.0
|
pytest==3.9.3
|
||||||
python-dateutil==2.7.3
|
python-dateutil==2.7.5
|
||||||
python-dotenv==0.9.1
|
python-dotenv==0.9.1
|
||||||
python-gnupg==0.4.3
|
python-gnupg==0.4.3
|
||||||
python-levenshtein==0.12.0
|
python-levenshtein==0.12.0 ; extra == 'speedup'
|
||||||
pytz==2018.5
|
pytz==2018.7
|
||||||
regex==2018.8.29
|
regex==2018.11.2
|
||||||
requests==2.19.1
|
requests==2.20.0
|
||||||
six==1.11.0
|
six==1.11.0
|
||||||
|
scikit-learn==0.19.2
|
||||||
|
scipy==1.1.0
|
||||||
|
snowballstemmer==1.2.1
|
||||||
|
sphinx==1.8.1
|
||||||
|
sphinxcontrib-websupport==1.1.0
|
||||||
termcolor==1.1.0
|
termcolor==1.1.0
|
||||||
text-unidecode==1.2
|
text-unidecode==1.2
|
||||||
|
toml==0.10.0
|
||||||
|
tox==3.5.3
|
||||||
|
traitlets==4.3.2
|
||||||
tzlocal==1.5.1
|
tzlocal==1.5.1
|
||||||
urllib3==1.23; python_version != '3.3.*'
|
urllib3==1.24.1
|
||||||
|
virtualenv==16.1.0
|
||||||
|
wcwidth==0.1.7
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ Description=Paperless webserver
|
|||||||
[Service]
|
[Service]
|
||||||
User=paperless
|
User=paperless
|
||||||
Group=paperless
|
Group=paperless
|
||||||
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn /home/paperless/project/src/paperless.wsgi -w 2
|
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|||||||
@@ -1,31 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Install packages
|
|
||||||
apt-get update
|
|
||||||
apt-get build-dep -y python-imaging
|
|
||||||
apt-get install -y libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
|
|
||||||
apt-get install -y build-essential python3-dev python3-pip sqlite3 libsqlite3-dev git
|
|
||||||
apt-get install -y tesseract-ocr tesseract-ocr-eng imagemagick unpaper
|
|
||||||
|
|
||||||
# Python dependencies
|
|
||||||
pip3 install -r /opt/paperless/requirements.txt
|
|
||||||
|
|
||||||
# Create the environment file
|
|
||||||
cat /opt/paperless/paperless.conf.example | sed -e 's#CONSUMPTION_DIR=""#CONSUMPTION_DIR="/home/vagrant/consumption"#' > /etc/paperless.conf
|
|
||||||
chmod 0640 /etc/paperless.conf
|
|
||||||
chown root:vagrant /etc/paperless.conf
|
|
||||||
|
|
||||||
# Create the consumption directory
|
|
||||||
mkdir /home/vagrant/consumption
|
|
||||||
chown vagrant:vagrant /home/vagrant/consumption
|
|
||||||
|
|
||||||
echo "
|
|
||||||
|
|
||||||
|
|
||||||
Now follow the remaining steps in the Vagrant section of the setup
|
|
||||||
documentation to complete the process:
|
|
||||||
|
|
||||||
http://paperless.readthedocs.org/en/latest/setup.html#setup-installation-vagrant
|
|
||||||
|
|
||||||
|
|
||||||
"
|
|
||||||
64
src/documents/actions.py
Normal file → Executable file
64
src/documents/actions.py
Normal file → Executable file
@@ -4,7 +4,8 @@ from django.contrib.admin.utils import model_ngettext
|
|||||||
from django.core.exceptions import PermissionDenied
|
from django.core.exceptions import PermissionDenied
|
||||||
from django.template.response import TemplateResponse
|
from django.template.response import TemplateResponse
|
||||||
|
|
||||||
from documents.models import Correspondent, Tag
|
from documents.classifier import DocumentClassifier
|
||||||
|
from documents.models import Correspondent, DocumentType, Tag
|
||||||
|
|
||||||
|
|
||||||
def select_action(
|
def select_action(
|
||||||
@@ -17,9 +18,9 @@ def select_action(
|
|||||||
if not modeladmin.has_change_permission(request):
|
if not modeladmin.has_change_permission(request):
|
||||||
raise PermissionDenied
|
raise PermissionDenied
|
||||||
|
|
||||||
if request.POST.get('post'):
|
if request.POST.get("post"):
|
||||||
n = queryset.count()
|
n = queryset.count()
|
||||||
selected_object = modelclass.objects.get(id=request.POST.get('obj_id'))
|
selected_object = modelclass.objects.get(id=request.POST.get("obj_id"))
|
||||||
if n:
|
if n:
|
||||||
for document in queryset:
|
for document in queryset:
|
||||||
if document_action:
|
if document_action:
|
||||||
@@ -137,6 +138,57 @@ def remove_correspondent_from_selected(modeladmin, request, queryset):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def set_document_type_on_selected(modeladmin, request, queryset):
|
||||||
|
return select_action(
|
||||||
|
modeladmin=modeladmin,
|
||||||
|
request=request,
|
||||||
|
queryset=queryset,
|
||||||
|
title="Set document type on multiple documents",
|
||||||
|
action="set_document_type_on_selected",
|
||||||
|
modelclass=DocumentType,
|
||||||
|
success_message="Successfully set document type %(selected_object)s "
|
||||||
|
"on %(count)d %(items)s.",
|
||||||
|
queryset_action=lambda qs, document_type: qs.update(
|
||||||
|
document_type=document_type)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_document_type_from_selected(modeladmin, request, queryset):
|
||||||
|
return simple_action(
|
||||||
|
modeladmin=modeladmin,
|
||||||
|
request=request,
|
||||||
|
queryset=queryset,
|
||||||
|
success_message="Successfully removed document type from %(count)d "
|
||||||
|
"%(items)s.",
|
||||||
|
queryset_action=lambda qs: qs.update(document_type=None)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def run_document_classifier_on_selected(modeladmin, request, queryset):
|
||||||
|
clf = DocumentClassifier()
|
||||||
|
try:
|
||||||
|
clf.reload()
|
||||||
|
return simple_action(
|
||||||
|
modeladmin=modeladmin,
|
||||||
|
request=request,
|
||||||
|
queryset=queryset,
|
||||||
|
success_message="Successfully applied document classifier to "
|
||||||
|
"%(count)d %(items)s.",
|
||||||
|
document_action=lambda doc: clf.classify_document(
|
||||||
|
doc,
|
||||||
|
classify_correspondent=True,
|
||||||
|
classify_tags=True,
|
||||||
|
classify_document_type=True)
|
||||||
|
)
|
||||||
|
except FileNotFoundError:
|
||||||
|
modeladmin.message_user(
|
||||||
|
request,
|
||||||
|
"Classifier model file not found.",
|
||||||
|
messages.ERROR
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
add_tag_to_selected.short_description = "Add tag to selected documents"
|
add_tag_to_selected.short_description = "Add tag to selected documents"
|
||||||
remove_tag_from_selected.short_description = \
|
remove_tag_from_selected.short_description = \
|
||||||
"Remove tag from selected documents"
|
"Remove tag from selected documents"
|
||||||
@@ -144,3 +196,9 @@ set_correspondent_on_selected.short_description = \
|
|||||||
"Set correspondent on selected documents"
|
"Set correspondent on selected documents"
|
||||||
remove_correspondent_from_selected.short_description = \
|
remove_correspondent_from_selected.short_description = \
|
||||||
"Remove correspondent from selected documents"
|
"Remove correspondent from selected documents"
|
||||||
|
set_document_type_on_selected.short_description = \
|
||||||
|
"Set document type on selected documents"
|
||||||
|
remove_document_type_from_selected.short_description = \
|
||||||
|
"Remove document type from selected documents"
|
||||||
|
run_document_classifier_on_selected.short_description = \
|
||||||
|
"Run document classifier on selected"
|
||||||
|
|||||||
82
src/documents/admin.py
Normal file → Executable file
82
src/documents/admin.py
Normal file → Executable file
@@ -16,10 +16,13 @@ from documents.actions import (
|
|||||||
add_tag_to_selected,
|
add_tag_to_selected,
|
||||||
remove_correspondent_from_selected,
|
remove_correspondent_from_selected,
|
||||||
remove_tag_from_selected,
|
remove_tag_from_selected,
|
||||||
set_correspondent_on_selected
|
set_correspondent_on_selected,
|
||||||
|
set_document_type_on_selected,
|
||||||
|
remove_document_type_from_selected,
|
||||||
|
run_document_classifier_on_selected
|
||||||
)
|
)
|
||||||
|
|
||||||
from .models import Correspondent, Document, Log, Tag
|
from .models import Correspondent, Document, DocumentType, Log, Tag
|
||||||
|
|
||||||
|
|
||||||
class FinancialYearFilter(admin.SimpleListFilter):
|
class FinancialYearFilter(admin.SimpleListFilter):
|
||||||
@@ -61,12 +64,12 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
|||||||
|
|
||||||
# To keep it simple we use the same string for both
|
# To keep it simple we use the same string for both
|
||||||
# query parameter and the display.
|
# query parameter and the display.
|
||||||
return (query, query)
|
return query, query
|
||||||
|
|
||||||
else:
|
else:
|
||||||
query = "{0}-{0}".format(date.year)
|
query = "{0}-{0}".format(date.year)
|
||||||
display = "{}".format(date.year)
|
display = "{}".format(date.year)
|
||||||
return (query, display)
|
return query, display
|
||||||
|
|
||||||
def lookups(self, request, model_admin):
|
def lookups(self, request, model_admin):
|
||||||
if not settings.FY_START or not settings.FY_END:
|
if not settings.FY_START or not settings.FY_END:
|
||||||
@@ -88,25 +91,24 @@ class FinancialYearFilter(admin.SimpleListFilter):
|
|||||||
|
|
||||||
|
|
||||||
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
|
class RecentCorrespondentFilter(admin.RelatedFieldListFilter):
|
||||||
|
"""
|
||||||
def __init__(self, *args, **kwargs):
|
If PAPERLESS_RECENT_CORRESPONDENT_YEARS is set, we limit the available
|
||||||
super().__init__(*args, **kwargs)
|
correspondents to documents sent our way over the past ``n`` years.
|
||||||
self.title = "correspondent (recent)"
|
"""
|
||||||
|
|
||||||
def field_choices(self, field, request, model_admin):
|
def field_choices(self, field, request, model_admin):
|
||||||
|
|
||||||
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
|
years = settings.PAPERLESS_RECENT_CORRESPONDENT_YEARS
|
||||||
days = 365 * years
|
correspondents = Correspondent.objects.all()
|
||||||
|
|
||||||
lookups = []
|
|
||||||
if years and years > 0:
|
if years and years > 0:
|
||||||
correspondents = Correspondent.objects.filter(
|
self.title = "Correspondent (Recent)"
|
||||||
|
days = 365 * years
|
||||||
|
correspondents = correspondents.filter(
|
||||||
documents__created__gte=datetime.now() - timedelta(days=days)
|
documents__created__gte=datetime.now() - timedelta(days=days)
|
||||||
).distinct()
|
).distinct()
|
||||||
for c in correspondents:
|
|
||||||
lookups.append((c.id, c.name))
|
|
||||||
|
|
||||||
return lookups
|
return [(c.id, c.name) for c in correspondents]
|
||||||
|
|
||||||
|
|
||||||
class CommonAdmin(admin.ModelAdmin):
|
class CommonAdmin(admin.ModelAdmin):
|
||||||
@@ -117,13 +119,11 @@ class CorrespondentAdmin(CommonAdmin):
|
|||||||
|
|
||||||
list_display = (
|
list_display = (
|
||||||
"name",
|
"name",
|
||||||
"match",
|
"automatic_classification",
|
||||||
"matching_algorithm",
|
|
||||||
"document_count",
|
"document_count",
|
||||||
"last_correspondence"
|
"last_correspondence"
|
||||||
)
|
)
|
||||||
list_filter = ("matching_algorithm",)
|
list_editable = ("automatic_classification",)
|
||||||
list_editable = ("match", "matching_algorithm")
|
|
||||||
|
|
||||||
readonly_fields = ("slug",)
|
readonly_fields = ("slug",)
|
||||||
|
|
||||||
@@ -146,15 +146,38 @@ class CorrespondentAdmin(CommonAdmin):
|
|||||||
|
|
||||||
class TagAdmin(CommonAdmin):
|
class TagAdmin(CommonAdmin):
|
||||||
|
|
||||||
list_display = ("name", "colour", "match", "matching_algorithm",
|
list_display = (
|
||||||
"document_count")
|
"name",
|
||||||
list_filter = ("colour", "matching_algorithm")
|
"colour",
|
||||||
list_editable = ("colour", "match", "matching_algorithm")
|
"automatic_classification",
|
||||||
|
"document_count")
|
||||||
|
list_filter = ("colour",)
|
||||||
|
list_editable = ("colour", "automatic_classification")
|
||||||
|
|
||||||
|
readonly_fields = ("slug",)
|
||||||
|
|
||||||
|
class Media:
|
||||||
|
js = ("js/colours.js",)
|
||||||
|
|
||||||
|
def get_queryset(self, request):
|
||||||
|
qs = super(TagAdmin, self).get_queryset(request)
|
||||||
|
qs = qs.annotate(document_count=models.Count("documents"))
|
||||||
|
return qs
|
||||||
|
|
||||||
|
def document_count(self, obj):
|
||||||
|
return obj.document_count
|
||||||
|
document_count.admin_order_field = "document_count"
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentTypeAdmin(CommonAdmin):
|
||||||
|
|
||||||
|
list_display = ("name", "automatic_classification", "document_count")
|
||||||
|
list_editable = ("automatic_classification",)
|
||||||
|
|
||||||
readonly_fields = ("slug",)
|
readonly_fields = ("slug",)
|
||||||
|
|
||||||
def get_queryset(self, request):
|
def get_queryset(self, request):
|
||||||
qs = super(TagAdmin, self).get_queryset(request)
|
qs = super(DocumentTypeAdmin, self).get_queryset(request)
|
||||||
qs = qs.annotate(document_count=models.Count("documents"))
|
qs = qs.annotate(document_count=models.Count("documents"))
|
||||||
return qs
|
return qs
|
||||||
|
|
||||||
@@ -173,11 +196,11 @@ class DocumentAdmin(CommonAdmin):
|
|||||||
search_fields = ("correspondent__name", "title", "content", "tags__name")
|
search_fields = ("correspondent__name", "title", "content", "tags__name")
|
||||||
readonly_fields = ("added", "file_type", "storage_type",)
|
readonly_fields = ("added", "file_type", "storage_type",)
|
||||||
list_display = ("title", "created", "added", "thumbnail", "correspondent",
|
list_display = ("title", "created", "added", "thumbnail", "correspondent",
|
||||||
"tags_")
|
"tags_", "archive_serial_number", "document_type")
|
||||||
list_filter = (
|
list_filter = (
|
||||||
|
"document_type",
|
||||||
"tags",
|
"tags",
|
||||||
("correspondent", RecentCorrespondentFilter),
|
("correspondent", RecentCorrespondentFilter),
|
||||||
"correspondent",
|
|
||||||
FinancialYearFilter
|
FinancialYearFilter
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -189,7 +212,10 @@ class DocumentAdmin(CommonAdmin):
|
|||||||
add_tag_to_selected,
|
add_tag_to_selected,
|
||||||
remove_tag_from_selected,
|
remove_tag_from_selected,
|
||||||
set_correspondent_on_selected,
|
set_correspondent_on_selected,
|
||||||
remove_correspondent_from_selected
|
remove_correspondent_from_selected,
|
||||||
|
set_document_type_on_selected,
|
||||||
|
remove_document_type_from_selected,
|
||||||
|
run_document_classifier_on_selected
|
||||||
]
|
]
|
||||||
|
|
||||||
date_hierarchy = "created"
|
date_hierarchy = "created"
|
||||||
@@ -222,6 +248,9 @@ class DocumentAdmin(CommonAdmin):
|
|||||||
extra_context=None):
|
extra_context=None):
|
||||||
|
|
||||||
extra_context = extra_context or {}
|
extra_context = extra_context or {}
|
||||||
|
doc = Document.objects.get(id=object_id)
|
||||||
|
extra_context["download_url"] = doc.download_url
|
||||||
|
extra_context["file_type"] = doc.file_type
|
||||||
|
|
||||||
if self.document_queue and object_id:
|
if self.document_queue and object_id:
|
||||||
if int(object_id) in self.document_queue:
|
if int(object_id) in self.document_queue:
|
||||||
@@ -345,6 +374,7 @@ class LogAdmin(CommonAdmin):
|
|||||||
|
|
||||||
admin.site.register(Correspondent, CorrespondentAdmin)
|
admin.site.register(Correspondent, CorrespondentAdmin)
|
||||||
admin.site.register(Tag, TagAdmin)
|
admin.site.register(Tag, TagAdmin)
|
||||||
|
admin.site.register(DocumentType, DocumentTypeAdmin)
|
||||||
admin.site.register(Document, DocumentAdmin)
|
admin.site.register(Document, DocumentAdmin)
|
||||||
admin.site.register(Log, LogAdmin)
|
admin.site.register(Log, LogAdmin)
|
||||||
|
|
||||||
|
|||||||
@@ -11,8 +11,8 @@ class DocumentsConfig(AppConfig):
|
|||||||
from .signals import document_consumption_started
|
from .signals import document_consumption_started
|
||||||
from .signals import document_consumption_finished
|
from .signals import document_consumption_finished
|
||||||
from .signals.handlers import (
|
from .signals.handlers import (
|
||||||
set_correspondent,
|
classify_document,
|
||||||
set_tags,
|
add_inbox_tags,
|
||||||
run_pre_consume_script,
|
run_pre_consume_script,
|
||||||
run_post_consume_script,
|
run_post_consume_script,
|
||||||
cleanup_document_deletion,
|
cleanup_document_deletion,
|
||||||
@@ -21,8 +21,8 @@ class DocumentsConfig(AppConfig):
|
|||||||
|
|
||||||
document_consumption_started.connect(run_pre_consume_script)
|
document_consumption_started.connect(run_pre_consume_script)
|
||||||
|
|
||||||
document_consumption_finished.connect(set_tags)
|
document_consumption_finished.connect(classify_document)
|
||||||
document_consumption_finished.connect(set_correspondent)
|
document_consumption_finished.connect(add_inbox_tags)
|
||||||
document_consumption_finished.connect(set_log_entry)
|
document_consumption_finished.connect(set_log_entry)
|
||||||
document_consumption_finished.connect(run_post_consume_script)
|
document_consumption_finished.connect(run_post_consume_script)
|
||||||
|
|
||||||
|
|||||||
240
src/documents/classifier.py
Executable file
240
src/documents/classifier.py
Executable file
@@ -0,0 +1,240 @@
|
|||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
from sklearn.feature_extraction.text import CountVectorizer
|
||||||
|
from sklearn.neural_network import MLPClassifier
|
||||||
|
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
|
||||||
|
|
||||||
|
from documents.models import Correspondent, DocumentType, Tag, Document
|
||||||
|
from paperless import settings
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_content(content):
|
||||||
|
content = content.lower()
|
||||||
|
content = content.strip()
|
||||||
|
content = content.replace("\n", " ")
|
||||||
|
content = content.replace("\r", " ")
|
||||||
|
while content.find(" ") > -1:
|
||||||
|
content = content.replace(" ", " ")
|
||||||
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentClassifier(object):
    """Predicts tags, correspondents and document types for documents.

    Holds a character n-gram ``CountVectorizer``, one binarizer and one
    ``MLPClassifier`` per target (tags, correspondent, document type).
    All fitted objects are pickled to / unpickled from
    ``settings.MODEL_FILE`` in a fixed order (see ``save_classifier`` and
    ``reload`` — the two orders must stay in sync).
    """

    def __init__(self):
        # mtime of the model file this instance last loaded; 0 means
        # "never loaded", so the first reload() always reads the file.
        self.classifier_version = 0

        # Fitted CountVectorizer shared by all three classifiers.
        self.data_vectorizer = None

        # Binarizers map model ids <-> classifier output vectors.
        self.tags_binarizer = None
        self.correspondent_binarizer = None
        self.document_type_binarizer = None

        # The per-target classifiers; None when the target had no
        # classes at training time.
        self.tags_classifier = None
        self.correspondent_classifier = None
        self.document_type_classifier = None

    def reload(self):
        """Load models from MODEL_FILE if it changed since the last load.

        Raises:
            FileNotFoundError: if ``settings.MODEL_FILE`` does not exist
                (propagated from ``os.path.getmtime``).

        NOTE(review): uses ``pickle.load`` — the model file must only
        ever come from a trusted source (this application's own
        ``save_classifier``), as unpickling untrusted data can execute
        arbitrary code.
        """
        if os.path.getmtime(settings.MODEL_FILE) > self.classifier_version:
            logging.getLogger(__name__).info("Reloading classifier models")
            with open(settings.MODEL_FILE, "rb") as f:
                # Load order must mirror the dump order in
                # save_classifier() exactly.
                self.data_vectorizer = pickle.load(f)
                self.tags_binarizer = pickle.load(f)
                self.correspondent_binarizer = pickle.load(f)
                self.document_type_binarizer = pickle.load(f)

                self.tags_classifier = pickle.load(f)
                self.correspondent_classifier = pickle.load(f)
                self.document_type_classifier = pickle.load(f)
            # Remember the file's mtime so subsequent reload() calls are
            # no-ops until the file is rewritten.
            self.classifier_version = os.path.getmtime(settings.MODEL_FILE)

    def save_classifier(self):
        """Serialize all fitted objects to MODEL_FILE.

        The dump order defines the file format and must mirror the load
        order in ``reload()``.
        """
        with open(settings.MODEL_FILE, "wb") as f:
            pickle.dump(self.data_vectorizer, f)

            pickle.dump(self.tags_binarizer, f)
            pickle.dump(self.correspondent_binarizer, f)
            pickle.dump(self.document_type_binarizer, f)

            pickle.dump(self.tags_classifier, f)
            pickle.dump(self.correspondent_classifier, f)
            pickle.dump(self.document_type_classifier, f)

    def train(self):
        """Fit vectorizer, binarizers and classifiers from the database.

        Uses every document that does not carry an inbox tag as training
        data. Targets whose ``automatic_classification`` flag is False
        contribute the sentinel label -1 (correspondent / document type)
        or are excluded from the tag label list. A classifier is only
        trained when its target has at least one class; otherwise it is
        reset to None.
        """
        data = list()
        labels_tags = list()
        labels_correspondent = list()
        labels_document_type = list()

        # Step 1: Extract and preprocess training data from the database.
        logging.getLogger(__name__).info("Gathering data from database...")
        for doc in Document.objects.exclude(tags__is_inbox_tag=True):
            data.append(preprocess_content(doc.content))

            # -1 encodes "no document type" for the LabelBinarizer.
            y = -1
            if doc.document_type:
                if doc.document_type.automatic_classification:
                    y = doc.document_type.id
            labels_document_type.append(y)

            # -1 encodes "no correspondent" likewise.
            y = -1
            if doc.correspondent:
                if doc.correspondent.automatic_classification:
                    y = doc.correspondent.id
            labels_correspondent.append(y)

            # Tags are multi-label: collect all auto-classifiable tag ids.
            tags = [tag.id for tag in doc.tags.filter(
                automatic_classification=True
            )]
            labels_tags.append(tags)

        labels_tags_unique = set([tag for tags in labels_tags for tag in tags])
        logging.getLogger(__name__).info(
            "{} documents, {} tag(s), {} correspondent(s), "
            "{} document type(s).".format(
                len(data),
                len(labels_tags_unique),
                len(set(labels_correspondent)),
                len(set(labels_document_type))
            )
        )

        # Step 2: vectorize data
        logging.getLogger(__name__).info("Vectorizing data...")
        self.data_vectorizer = CountVectorizer(
            analyzer="char",
            ngram_range=(3, 5),
            min_df=0.1
        )
        data_vectorized = self.data_vectorizer.fit_transform(data)

        self.tags_binarizer = MultiLabelBinarizer()
        labels_tags_vectorized = self.tags_binarizer.fit_transform(labels_tags)

        self.correspondent_binarizer = LabelBinarizer()
        labels_correspondent_vectorized = \
            self.correspondent_binarizer.fit_transform(labels_correspondent)

        self.document_type_binarizer = LabelBinarizer()
        labels_document_type_vectorized = \
            self.document_type_binarizer.fit_transform(labels_document_type)

        # Step 3: train the classifiers
        if len(self.tags_binarizer.classes_) > 0:
            logging.getLogger(__name__).info("Training tags classifier...")
            self.tags_classifier = MLPClassifier(verbose=True)
            self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
        else:
            self.tags_classifier = None
            logging.getLogger(__name__).info(
                "There are no tags. Not training tags classifier."
            )

        if len(self.correspondent_binarizer.classes_) > 0:
            logging.getLogger(__name__).info(
                "Training correspondent classifier..."
            )
            self.correspondent_classifier = MLPClassifier(verbose=True)
            self.correspondent_classifier.fit(
                data_vectorized,
                labels_correspondent_vectorized
            )
        else:
            self.correspondent_classifier = None
            logging.getLogger(__name__).info(
                "There are no correspondents. Not training correspondent "
                "classifier."
            )

        if len(self.document_type_binarizer.classes_) > 0:
            logging.getLogger(__name__).info(
                "Training document type classifier..."
            )
            self.document_type_classifier = MLPClassifier(verbose=True)
            self.document_type_classifier.fit(
                data_vectorized,
                labels_document_type_vectorized
            )
        else:
            self.document_type_classifier = None
            logging.getLogger(__name__).info(
                "There are no document types. Not training document type "
                "classifier."
            )

    def classify_document(
            self, document, classify_correspondent=False,
            classify_document_type=False, classify_tags=False,
            replace_tags=False):
        """Run the requested classifiers on one document and save it.

        Each target is only classified when both its flag is set and the
        corresponding classifier exists (i.e. was trained). Tag changes
        are applied through the M2M manager; correspondent and document
        type are persisted via ``document.save(update_fields=...)``.

        Args:
            document: the Document instance to classify (mutated in place).
            classify_correspondent: assign a correspondent if True.
            classify_document_type: assign a document type if True.
            classify_tags: assign tags if True.
            replace_tags: clear existing tags before adding detected ones.
        """
        X = self.data_vectorizer.transform(
            [preprocess_content(document.content)]
        )

        if classify_correspondent and self.correspondent_classifier:
            self._classify_correspondent(X, document)

        if classify_document_type and self.document_type_classifier:
            self._classify_document_type(X, document)

        if classify_tags and self.tags_classifier:
            self._classify_tags(X, document, replace_tags)

        document.save(update_fields=("correspondent", "document_type"))

    def _classify_correspondent(self, X, document):
        """Predict and assign a correspondent for the vectorized doc X.

        A predicted id of -1 means "no correspondent" and clears the
        field. If the predicted id no longer exists in the database, a
        warning is logged and the document is left unchanged.
        """
        y = self.correspondent_classifier.predict(X)
        correspondent_id = self.correspondent_binarizer.inverse_transform(y)[0]
        try:
            correspondent = None
            if correspondent_id != -1:
                correspondent = Correspondent.objects.get(id=correspondent_id)
                logging.getLogger(__name__).info(
                    "Detected correspondent: {}".format(correspondent.name)
                )
            else:
                logging.getLogger(__name__).info("Detected correspondent: -")
            document.correspondent = correspondent
        except Correspondent.DoesNotExist:
            logging.getLogger(__name__).warning(
                "Detected correspondent with id {} does not exist "
                "anymore! Did you delete it?".format(correspondent_id)
            )

    def _classify_document_type(self, X, document):
        """Predict and assign a document type for the vectorized doc X.

        Mirrors ``_classify_correspondent``: -1 clears the field, and a
        stale id logs a warning without modifying the document.
        """
        y = self.document_type_classifier.predict(X)
        document_type_id = self.document_type_binarizer.inverse_transform(y)[0]
        try:
            document_type = None
            if document_type_id != -1:
                document_type = DocumentType.objects.get(id=document_type_id)
                logging.getLogger(__name__).info(
                    "Detected document type: {}".format(document_type.name)
                )
            else:
                logging.getLogger(__name__).info("Detected document type: -")
            document.document_type = document_type
        except DocumentType.DoesNotExist:
            logging.getLogger(__name__).warning(
                "Detected document type with id {} does not exist "
                "anymore! Did you delete it?".format(document_type_id)
            )

    def _classify_tags(self, X, document, replace_tags):
        """Predict tags for the vectorized doc X and add them to it.

        With ``replace_tags`` the existing tag set is cleared first
        (takes effect immediately through the M2M manager). Stale
        predicted tag ids are logged and skipped.
        """
        y = self.tags_classifier.predict(X)
        tags_ids = self.tags_binarizer.inverse_transform(y)[0]
        if replace_tags:
            document.tags.clear()
        for tag_id in tags_ids:
            try:
                tag = Tag.objects.get(id=tag_id)
                logging.getLogger(__name__).info(
                    "Detected tag: {}".format(tag.name)
                )
                document.tags.add(tag)
            except Tag.DoesNotExist:
                logging.getLogger(__name__).warning(
                    "Detected tag with id {} does not exist anymore! Did "
                    "you delete it?".format(tag_id)
                )
|
||||||
2
src/documents/consumer.py
Normal file → Executable file
2
src/documents/consumer.py
Normal file → Executable file
@@ -225,7 +225,7 @@ class Consumer:
|
|||||||
storage_type=self.storage_type
|
storage_type=self.storage_type
|
||||||
)
|
)
|
||||||
|
|
||||||
relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags))
|
relevant_tags = set(file_info.tags)
|
||||||
if relevant_tags:
|
if relevant_tags:
|
||||||
tag_names = ", ".join([t.slug for t in relevant_tags])
|
tag_names = ", ".join([t.slug for t in relevant_tags])
|
||||||
self.log("debug", "Tagging with {}".format(tag_names))
|
self.log("debug", "Tagging with {}".format(tag_names))
|
||||||
|
|||||||
18
src/documents/filters.py
Normal file → Executable file
18
src/documents/filters.py
Normal file → Executable file
@@ -1,6 +1,6 @@
|
|||||||
from django_filters.rest_framework import BooleanFilter, FilterSet
|
from django_filters.rest_framework import BooleanFilter, FilterSet
|
||||||
|
|
||||||
from .models import Correspondent, Document, Tag
|
from .models import Correspondent, Document, Tag, DocumentType
|
||||||
|
|
||||||
|
|
||||||
CHAR_KWARGS = (
|
CHAR_KWARGS = (
|
||||||
@@ -35,6 +35,19 @@ class TagFilterSet(FilterSet):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentTypeFilterSet(FilterSet):
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
model = DocumentType
|
||||||
|
fields = {
|
||||||
|
"name": [
|
||||||
|
"startswith", "endswith", "contains",
|
||||||
|
"istartswith", "iendswith", "icontains"
|
||||||
|
],
|
||||||
|
"slug": ["istartswith", "iendswith", "icontains"]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class DocumentFilterSet(FilterSet):
|
class DocumentFilterSet(FilterSet):
|
||||||
|
|
||||||
tags_empty = BooleanFilter(
|
tags_empty = BooleanFilter(
|
||||||
@@ -57,4 +70,7 @@ class DocumentFilterSet(FilterSet):
|
|||||||
"tags__name": CHAR_KWARGS,
|
"tags__name": CHAR_KWARGS,
|
||||||
"tags__slug": CHAR_KWARGS,
|
"tags__slug": CHAR_KWARGS,
|
||||||
|
|
||||||
|
"document_type__name": CHAR_KWARGS,
|
||||||
|
"document_type__slug": CHAR_KWARGS,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,82 +0,0 @@
|
|||||||
import sys
|
|
||||||
|
|
||||||
from django.core.management.base import BaseCommand
|
|
||||||
|
|
||||||
from documents.models import Correspondent, Document
|
|
||||||
|
|
||||||
from ...mixins import Renderable
|
|
||||||
|
|
||||||
|
|
||||||
class Command(Renderable, BaseCommand):
|
|
||||||
|
|
||||||
help = """
|
|
||||||
Using the current set of correspondent rules, apply said rules to all
|
|
||||||
documents in the database, effectively allowing you to back-tag all
|
|
||||||
previously indexed documents with correspondent created (or modified)
|
|
||||||
after their initial import.
|
|
||||||
""".replace(" ", "")
|
|
||||||
|
|
||||||
TOO_MANY_CONTINUE = (
|
|
||||||
"Detected {} potential correspondents for {}, so we've opted for {}")
|
|
||||||
TOO_MANY_SKIP = (
|
|
||||||
"Detected {} potential correspondents for {}, so we're skipping it")
|
|
||||||
CHANGE_MESSAGE = (
|
|
||||||
'Document {}: "{}" was given the correspondent id {}: "{}"')
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
self.verbosity = 0
|
|
||||||
BaseCommand.__init__(self, *args, **kwargs)
|
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
|
||||||
parser.add_argument(
|
|
||||||
"--use-first",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="By default this command won't try to assign a correspondent "
|
|
||||||
"if more than one matches the document. Use this flag if "
|
|
||||||
"you'd rather it just pick the first one it finds."
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
|
||||||
|
|
||||||
self.verbosity = options["verbosity"]
|
|
||||||
|
|
||||||
for document in Document.objects.filter(correspondent__isnull=True):
|
|
||||||
|
|
||||||
potential_correspondents = list(
|
|
||||||
Correspondent.match_all(document.content))
|
|
||||||
|
|
||||||
if not potential_correspondents:
|
|
||||||
continue
|
|
||||||
|
|
||||||
potential_count = len(potential_correspondents)
|
|
||||||
correspondent = potential_correspondents[0]
|
|
||||||
|
|
||||||
if potential_count > 1:
|
|
||||||
if not options["use_first"]:
|
|
||||||
print(
|
|
||||||
self.TOO_MANY_SKIP.format(potential_count, document),
|
|
||||||
file=sys.stderr
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
print(
|
|
||||||
self.TOO_MANY_CONTINUE.format(
|
|
||||||
potential_count,
|
|
||||||
document,
|
|
||||||
correspondent
|
|
||||||
),
|
|
||||||
file=sys.stderr
|
|
||||||
)
|
|
||||||
|
|
||||||
document.correspondent = correspondent
|
|
||||||
document.save(update_fields=("correspondent",))
|
|
||||||
|
|
||||||
print(
|
|
||||||
self.CHANGE_MESSAGE.format(
|
|
||||||
document.pk,
|
|
||||||
document.title,
|
|
||||||
correspondent.pk,
|
|
||||||
correspondent.name
|
|
||||||
),
|
|
||||||
file=sys.stderr
|
|
||||||
)
|
|
||||||
25
src/documents/management/commands/document_create_classifier.py
Executable file
25
src/documents/management/commands/document_create_classifier.py
Executable file
@@ -0,0 +1,25 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from documents.classifier import DocumentClassifier
|
||||||
|
from paperless import settings
|
||||||
|
from ...mixins import Renderable
|
||||||
|
|
||||||
|
|
||||||
|
class Command(Renderable, BaseCommand):
|
||||||
|
|
||||||
|
help = """
|
||||||
|
Trains the classifier on your data and saves the resulting models to a
|
||||||
|
file. The document consumer will then automatically use this new model.
|
||||||
|
""".replace(" ", "")
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
BaseCommand.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
|
def handle(self, *args, **options):
|
||||||
|
clf = DocumentClassifier()
|
||||||
|
clf.train()
|
||||||
|
logging.getLogger(__name__).info(
|
||||||
|
"Saving models to {}...".format(settings.MODEL_FILE)
|
||||||
|
)
|
||||||
|
clf.save_classifier()
|
||||||
@@ -6,7 +6,7 @@ import shutil
|
|||||||
from django.core.management.base import BaseCommand, CommandError
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
from django.core import serializers
|
from django.core import serializers
|
||||||
|
|
||||||
from documents.models import Document, Correspondent, Tag
|
from documents.models import Document, Correspondent, Tag, DocumentType
|
||||||
from paperless.db import GnuPG
|
from paperless.db import GnuPG
|
||||||
|
|
||||||
from ...mixins import Renderable
|
from ...mixins import Renderable
|
||||||
@@ -96,6 +96,9 @@ class Command(Renderable, BaseCommand):
|
|||||||
manifest += json.loads(serializers.serialize(
|
manifest += json.loads(serializers.serialize(
|
||||||
"json", Tag.objects.all()))
|
"json", Tag.objects.all()))
|
||||||
|
|
||||||
|
manifest += json.loads(serializers.serialize(
|
||||||
|
"json", DocumentType.objects.all()))
|
||||||
|
|
||||||
with open(os.path.join(self.target, "manifest.json"), "w") as f:
|
with open(os.path.join(self.target, "manifest.json"), "w") as f:
|
||||||
json.dump(manifest, f, indent=2)
|
json.dump(manifest, f, indent=2)
|
||||||
|
|
||||||
|
|||||||
64
src/documents/management/commands/document_retagger.py
Normal file → Executable file
64
src/documents/management/commands/document_retagger.py
Normal file → Executable file
@@ -1,5 +1,8 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from documents.classifier import DocumentClassifier
|
||||||
from documents.models import Document, Tag
|
from documents.models import Document, Tag
|
||||||
|
|
||||||
from ...mixins import Renderable
|
from ...mixins import Renderable
|
||||||
@@ -8,25 +11,66 @@ from ...mixins import Renderable
|
|||||||
class Command(Renderable, BaseCommand):
|
class Command(Renderable, BaseCommand):
|
||||||
|
|
||||||
help = """
|
help = """
|
||||||
Using the current set of tagging rules, apply said rules to all
|
Using the current classification model, assigns correspondents, tags
|
||||||
documents in the database, effectively allowing you to back-tag all
|
and document types to all documents, effectively allowing you to
|
||||||
previously indexed documents with tags created (or modified) after
|
back-tag all previously indexed documents with metadata created (or
|
||||||
their initial import.
|
modified) after their initial import.
|
||||||
""".replace(" ", "")
|
""".replace(" ", "")
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
self.verbosity = 0
|
self.verbosity = 0
|
||||||
BaseCommand.__init__(self, *args, **kwargs)
|
BaseCommand.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
|
def add_arguments(self, parser):
|
||||||
|
parser.add_argument(
|
||||||
|
"-c", "--correspondent",
|
||||||
|
action="store_true"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-T", "--tags",
|
||||||
|
action="store_true"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-t", "--type",
|
||||||
|
action="store_true"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-i", "--inbox-only",
|
||||||
|
action="store_true"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-r", "--replace-tags",
|
||||||
|
action="store_true"
|
||||||
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
|
||||||
self.verbosity = options["verbosity"]
|
self.verbosity = options["verbosity"]
|
||||||
|
|
||||||
for document in Document.objects.all():
|
if options["inbox_only"]:
|
||||||
|
queryset = Document.objects.filter(tags__is_inbox_tag=True)
|
||||||
|
else:
|
||||||
|
queryset = Document.objects.all()
|
||||||
|
documents = queryset.distinct()
|
||||||
|
|
||||||
tags = Tag.objects.exclude(
|
logging.getLogger(__name__).info("Loading classifier")
|
||||||
pk__in=document.tags.values_list("pk", flat=True))
|
clf = DocumentClassifier()
|
||||||
|
try:
|
||||||
|
clf.reload()
|
||||||
|
except FileNotFoundError:
|
||||||
|
logging.getLogger(__name__).fatal("Cannot classify documents, "
|
||||||
|
"classifier model file was not "
|
||||||
|
"found.")
|
||||||
|
return
|
||||||
|
|
||||||
for tag in Tag.match_all(document.content, tags):
|
for document in documents:
|
||||||
print('Tagging {} with "{}"'.format(document, tag))
|
logging.getLogger(__name__).info(
|
||||||
document.tags.add(tag)
|
"Processing document {}".format(document.title)
|
||||||
|
)
|
||||||
|
clf.classify_document(
|
||||||
|
document,
|
||||||
|
classify_document_type=options["type"],
|
||||||
|
classify_tags=options["tags"],
|
||||||
|
classify_correspondent=options["correspondent"],
|
||||||
|
replace_tags=options["replace_tags"]
|
||||||
|
)
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ def re_slug_all_the_things(apps, schema_editor):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
Tag = apps.get_model("documents", "Tag")
|
Tag = apps.get_model("documents", "Tag")
|
||||||
Correspondent = apps.get_model("documents", "Tag")
|
Correspondent = apps.get_model("documents", "Correspondent")
|
||||||
|
|
||||||
for klass in (Tag, Correspondent):
|
for klass in (Tag, Correspondent):
|
||||||
for instance in klass.objects.all():
|
for instance in klass.objects.all():
|
||||||
|
|||||||
23
src/documents/migrations/1001_workflow_improvements.py
Executable file
23
src/documents/migrations/1001_workflow_improvements.py
Executable file
@@ -0,0 +1,23 @@
|
|||||||
|
# Generated by Django 2.0.7 on 2018-07-12 09:52
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('documents', '0022_auto_20181007_1420'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='document',
|
||||||
|
name='archive_serial_number',
|
||||||
|
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='tag',
|
||||||
|
name='is_inbox_tag',
|
||||||
|
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
|
||||||
|
),
|
||||||
|
]
|
||||||
33
src/documents/migrations/1002_auto_20180823_1155.py
Normal file
33
src/documents/migrations/1002_auto_20180823_1155.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# Generated by Django 2.0.7 on 2018-08-23 11:55
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
import django.db.models.deletion
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('documents', '1001_workflow_improvements'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.CreateModel(
|
||||||
|
name='DocumentType',
|
||||||
|
fields=[
|
||||||
|
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
||||||
|
('name', models.CharField(max_length=128, unique=True)),
|
||||||
|
('slug', models.SlugField(blank=True)),
|
||||||
|
('match', models.CharField(blank=True, max_length=256)),
|
||||||
|
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
|
||||||
|
('is_insensitive', models.BooleanField(default=True)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
'abstract': False,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='document',
|
||||||
|
name='document_type',
|
||||||
|
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.DocumentType'),
|
||||||
|
),
|
||||||
|
]
|
||||||
77
src/documents/migrations/1003_auto_20180904_1425.py
Normal file
77
src/documents/migrations/1003_auto_20180904_1425.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
# Generated by Django 2.0.8 on 2018-09-04 14:25
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
def transfer_automatic_classification(apps, schema_editor):
|
||||||
|
for model_name in ["Tag", "Correspondent", "DocumentType"]:
|
||||||
|
model_class = apps.get_model("documents", model_name)
|
||||||
|
for o in model_class.objects.all():
|
||||||
|
o.automatic_classification = o.match is not None and len(o.match) > 0
|
||||||
|
o.save()
|
||||||
|
|
||||||
|
|
||||||
|
def reverse_automatic_classification(apps, schema_editor):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('documents', '1002_auto_20180823_1155'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='correspondent',
|
||||||
|
name='automatic_classification',
|
||||||
|
field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='documenttype',
|
||||||
|
name='automatic_classification',
|
||||||
|
field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name='tag',
|
||||||
|
name='automatic_classification',
|
||||||
|
field=models.BooleanField(default=False, help_text='Automatically assign to newly added documents based on current usage in your document collection.'),
|
||||||
|
),
|
||||||
|
migrations.RunPython(transfer_automatic_classification, reverse_automatic_classification),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='correspondent',
|
||||||
|
name='is_insensitive',
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='correspondent',
|
||||||
|
name='match',
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='correspondent',
|
||||||
|
name='matching_algorithm',
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='documenttype',
|
||||||
|
name='is_insensitive',
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='documenttype',
|
||||||
|
name='match',
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='documenttype',
|
||||||
|
name='matching_algorithm',
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='tag',
|
||||||
|
name='is_insensitive',
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='tag',
|
||||||
|
name='match',
|
||||||
|
),
|
||||||
|
migrations.RemoveField(
|
||||||
|
model_name='tag',
|
||||||
|
name='matching_algorithm',
|
||||||
|
),
|
||||||
|
]
|
||||||
36
src/documents/migrations/1004_documenttype_slug.py
Normal file
36
src/documents/migrations/1004_documenttype_slug.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# Generated by Django 2.0.8 on 2018-10-07 14:20
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
from django.utils.text import slugify
|
||||||
|
|
||||||
|
|
||||||
|
def re_slug_all_the_things(apps, schema_editor):
|
||||||
|
"""
|
||||||
|
Rewrite all slug values to make sure they're actually slugs before we brand
|
||||||
|
them as uneditable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
DocumentType = apps.get_model("documents", "DocumentType")
|
||||||
|
|
||||||
|
for instance in DocumentType.objects.all():
|
||||||
|
DocumentType.objects.filter(
|
||||||
|
pk=instance.pk
|
||||||
|
).update(
|
||||||
|
slug=slugify(instance.slug)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('documents', '1003_auto_20180904_1425'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='documenttype',
|
||||||
|
name='slug',
|
||||||
|
field=models.SlugField(blank=True, editable=False),
|
||||||
|
),
|
||||||
|
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
|
||||||
|
]
|
||||||
0
src/documents/mixins.py
Normal file → Executable file
0
src/documents/mixins.py
Normal file → Executable file
142
src/documents/models.py
Normal file → Executable file
142
src/documents/models.py
Normal file → Executable file
@@ -24,43 +24,15 @@ except ImportError:
|
|||||||
|
|
||||||
class MatchingModel(models.Model):
|
class MatchingModel(models.Model):
|
||||||
|
|
||||||
MATCH_ANY = 1
|
|
||||||
MATCH_ALL = 2
|
|
||||||
MATCH_LITERAL = 3
|
|
||||||
MATCH_REGEX = 4
|
|
||||||
MATCH_FUZZY = 5
|
|
||||||
MATCHING_ALGORITHMS = (
|
|
||||||
(MATCH_ANY, "Any"),
|
|
||||||
(MATCH_ALL, "All"),
|
|
||||||
(MATCH_LITERAL, "Literal"),
|
|
||||||
(MATCH_REGEX, "Regular Expression"),
|
|
||||||
(MATCH_FUZZY, "Fuzzy Match"),
|
|
||||||
)
|
|
||||||
|
|
||||||
name = models.CharField(max_length=128, unique=True)
|
name = models.CharField(max_length=128, unique=True)
|
||||||
slug = models.SlugField(blank=True, editable=False)
|
slug = models.SlugField(blank=True, editable=False)
|
||||||
|
|
||||||
match = models.CharField(max_length=256, blank=True)
|
automatic_classification = models.BooleanField(
|
||||||
matching_algorithm = models.PositiveIntegerField(
|
default=False,
|
||||||
choices=MATCHING_ALGORITHMS,
|
help_text="Automatically assign to newly added documents based on "
|
||||||
default=MATCH_ANY,
|
"current usage in your document collection."
|
||||||
help_text=(
|
|
||||||
"Which algorithm you want to use when matching text to the OCR'd "
|
|
||||||
"PDF. Here, \"any\" looks for any occurrence of any word "
|
|
||||||
"provided in the PDF, while \"all\" requires that every word "
|
|
||||||
"provided appear in the PDF, albeit not in the order provided. A "
|
|
||||||
"\"literal\" match means that the text you enter must appear in "
|
|
||||||
"the PDF exactly as you've entered it, and \"regular expression\" "
|
|
||||||
"uses a regex to match the PDF. (If you don't know what a regex "
|
|
||||||
"is, you probably don't want this option.) Finally, a \"fuzzy "
|
|
||||||
"match\" looks for words or phrases that are mostly—but not "
|
|
||||||
"exactly—the same, which can be useful for matching against "
|
|
||||||
"documents containg imperfections that foil accurate OCR."
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
is_insensitive = models.BooleanField(default=True)
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
abstract = True
|
abstract = True
|
||||||
ordering = ("name",)
|
ordering = ("name",)
|
||||||
@@ -68,86 +40,8 @@ class MatchingModel(models.Model):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
@property
|
|
||||||
def conditions(self):
|
|
||||||
return "{}: \"{}\" ({})".format(
|
|
||||||
self.name, self.match, self.get_matching_algorithm_display())
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def match_all(cls, text, tags=None):
|
|
||||||
|
|
||||||
if tags is None:
|
|
||||||
tags = cls.objects.all()
|
|
||||||
|
|
||||||
text = text.lower()
|
|
||||||
for tag in tags:
|
|
||||||
if tag.matches(text):
|
|
||||||
yield tag
|
|
||||||
|
|
||||||
def matches(self, text):
|
|
||||||
|
|
||||||
search_kwargs = {}
|
|
||||||
|
|
||||||
# Check that match is not empty
|
|
||||||
if self.match.strip() == "":
|
|
||||||
return False
|
|
||||||
|
|
||||||
if self.is_insensitive:
|
|
||||||
search_kwargs = {"flags": re.IGNORECASE}
|
|
||||||
|
|
||||||
if self.matching_algorithm == self.MATCH_ALL:
|
|
||||||
for word in self._split_match():
|
|
||||||
search_result = re.search(
|
|
||||||
r"\b{}\b".format(word), text, **search_kwargs)
|
|
||||||
if not search_result:
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
if self.matching_algorithm == self.MATCH_ANY:
|
|
||||||
for word in self._split_match():
|
|
||||||
if re.search(r"\b{}\b".format(word), text, **search_kwargs):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
if self.matching_algorithm == self.MATCH_LITERAL:
|
|
||||||
return bool(re.search(
|
|
||||||
r"\b{}\b".format(self.match), text, **search_kwargs))
|
|
||||||
|
|
||||||
if self.matching_algorithm == self.MATCH_REGEX:
|
|
||||||
return bool(re.search(
|
|
||||||
re.compile(self.match, **search_kwargs), text))
|
|
||||||
|
|
||||||
if self.matching_algorithm == self.MATCH_FUZZY:
|
|
||||||
match = re.sub(r'[^\w\s]', '', self.match)
|
|
||||||
text = re.sub(r'[^\w\s]', '', text)
|
|
||||||
if self.is_insensitive:
|
|
||||||
match = match.lower()
|
|
||||||
text = text.lower()
|
|
||||||
|
|
||||||
return True if fuzz.partial_ratio(match, text) >= 90 else False
|
|
||||||
|
|
||||||
raise NotImplementedError("Unsupported matching algorithm")
|
|
||||||
|
|
||||||
def _split_match(self):
|
|
||||||
"""
|
|
||||||
Splits the match to individual keywords, getting rid of unnecessary
|
|
||||||
spaces and grouping quoted words together.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
' some random words "with quotes " and spaces'
|
|
||||||
==>
|
|
||||||
["some", "random", "words", "with+quotes", "and", "spaces"]
|
|
||||||
"""
|
|
||||||
findterms = re.compile(r'"([^"]+)"|(\S+)').findall
|
|
||||||
normspace = re.compile(r"\s+").sub
|
|
||||||
return [
|
|
||||||
normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
|
|
||||||
for t in findterms(self.match)
|
|
||||||
]
|
|
||||||
|
|
||||||
def save(self, *args, **kwargs):
|
def save(self, *args, **kwargs):
|
||||||
|
|
||||||
self.match = self.match.lower()
|
|
||||||
self.slug = slugify(self.name)
|
self.slug = slugify(self.name)
|
||||||
|
|
||||||
models.Model.save(self, *args, **kwargs)
|
models.Model.save(self, *args, **kwargs)
|
||||||
@@ -183,6 +77,17 @@ class Tag(MatchingModel):
|
|||||||
|
|
||||||
colour = models.PositiveIntegerField(choices=COLOURS, default=1)
|
colour = models.PositiveIntegerField(choices=COLOURS, default=1)
|
||||||
|
|
||||||
|
is_inbox_tag = models.BooleanField(
|
||||||
|
default=False,
|
||||||
|
help_text="Marks this tag as an inbox tag: All newly consumed "
|
||||||
|
"documents will be tagged with inbox tags."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentType(MatchingModel):
|
||||||
|
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Document(models.Model):
|
class Document(models.Model):
|
||||||
|
|
||||||
@@ -214,6 +119,14 @@ class Document(models.Model):
|
|||||||
|
|
||||||
title = models.CharField(max_length=128, blank=True, db_index=True)
|
title = models.CharField(max_length=128, blank=True, db_index=True)
|
||||||
|
|
||||||
|
document_type = models.ForeignKey(
|
||||||
|
DocumentType,
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
related_name="documents",
|
||||||
|
on_delete=models.SET_NULL
|
||||||
|
)
|
||||||
|
|
||||||
content = models.TextField(
|
content = models.TextField(
|
||||||
db_index=True,
|
db_index=True,
|
||||||
blank=True,
|
blank=True,
|
||||||
@@ -254,6 +167,15 @@ class Document(models.Model):
|
|||||||
added = models.DateTimeField(
|
added = models.DateTimeField(
|
||||||
default=timezone.now, editable=False, db_index=True)
|
default=timezone.now, editable=False, db_index=True)
|
||||||
|
|
||||||
|
archive_serial_number = models.IntegerField(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
unique=True,
|
||||||
|
db_index=True,
|
||||||
|
help_text="The position of this document in your physical document "
|
||||||
|
"archive."
|
||||||
|
)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
ordering = ("correspondent", "title")
|
ordering = ("correspondent", "title")
|
||||||
|
|
||||||
|
|||||||
@@ -14,14 +14,18 @@ from django.utils import timezone
|
|||||||
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
# - XX.YY.ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
# - XX/YY/ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
# - XX-YY-ZZZZ with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - ZZZZ.XX.YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - ZZZZ/XX/YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
|
# - ZZZZ-XX-YY with XX + YY being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
|
||||||
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
# - MONTH ZZZZ, with ZZZZ being 4 digits
|
||||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
|
||||||
DATE_REGEX = re.compile(
|
DATE_REGEX = re.compile(
|
||||||
r'\b([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})\b|' +
|
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||||
r'\b([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))\b|' +
|
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' + # NOQA: E501
|
||||||
r'\b([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))\b|' +
|
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' + # NOQA: E501
|
||||||
r'\b([^\W\d_]{3,9} [0-9]{4})\b'
|
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|' +
|
||||||
|
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -37,6 +41,7 @@ class DocumentParser:
|
|||||||
|
|
||||||
SCRATCH = settings.SCRATCH_DIR
|
SCRATCH = settings.SCRATCH_DIR
|
||||||
DATE_ORDER = settings.DATE_ORDER
|
DATE_ORDER = settings.DATE_ORDER
|
||||||
|
FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
|
||||||
OPTIPNG = settings.OPTIPNG_BINARY
|
OPTIPNG = settings.OPTIPNG_BINARY
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
@@ -75,30 +80,60 @@ class DocumentParser:
|
|||||||
Returns the date of the document.
|
Returns the date of the document.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def __parser(ds, date_order):
|
||||||
|
"""
|
||||||
|
Call dateparser.parse with a particular date ordering
|
||||||
|
"""
|
||||||
|
return dateparser.parse(
|
||||||
|
ds,
|
||||||
|
settings={
|
||||||
|
"DATE_ORDER": date_order,
|
||||||
|
"PREFER_DAY_OF_MONTH": "first",
|
||||||
|
"RETURN_AS_TIMEZONE_AWARE":
|
||||||
|
True
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
date = None
|
date = None
|
||||||
date_string = None
|
date_string = None
|
||||||
|
|
||||||
|
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
|
||||||
|
title = os.path.basename(self.document_path)
|
||||||
|
|
||||||
|
# if filename date parsing is enabled, search there first:
|
||||||
|
if self.FILENAME_DATE_ORDER:
|
||||||
|
self.log("info", "Checking document title for date")
|
||||||
|
for m in re.finditer(DATE_REGEX, title):
|
||||||
|
date_string = m.group(0)
|
||||||
|
|
||||||
|
try:
|
||||||
|
date = __parser(date_string, self.FILENAME_DATE_ORDER)
|
||||||
|
except TypeError:
|
||||||
|
# Skip all matches that do not parse to a proper date
|
||||||
|
continue
|
||||||
|
|
||||||
|
if date is not None and next_year > date.year > 1900:
|
||||||
|
self.log(
|
||||||
|
"info",
|
||||||
|
"Detected document date {} based on string {} "
|
||||||
|
"from document title"
|
||||||
|
"".format(date.isoformat(), date_string)
|
||||||
|
)
|
||||||
|
return date
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# getting text after checking filename will save time if only
|
||||||
|
# looking at the filename instead of the whole text
|
||||||
text = self.get_text()
|
text = self.get_text()
|
||||||
except ParseError:
|
except ParseError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
next_year = timezone.now().year + 5 # Arbitrary 5 year future limit
|
# Iterate through all regex matches in text and try to parse the date
|
||||||
|
|
||||||
# Iterate through all regex matches and try to parse the date
|
|
||||||
for m in re.finditer(DATE_REGEX, text):
|
for m in re.finditer(DATE_REGEX, text):
|
||||||
|
|
||||||
date_string = m.group(0)
|
date_string = m.group(0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
date = dateparser.parse(
|
date = __parser(date_string, self.DATE_ORDER)
|
||||||
date_string,
|
|
||||||
settings={
|
|
||||||
"DATE_ORDER": self.DATE_ORDER,
|
|
||||||
"PREFER_DAY_OF_MONTH": "first",
|
|
||||||
"RETURN_AS_TIMEZONE_AWARE": True
|
|
||||||
}
|
|
||||||
)
|
|
||||||
except TypeError:
|
except TypeError:
|
||||||
# Skip all matches that do not parse to a proper date
|
# Skip all matches that do not parse to a proper date
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -1,13 +1,20 @@
|
|||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
|
|
||||||
from .models import Correspondent, Tag, Document, Log
|
from .models import Correspondent, Tag, Document, Log, DocumentType
|
||||||
|
|
||||||
|
|
||||||
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
|
class CorrespondentSerializer(serializers.HyperlinkedModelSerializer):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Correspondent
|
model = Correspondent
|
||||||
fields = ("id", "slug", "name")
|
fields = ("id", "slug", "name", "automatic_classification")
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentTypeSerializer(serializers.HyperlinkedModelSerializer):
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
model = DocumentType
|
||||||
|
fields = ("id", "slug", "name", "automatic_classification")
|
||||||
|
|
||||||
|
|
||||||
class TagSerializer(serializers.HyperlinkedModelSerializer):
|
class TagSerializer(serializers.HyperlinkedModelSerializer):
|
||||||
@@ -15,7 +22,7 @@ class TagSerializer(serializers.HyperlinkedModelSerializer):
|
|||||||
class Meta:
|
class Meta:
|
||||||
model = Tag
|
model = Tag
|
||||||
fields = (
|
fields = (
|
||||||
"id", "slug", "name", "colour", "match", "matching_algorithm")
|
"id", "slug", "name", "colour", "automatic_classification")
|
||||||
|
|
||||||
|
|
||||||
class CorrespondentField(serializers.HyperlinkedRelatedField):
|
class CorrespondentField(serializers.HyperlinkedRelatedField):
|
||||||
@@ -28,17 +35,25 @@ class TagsField(serializers.HyperlinkedRelatedField):
|
|||||||
return Tag.objects.all()
|
return Tag.objects.all()
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentTypeField(serializers.HyperlinkedRelatedField):
|
||||||
|
def get_queryset(self):
|
||||||
|
return DocumentType.objects.all()
|
||||||
|
|
||||||
|
|
||||||
class DocumentSerializer(serializers.ModelSerializer):
|
class DocumentSerializer(serializers.ModelSerializer):
|
||||||
|
|
||||||
correspondent = CorrespondentField(
|
correspondent = CorrespondentField(
|
||||||
view_name="drf:correspondent-detail", allow_null=True)
|
view_name="drf:correspondent-detail", allow_null=True)
|
||||||
tags = TagsField(view_name="drf:tag-detail", many=True)
|
tags = TagsField(view_name="drf:tag-detail", many=True)
|
||||||
|
document_type = DocumentTypeField(
|
||||||
|
view_name="drf:documenttype-detail", allow_null=True)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Document
|
model = Document
|
||||||
fields = (
|
fields = (
|
||||||
"id",
|
"id",
|
||||||
"correspondent",
|
"correspondent",
|
||||||
|
"document_type",
|
||||||
"title",
|
"title",
|
||||||
"content",
|
"content",
|
||||||
"file_type",
|
"file_type",
|
||||||
|
|||||||
59
src/documents/signals/handlers.py
Normal file → Executable file
59
src/documents/signals/handlers.py
Normal file → Executable file
@@ -8,57 +8,36 @@ from django.contrib.auth.models import User
|
|||||||
from django.contrib.contenttypes.models import ContentType
|
from django.contrib.contenttypes.models import ContentType
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
from ..models import Correspondent, Document, Tag
|
from documents.classifier import DocumentClassifier
|
||||||
|
from ..models import Document, Tag
|
||||||
|
|
||||||
|
|
||||||
def logger(message, group):
|
def logger(message, group):
|
||||||
logging.getLogger(__name__).debug(message, extra={"group": group})
|
logging.getLogger(__name__).debug(message, extra={"group": group})
|
||||||
|
|
||||||
|
|
||||||
def set_correspondent(sender, document=None, logging_group=None, **kwargs):
|
classifier = DocumentClassifier()
|
||||||
|
|
||||||
# No sense in assigning a correspondent when one is already set.
|
|
||||||
if document.correspondent:
|
|
||||||
return
|
|
||||||
|
|
||||||
# No matching correspondents, so no need to continue
|
def classify_document(sender, document=None, logging_group=None, **kwargs):
|
||||||
potential_correspondents = list(Correspondent.match_all(document.content))
|
global classifier
|
||||||
if not potential_correspondents:
|
try:
|
||||||
return
|
classifier.reload()
|
||||||
|
classifier.classify_document(
|
||||||
potential_count = len(potential_correspondents)
|
document,
|
||||||
selected = potential_correspondents[0]
|
classify_correspondent=True,
|
||||||
if potential_count > 1:
|
classify_tags=True,
|
||||||
message = "Detected {} potential correspondents, so we've opted for {}"
|
classify_document_type=True
|
||||||
logger(
|
)
|
||||||
message.format(potential_count, selected),
|
except FileNotFoundError:
|
||||||
logging_group
|
logging.getLogger(__name__).fatal(
|
||||||
|
"Cannot classify document, classifier model file was not found."
|
||||||
)
|
)
|
||||||
|
|
||||||
logger(
|
|
||||||
'Assigning correspondent "{}" to "{}" '.format(selected, document),
|
|
||||||
logging_group
|
|
||||||
)
|
|
||||||
|
|
||||||
document.correspondent = selected
|
def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
|
||||||
document.save(update_fields=("correspondent",))
|
inbox_tags = Tag.objects.filter(is_inbox_tag=True)
|
||||||
|
document.tags.add(*inbox_tags)
|
||||||
|
|
||||||
def set_tags(sender, document=None, logging_group=None, **kwargs):
|
|
||||||
|
|
||||||
current_tags = set(document.tags.all())
|
|
||||||
relevant_tags = set(Tag.match_all(document.content)) - current_tags
|
|
||||||
|
|
||||||
if not relevant_tags:
|
|
||||||
return
|
|
||||||
|
|
||||||
message = 'Tagging "{}" with "{}"'
|
|
||||||
logger(
|
|
||||||
message.format(document, ", ".join([t.slug for t in relevant_tags])),
|
|
||||||
logging_group
|
|
||||||
)
|
|
||||||
|
|
||||||
document.tags.add(*relevant_tags)
|
|
||||||
|
|
||||||
|
|
||||||
def run_pre_consume_script(sender, filename, **kwargs):
|
def run_pre_consume_script(sender, filename, **kwargs):
|
||||||
|
|||||||
13922
src/documents/static/documents/js/pdf.js
Executable file
13922
src/documents/static/documents/js/pdf.js
Executable file
File diff suppressed because it is too large
Load Diff
1
src/documents/static/documents/js/pdf.js.map
Executable file
1
src/documents/static/documents/js/pdf.js.map
Executable file
File diff suppressed because one or more lines are too long
41660
src/documents/static/documents/js/pdf.worker.js
vendored
Executable file
41660
src/documents/static/documents/js/pdf.worker.js
vendored
Executable file
File diff suppressed because it is too large
Load Diff
1
src/documents/static/documents/js/pdf.worker.js.map
vendored
Executable file
1
src/documents/static/documents/js/pdf.worker.js.map
vendored
Executable file
File diff suppressed because one or more lines are too long
66
src/documents/static/js/colours.js
Normal file
66
src/documents/static/js/colours.js
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
// The following jQuery snippet will add a small square next to the selection
|
||||||
|
// drop-down on the `Add tag` page that will update to show the selected tag
|
||||||
|
// color as the drop-down value is changed.
|
||||||
|
|
||||||
|
django.jQuery(document).ready(function(){
|
||||||
|
|
||||||
|
if (django.jQuery("#id_colour").length) {
|
||||||
|
|
||||||
|
let colour;
|
||||||
|
let colour_num;
|
||||||
|
|
||||||
|
colour_num = django.jQuery("#id_colour").val() - 1;
|
||||||
|
colour = django.jQuery('#id_colour')[0][colour_num].text;
|
||||||
|
django.jQuery('#id_colour').after('<div class="colour_square"></div>');
|
||||||
|
|
||||||
|
django.jQuery('.colour_square').css({
|
||||||
|
'float': 'left',
|
||||||
|
'width': '20px',
|
||||||
|
'height': '20px',
|
||||||
|
'margin': '5px',
|
||||||
|
'border': '1px solid rgba(0, 0, 0, .2)',
|
||||||
|
'background': colour
|
||||||
|
});
|
||||||
|
|
||||||
|
django.jQuery('#id_colour').change(function () {
|
||||||
|
colour_num = django.jQuery("#id_colour").val() - 1;
|
||||||
|
colour = django.jQuery('#id_colour')[0][colour_num].text;
|
||||||
|
django.jQuery('.colour_square').css({'background': colour});
|
||||||
|
});
|
||||||
|
|
||||||
|
} else if (django.jQuery("select[id*='colour']").length) {
|
||||||
|
|
||||||
|
django.jQuery('select[id*="-colour"]').each(function (index, element) {
|
||||||
|
let id;
|
||||||
|
let loop_colour_num;
|
||||||
|
let loop_colour;
|
||||||
|
|
||||||
|
id = "colour_square_" + index;
|
||||||
|
django.jQuery(element).after('<div class="colour_square" id="' + id + '"></div>');
|
||||||
|
|
||||||
|
loop_colour_num = django.jQuery(element).val() - 1;
|
||||||
|
loop_colour = django.jQuery(element)[0][loop_colour_num].text;
|
||||||
|
|
||||||
|
django.jQuery("<style type='text/css'>\
|
||||||
|
.colour_square{ \
|
||||||
|
float: left; \
|
||||||
|
width: 20px; \
|
||||||
|
height: 20px; \
|
||||||
|
margin: 5px; \
|
||||||
|
border: 1px solid rgba(0,0,0,.2); \
|
||||||
|
} </style>").appendTo("head");
|
||||||
|
django.jQuery('#' + id).css({'background': loop_colour});
|
||||||
|
|
||||||
|
console.log(id, loop_colour_num, loop_colour);
|
||||||
|
|
||||||
|
django.jQuery(element).change(function () {
|
||||||
|
loop_colour_num = django.jQuery(element).val() - 1;
|
||||||
|
loop_colour = django.jQuery(element)[0][loop_colour_num].text;
|
||||||
|
django.jQuery('#' + id).css({'background': loop_colour});
|
||||||
|
console.log('#' + id, loop_colour)
|
||||||
|
});
|
||||||
|
})
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
13
src/documents/static/paperless.css
Normal file → Executable file
13
src/documents/static/paperless.css
Normal file → Executable file
@@ -21,3 +21,16 @@ td a.tag {
|
|||||||
width: 90%;
|
width: 90%;
|
||||||
height: 5em;
|
height: 5em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#change_form_twocolumn_parent {
|
||||||
|
display: flex;
|
||||||
|
}
|
||||||
|
#change_form_form_parent {
|
||||||
|
flex:50%;
|
||||||
|
margin-right: 10px;
|
||||||
|
}
|
||||||
|
#change_form_viewer_parent {
|
||||||
|
flex:50%;
|
||||||
|
margin-left: 10px;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
21
src/documents/templates/admin/documents/document/change_form.html
Normal file → Executable file
21
src/documents/templates/admin/documents/document/change_form.html
Normal file → Executable file
@@ -4,6 +4,27 @@
|
|||||||
|
|
||||||
{{ block.super }}
|
{{ block.super }}
|
||||||
|
|
||||||
|
{% if file_type in "pdf jpg png" %}
|
||||||
|
|
||||||
|
<div id="change_form_twocolumn_parent">
|
||||||
|
<div id="change_form_form_parent"></div>
|
||||||
|
<div id="change_form_viewer_parent">
|
||||||
|
{% if file_type == "pdf" %}
|
||||||
|
{% include "admin/documents/document/viewers/viewer_pdf.html" %}
|
||||||
|
{% endif %}
|
||||||
|
{% if file_type in "jpg png" %}
|
||||||
|
{% include "admin/documents/document/viewers/viewer_image.html" %}
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
django.jQuery("#change_form_form_parent").append(django.jQuery("#document_form"));
|
||||||
|
django.jQuery("#content-main").append(django.jQuery("#change_form_twocolumn_parent"));
|
||||||
|
</script>
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{% if next_object %}
|
{% if next_object %}
|
||||||
<script type="text/javascript">//<![CDATA[
|
<script type="text/javascript">//<![CDATA[
|
||||||
(function($){
|
(function($){
|
||||||
|
|||||||
27
src/documents/templates/admin/documents/document/change_list_results.html
Normal file → Executable file
27
src/documents/templates/admin/documents/document/change_list_results.html
Normal file → Executable file
@@ -24,7 +24,8 @@
|
|||||||
border: 1px solid #cccccc;
|
border: 1px solid #cccccc;
|
||||||
border-radius: 2%;
|
border-radius: 2%;
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
height: 300px;
|
height: 350px;
|
||||||
|
position: relative;
|
||||||
}
|
}
|
||||||
.result .header {
|
.result .header {
|
||||||
padding: 5px;
|
padding: 5px;
|
||||||
@@ -60,6 +61,11 @@
|
|||||||
.result a.tag {
|
.result a.tag {
|
||||||
color: #ffffff;
|
color: #ffffff;
|
||||||
}
|
}
|
||||||
|
.result .documentType {
|
||||||
|
padding: 5px;
|
||||||
|
background-color: #eeeeee;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
.result .date {
|
.result .date {
|
||||||
padding: 5px;
|
padding: 5px;
|
||||||
}
|
}
|
||||||
@@ -79,6 +85,15 @@
|
|||||||
.result .image img {
|
.result .image img {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
}
|
}
|
||||||
|
.result .footer {
|
||||||
|
position: absolute;
|
||||||
|
bottom: 0;
|
||||||
|
right: 0;
|
||||||
|
border-left: 1px solid #cccccc;
|
||||||
|
border-top: 1px solid #cccccc;
|
||||||
|
padding: 4px 10px 4px 10px;
|
||||||
|
background: white;
|
||||||
|
}
|
||||||
|
|
||||||
.grid {
|
.grid {
|
||||||
margin-right: 260px;
|
margin-right: 260px;
|
||||||
@@ -152,7 +167,9 @@
|
|||||||
{# 4: Image #}
|
{# 4: Image #}
|
||||||
{# 5: Correspondent #}
|
{# 5: Correspondent #}
|
||||||
{# 6: Tags #}
|
{# 6: Tags #}
|
||||||
{# 7: Document edit url #}
|
{# 7: Archive serial number #}
|
||||||
|
{# 8: Document type #}
|
||||||
|
{# 9: Document edit url #}
|
||||||
<div class="box">
|
<div class="box">
|
||||||
<div class="result">
|
<div class="result">
|
||||||
<div class="header">
|
<div class="header">
|
||||||
@@ -166,7 +183,7 @@
|
|||||||
selection would not be possible with mouse click + drag. Instead,
|
selection would not be possible with mouse click + drag. Instead,
|
||||||
the underlying link would be dragged.
|
the underlying link would be dragged.
|
||||||
{% endcomment %}
|
{% endcomment %}
|
||||||
<div class="headerLink" onclick="location.href='{{ result.7 }}';"></div>
|
<div class="headerLink" onclick="location.href='{{ result.9 }}';"></div>
|
||||||
<div class="checkbox">{{ result.0 }}</div>
|
<div class="checkbox">{{ result.0 }}</div>
|
||||||
<div class="info">
|
<div class="info">
|
||||||
{{ result.5 }}
|
{{ result.5 }}
|
||||||
@@ -174,10 +191,14 @@
|
|||||||
{{ result.1 }}
|
{{ result.1 }}
|
||||||
<div style="clear: both;"></div>
|
<div style="clear: both;"></div>
|
||||||
</div>
|
</div>
|
||||||
|
{% if '>-<' not in result.8 %}<div class="documentType">{{ result.8 }}</div>{% endif %}
|
||||||
<div class="tags">{{ result.6 }}</div>
|
<div class="tags">{{ result.6 }}</div>
|
||||||
<div class="date">{{ result.2 }}</div>
|
<div class="date">{{ result.2 }}</div>
|
||||||
<div style="clear: both;"></div>
|
<div style="clear: both;"></div>
|
||||||
<div class="image">{{ result.4 }}</div>
|
<div class="image">{{ result.4 }}</div>
|
||||||
|
{# Only show the archive serial number if it is set on the document. #}
|
||||||
|
{# checking for >-< (i.e., will a dash be displayed) doesn't feel like a very good solution to me. #}
|
||||||
|
{% if '>-<' not in result.7 %}<div class="footer">#{{ result.7 }}</div>{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|||||||
0
src/documents/templates/admin/documents/document/select_object.html
Normal file → Executable file
0
src/documents/templates/admin/documents/document/select_object.html
Normal file → Executable file
@@ -0,0 +1 @@
|
|||||||
|
<img src="{{download_url}}" style="max-width: 100%">
|
||||||
@@ -0,0 +1,130 @@
|
|||||||
|
{% load static %}
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<input id="prev" value="Previous" class="default" type="button">
|
||||||
|
<input id="next" value="Next" class="default" type="button">
|
||||||
|
|
||||||
|
<span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
|
||||||
|
|
||||||
|
<input id="zoomin" value="+" class="default" type="button">
|
||||||
|
<input id="zoomout" value="-" class="default" type="button">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style="width: 100%; overflow: auto;">
|
||||||
|
<canvas id="the-canvas"></canvas>
|
||||||
|
</div>
|
||||||
|
<script type="text/javascript" src="{% static 'documents/js/pdf.js' %}"></script>
|
||||||
|
<script type="text/javascript" src="{% static 'documents/js/pdf.worker.js' %}"></script>
|
||||||
|
|
||||||
|
{# Load and display PDF document#}
|
||||||
|
<script>
|
||||||
|
var pdfjsLib = window['pdfjs-dist/build/pdf'];
|
||||||
|
|
||||||
|
var pdfDoc = null,
|
||||||
|
pageNum = 1,
|
||||||
|
pageRendering = false,
|
||||||
|
pageNumPending = null,
|
||||||
|
scale = 1.0,
|
||||||
|
canvas = document.getElementById('the-canvas'),
|
||||||
|
ctx = canvas.getContext('2d');
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get page info from document, resize canvas accordingly, and render page.
|
||||||
|
* @param num Page number.
|
||||||
|
*/
|
||||||
|
function renderPage(num) {
|
||||||
|
pageRendering = true;
|
||||||
|
// Using promise to fetch the page
|
||||||
|
pdfDoc.getPage(num).then(function(page) {
|
||||||
|
var viewport = page.getViewport(scale);
|
||||||
|
canvas.height = viewport.height;
|
||||||
|
canvas.width = viewport.width;
|
||||||
|
// Render PDF page into canvas context
|
||||||
|
var renderContext = {
|
||||||
|
canvasContext: ctx,
|
||||||
|
viewport: viewport
|
||||||
|
};
|
||||||
|
var renderTask = page.render(renderContext);
|
||||||
|
// Wait for rendering to finish
|
||||||
|
renderTask.promise.then(function () {
|
||||||
|
pageRendering = false;
|
||||||
|
if (pageNumPending !== null) {
|
||||||
|
// New page rendering is pending
|
||||||
|
renderPage(pageNumPending);
|
||||||
|
pageNumPending = null;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
// Update page counters
|
||||||
|
document.getElementById('page_num').textContent = num;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If another page rendering in progress, waits until the rendering is
|
||||||
|
* finised. Otherwise, executes rendering immediately.
|
||||||
|
*/
|
||||||
|
function queueRenderPage(num) {
|
||||||
|
if (pageRendering) {
|
||||||
|
pageNumPending = num;
|
||||||
|
} else {
|
||||||
|
renderPage(num);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Displays previous page.
|
||||||
|
*/
|
||||||
|
function onPrevPage() {
|
||||||
|
if (pageNum <= 1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pageNum--;
|
||||||
|
queueRenderPage(pageNum);
|
||||||
|
}
|
||||||
|
|
||||||
|
document.getElementById('prev').addEventListener('click', onPrevPage);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Displays next page.
|
||||||
|
*/
|
||||||
|
function onNextPage() {
|
||||||
|
if (pageNum >= pdfDoc.numPages) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
pageNum++;
|
||||||
|
queueRenderPage(pageNum);
|
||||||
|
}
|
||||||
|
|
||||||
|
document.getElementById('next').addEventListener('click', onNextPage);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Displays next page.
|
||||||
|
*/
|
||||||
|
function onZoomIn() {
|
||||||
|
scale *= 1.2;
|
||||||
|
queueRenderPage(pageNum);
|
||||||
|
}
|
||||||
|
|
||||||
|
document.getElementById('zoomin').addEventListener('click', onZoomIn);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Displays next page.
|
||||||
|
*/
|
||||||
|
function onZoomOut() {
|
||||||
|
scale /= 1.2;
|
||||||
|
queueRenderPage(pageNum);
|
||||||
|
}
|
||||||
|
|
||||||
|
document.getElementById('zoomout').addEventListener('click', onZoomOut);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Asynchronously downloads PDF.
|
||||||
|
*/
|
||||||
|
pdfjsLib.getDocument("{{download_url}}").then(function (pdfDoc_) {
|
||||||
|
pdfDoc = pdfDoc_;
|
||||||
|
document.getElementById('page_count').textContent = pdfDoc.numPages;
|
||||||
|
// Initial/first page rendering
|
||||||
|
renderPage(pageNum);
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
|
||||||
24
src/documents/views.py
Normal file → Executable file
24
src/documents/views.py
Normal file → Executable file
@@ -20,14 +20,21 @@ from rest_framework.viewsets import (
|
|||||||
ReadOnlyModelViewSet
|
ReadOnlyModelViewSet
|
||||||
)
|
)
|
||||||
|
|
||||||
from .filters import CorrespondentFilterSet, DocumentFilterSet, TagFilterSet
|
from .filters import (
|
||||||
|
CorrespondentFilterSet,
|
||||||
|
DocumentFilterSet,
|
||||||
|
TagFilterSet,
|
||||||
|
DocumentTypeFilterSet
|
||||||
|
)
|
||||||
|
|
||||||
from .forms import UploadForm
|
from .forms import UploadForm
|
||||||
from .models import Correspondent, Document, Log, Tag
|
from .models import Correspondent, Document, Log, Tag, DocumentType
|
||||||
from .serialisers import (
|
from .serialisers import (
|
||||||
CorrespondentSerializer,
|
CorrespondentSerializer,
|
||||||
DocumentSerializer,
|
DocumentSerializer,
|
||||||
LogSerializer,
|
LogSerializer,
|
||||||
TagSerializer
|
TagSerializer,
|
||||||
|
DocumentTypeSerializer
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -116,6 +123,17 @@ class TagViewSet(ModelViewSet):
|
|||||||
ordering_fields = ("name", "slug")
|
ordering_fields = ("name", "slug")
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentTypeViewSet(ModelViewSet):
|
||||||
|
model = DocumentType
|
||||||
|
queryset = DocumentType.objects.all()
|
||||||
|
serializer_class = DocumentTypeSerializer
|
||||||
|
pagination_class = StandardPagination
|
||||||
|
permission_classes = (IsAuthenticated,)
|
||||||
|
filter_backends = (DjangoFilterBackend, OrderingFilter)
|
||||||
|
filter_class = DocumentTypeFilterSet
|
||||||
|
ordering_fields = ("name", "slug")
|
||||||
|
|
||||||
|
|
||||||
class DocumentViewSet(RetrieveModelMixin,
|
class DocumentViewSet(RetrieveModelMixin,
|
||||||
UpdateModelMixin,
|
UpdateModelMixin,
|
||||||
DestroyModelMixin,
|
DestroyModelMixin,
|
||||||
|
|||||||
0
src/manage.py
Executable file → Normal file
0
src/manage.py
Executable file → Normal file
27
src/paperless/settings.py
Normal file → Executable file
27
src/paperless/settings.py
Normal file → Executable file
@@ -144,14 +144,18 @@ DATABASES = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if os.getenv("PAPERLESS_DBUSER"):
|
if os.getenv("PAPERLESS_DBENGINE"):
|
||||||
DATABASES["default"] = {
|
DATABASES["default"] = {
|
||||||
"ENGINE": "django.db.backends.postgresql_psycopg2",
|
"ENGINE": os.getenv("PAPERLESS_DBENGINE"),
|
||||||
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
|
"NAME": os.getenv("PAPERLESS_DBNAME", "paperless"),
|
||||||
"USER": os.getenv("PAPERLESS_DBUSER"),
|
"USER": os.getenv("PAPERLESS_DBUSER"),
|
||||||
}
|
}
|
||||||
if os.getenv("PAPERLESS_DBPASS"):
|
if os.getenv("PAPERLESS_DBPASS"):
|
||||||
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
|
DATABASES["default"]["PASSWORD"] = os.getenv("PAPERLESS_DBPASS")
|
||||||
|
if os.getenv("PAPERLESS_DBHOST"):
|
||||||
|
DATABASES["default"]["HOST"] = os.getenv("PAPERLESS_DBHOST")
|
||||||
|
if os.getenv("PAPERLESS_DBPORT"):
|
||||||
|
DATABASES["default"]["PORT"] = os.getenv("PAPERLESS_DBPORT")
|
||||||
|
|
||||||
|
|
||||||
# Password validation
|
# Password validation
|
||||||
@@ -199,6 +203,24 @@ STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", "/static/")
|
|||||||
MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
|
MEDIA_URL = os.getenv("PAPERLESS_MEDIA_URL", "/media/")
|
||||||
|
|
||||||
|
|
||||||
|
# Other
|
||||||
|
|
||||||
|
# Disable Django's artificial limit on the number of form fields to submit at
|
||||||
|
# once. This is a protection against overloading the server, but since this is
|
||||||
|
# a self-hosted sort of gig, the benefits of being able to mass-delete a tonne
|
||||||
|
# of log entries outweight the benefits of such a safeguard.
|
||||||
|
|
||||||
|
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
|
||||||
|
|
||||||
|
|
||||||
|
# Document classification models location
|
||||||
|
MODEL_FILE = os.getenv(
|
||||||
|
"PAPERLESS_MODEL_FILE", os.path.join(
|
||||||
|
BASE_DIR, "..", "models", "model.pickle"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Paperless-specific stuff
|
# Paperless-specific stuff
|
||||||
# You shouldn't have to edit any of these values. Rather, you can set these
|
# You shouldn't have to edit any of these values. Rather, you can set these
|
||||||
# values in /etc/paperless.conf instead.
|
# values in /etc/paperless.conf instead.
|
||||||
@@ -296,6 +318,7 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_YEAR_END")
|
|||||||
|
|
||||||
# Specify the default date order (for autodetected dates)
|
# Specify the default date order (for autodetected dates)
|
||||||
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
|
||||||
|
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
||||||
|
|
||||||
# Specify for how many years a correspondent is considered recent. Recent
|
# Specify for how many years a correspondent is considered recent. Recent
|
||||||
# correspondents will be shown in a separate "Recent correspondents" filter as
|
# correspondents will be shown in a separate "Recent correspondents" filter as
|
||||||
|
|||||||
4
src/paperless/urls.py
Normal file → Executable file
4
src/paperless/urls.py
Normal file → Executable file
@@ -12,12 +12,14 @@ from documents.views import (
|
|||||||
FetchView,
|
FetchView,
|
||||||
LogViewSet,
|
LogViewSet,
|
||||||
PushView,
|
PushView,
|
||||||
TagViewSet
|
TagViewSet,
|
||||||
|
DocumentTypeViewSet
|
||||||
)
|
)
|
||||||
from reminders.views import ReminderViewSet
|
from reminders.views import ReminderViewSet
|
||||||
|
|
||||||
router = DefaultRouter()
|
router = DefaultRouter()
|
||||||
router.register(r"correspondents", CorrespondentViewSet)
|
router.register(r"correspondents", CorrespondentViewSet)
|
||||||
|
router.register(r"document_types", DocumentTypeViewSet)
|
||||||
router.register(r"documents", DocumentViewSet)
|
router.register(r"documents", DocumentViewSet)
|
||||||
router.register(r"logs", LogViewSet)
|
router.register(r"logs", LogViewSet)
|
||||||
router.register(r"reminders", ReminderViewSet)
|
router.register(r"reminders", ReminderViewSet)
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
__version__ = (2, 5, 0)
|
__version__ = (1, 0, 0)
|
||||||
|
|||||||
@@ -153,7 +153,10 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
)
|
)
|
||||||
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
||||||
return raw_text
|
return raw_text
|
||||||
raise OCRError("Language detection failed")
|
error_msg = ("Language detection failed. Set "
|
||||||
|
"PAPERLESS_FORGIVING_OCR in config file to continue "
|
||||||
|
"anyway.")
|
||||||
|
raise OCRError(error_msg)
|
||||||
|
|
||||||
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
|
if ISO639[guessed_language] == self.DEFAULT_OCR_LANGUAGE:
|
||||||
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
raw_text = self._assemble_ocr_sections(imgs, middle, raw_text)
|
||||||
@@ -218,7 +221,8 @@ def run_convert(*args):
|
|||||||
|
|
||||||
def run_unpaper(args):
|
def run_unpaper(args):
|
||||||
unpaper, pnm = args
|
unpaper, pnm = args
|
||||||
command_args = unpaper, pnm, pnm.replace(".pnm", ".unpaper.pnm")
|
command_args = (unpaper, "--overwrite", pnm,
|
||||||
|
pnm.replace(".pnm", ".unpaper.pnm"))
|
||||||
if not subprocess.Popen(command_args).wait() == 0:
|
if not subprocess.Popen(command_args).wait() == 0:
|
||||||
raise ParseError("Unpaper failed at {}".format(command_args))
|
raise ParseError("Unpaper failed at {}".format(command_args))
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 136 KiB |
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 138 KiB After Width: | Height: | Size: 55 KiB |
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 138 KiB After Width: | Height: | Size: 53 KiB |
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 136 KiB |
@@ -5,9 +5,10 @@ from unittest import mock
|
|||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
from dateutil import tz
|
from dateutil import tz
|
||||||
from django.test import TestCase
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
from ..parsers import RasterisedDocumentParser
|
from ..parsers import RasterisedDocumentParser
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
class TestDate(TestCase):
|
class TestDate(TestCase):
|
||||||
@@ -59,9 +60,13 @@ class TestDate(TestCase):
|
|||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = "lorem ipsum 13.02.2018 lorem ipsum"
|
document._text = "lorem ipsum 13.02.2018 lorem ipsum"
|
||||||
|
date = document.get_date()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
date,
|
||||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 2, 13, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -72,10 +77,16 @@ class TestDate(TestCase):
|
|||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = (
|
document._text = (
|
||||||
"lorem ipsum 130218, 2018, 20180213 and 13.02.2018 lorem ipsum")
|
"lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
|
||||||
|
"ipsum"
|
||||||
|
)
|
||||||
|
date = document.get_date()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
date,
|
||||||
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 2, 13, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -110,9 +121,13 @@ class TestDate(TestCase):
|
|||||||
"März 2019\n"
|
"März 2019\n"
|
||||||
"lorem ipsum"
|
"lorem ipsum"
|
||||||
)
|
)
|
||||||
|
date = document.get_date()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
date,
|
||||||
datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2019, 3, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -122,19 +137,25 @@ class TestDate(TestCase):
|
|||||||
def test_date_format_8(self):
|
def test_date_format_8(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = ("lorem ipsum\n"
|
document._text = (
|
||||||
"Wohnort\n"
|
"lorem ipsum\n"
|
||||||
"3100\n"
|
"Wohnort\n"
|
||||||
"IBAN\n"
|
"3100\n"
|
||||||
"AT87 4534\n"
|
"IBAN\n"
|
||||||
"1234\n"
|
"AT87 4534\n"
|
||||||
"1234 5678\n"
|
"1234\n"
|
||||||
"BIC\n"
|
"1234 5678\n"
|
||||||
"lorem ipsum\n"
|
"BIC\n"
|
||||||
"März 2020")
|
"lorem ipsum\n"
|
||||||
self.assertEqual(document.get_date(),
|
"März 2020"
|
||||||
datetime.datetime(2020, 3, 1, 0, 0,
|
)
|
||||||
tzinfo=tz.tzutc()))
|
self.assertEqual(
|
||||||
|
document.get_date(),
|
||||||
|
datetime.datetime(
|
||||||
|
2020, 3, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
@@ -143,13 +164,19 @@ class TestDate(TestCase):
|
|||||||
def test_date_format_9(self):
|
def test_date_format_9(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "")
|
input_file = os.path.join(self.SAMPLE_FILES, "")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
document._text = ("lorem ipsum\n"
|
document._text = (
|
||||||
"27. Nullmonth 2020\n"
|
"lorem ipsum\n"
|
||||||
"März 2020\n"
|
"27. Nullmonth 2020\n"
|
||||||
"lorem ipsum")
|
"März 2020\n"
|
||||||
self.assertEqual(document.get_date(),
|
"lorem ipsum"
|
||||||
datetime.datetime(2020, 3, 1, 0, 0,
|
)
|
||||||
tzinfo=tz.tzutc()))
|
self.assertEqual(
|
||||||
|
document.get_date(),
|
||||||
|
datetime.datetime(
|
||||||
|
2020, 3, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
@@ -158,11 +185,16 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_1_pdf(self):
|
def test_get_text_1_pdf(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.pdf")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
|
date = document.get_date()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
date,
|
||||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 4, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -172,11 +204,15 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_1_png(self):
|
def test_get_text_1_png(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_1.png")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), False)
|
self.assertEqual(document._is_ocred(), False)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 4, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -186,11 +222,15 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_2_pdf(self):
|
def test_get_text_2_pdf(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.pdf")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2013, 2, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -200,67 +240,91 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_2_png(self):
|
def test_get_text_2_png(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_2.png")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), False)
|
self.assertEqual(document._is_ocred(), False)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2013, 2, 1, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2013, 2, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
SCRATCH
|
SCRATCH
|
||||||
)
|
)
|
||||||
|
@override_settings(OCR_LANGUAGE="deu")
|
||||||
def test_get_text_3_pdf(self):
|
def test_get_text_3_pdf(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.pdf")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 10, 5, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
SCRATCH
|
SCRATCH
|
||||||
)
|
)
|
||||||
|
@override_settings(OCR_LANGUAGE="deu")
|
||||||
def test_get_text_3_png(self):
|
def test_get_text_3_png(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_3.png")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), False)
|
self.assertEqual(document._is_ocred(), False)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 10, 5, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
SCRATCH
|
SCRATCH
|
||||||
)
|
)
|
||||||
|
@override_settings(OCR_LANGUAGE="eng")
|
||||||
def test_get_text_4_pdf(self):
|
def test_get_text_4_pdf(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.pdf")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 10, 5, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
SCRATCH
|
SCRATCH
|
||||||
)
|
)
|
||||||
|
@override_settings(OCR_LANGUAGE="eng")
|
||||||
def test_get_text_4_png(self):
|
def test_get_text_4_png(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_4.png")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), False)
|
self.assertEqual(document._is_ocred(), False)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 10, 5, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 10, 5, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -270,11 +334,15 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_5_pdf(self):
|
def test_get_text_5_pdf(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.pdf")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 12, 17, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -284,11 +352,15 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_5_png(self):
|
def test_get_text_5_png(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_5.png")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), False)
|
self.assertEqual(document._is_ocred(), False)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 12, 17, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -303,7 +375,10 @@ class TestDate(TestCase):
|
|||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 12, 17, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -318,7 +393,10 @@ class TestDate(TestCase):
|
|||||||
self.assertEqual(document._is_ocred(), False)
|
self.assertEqual(document._is_ocred(), False)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 12, 17, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 12, 17, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -328,6 +406,7 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_6_pdf_eu(self):
|
def test_get_text_6_pdf_eu(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.pdf")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(document.get_date(), None)
|
self.assertEqual(document.get_date(), None)
|
||||||
@@ -339,6 +418,7 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_6_png_eu(self):
|
def test_get_text_6_png_eu(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_6.png")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), False)
|
self.assertEqual(document._is_ocred(), False)
|
||||||
self.assertEqual(document.get_date(), None)
|
self.assertEqual(document.get_date(), None)
|
||||||
@@ -350,11 +430,15 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_7_pdf(self):
|
def test_get_text_7_pdf(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_7.pdf")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2018, 4, 1, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2018, 4, 1, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -364,11 +448,15 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_8_pdf(self):
|
def test_get_text_8_pdf(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_8.pdf")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2017, 12, 31, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
@@ -378,11 +466,100 @@ class TestDate(TestCase):
|
|||||||
def test_get_text_9_pdf(self):
|
def test_get_text_9_pdf(self):
|
||||||
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
|
input_file = os.path.join(self.SAMPLE_FILES, "tests_date_9.pdf")
|
||||||
document = RasterisedDocumentParser(input_file)
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.DATE_ORDER = 'DMY'
|
||||||
document.get_text()
|
document.get_text()
|
||||||
self.assertEqual(document._is_ocred(), True)
|
self.assertEqual(document._is_ocred(), True)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
document.get_date(),
|
document.get_date(),
|
||||||
datetime.datetime(2017, 12, 31, 0, 0, tzinfo=tz.tzutc())
|
datetime.datetime(
|
||||||
|
2017, 12, 31, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
@mock.patch(
|
||||||
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
|
SCRATCH
|
||||||
|
)
|
||||||
|
def test_filename_date_1_pdf(self):
|
||||||
|
input_file = os.path.join(
|
||||||
|
self.SAMPLE_FILES,
|
||||||
|
"tests_date_in_filename_2018-03-20_1.pdf"
|
||||||
|
)
|
||||||
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.FILENAME_DATE_ORDER = 'YMD'
|
||||||
|
document.get_text()
|
||||||
|
date = document.get_date()
|
||||||
|
self.assertEqual(document._is_ocred(), True)
|
||||||
|
self.assertEqual(
|
||||||
|
date,
|
||||||
|
datetime.datetime(
|
||||||
|
2018, 3, 20, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
@mock.patch(
|
||||||
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
|
SCRATCH
|
||||||
|
)
|
||||||
|
def test_filename_date_1_png(self):
|
||||||
|
input_file = os.path.join(
|
||||||
|
self.SAMPLE_FILES,
|
||||||
|
"tests_date_in_filename_2018-03-20_1.png"
|
||||||
|
)
|
||||||
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.FILENAME_DATE_ORDER = 'YMD'
|
||||||
|
date = document.get_date()
|
||||||
|
self.assertEqual(document._is_ocred(), False)
|
||||||
|
self.assertEqual(
|
||||||
|
date,
|
||||||
|
datetime.datetime(
|
||||||
|
2018, 3, 20, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
@mock.patch(
|
||||||
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
|
SCRATCH
|
||||||
|
)
|
||||||
|
def test_filename_date_2_pdf(self):
|
||||||
|
input_file = os.path.join(
|
||||||
|
self.SAMPLE_FILES,
|
||||||
|
"2013-12-11_tests_date_in_filename_2.pdf"
|
||||||
|
)
|
||||||
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.FILENAME_DATE_ORDER = 'YMD'
|
||||||
|
date = document.get_date()
|
||||||
|
self.assertEqual(document._is_ocred(), True)
|
||||||
|
self.assertEqual(
|
||||||
|
date,
|
||||||
|
datetime.datetime(
|
||||||
|
2013, 12, 11, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
@mock.patch(
|
||||||
|
"paperless_tesseract.parsers.RasterisedDocumentParser.SCRATCH",
|
||||||
|
SCRATCH
|
||||||
|
)
|
||||||
|
def test_filename_date_2_png(self):
|
||||||
|
input_file = os.path.join(
|
||||||
|
self.SAMPLE_FILES,
|
||||||
|
"2013-12-11_tests_date_in_filename_2.png"
|
||||||
|
)
|
||||||
|
document = RasterisedDocumentParser(input_file)
|
||||||
|
document.FILENAME_DATE_ORDER = 'YMD'
|
||||||
|
date = document.get_date()
|
||||||
|
self.assertEqual(document._is_ocred(), False)
|
||||||
|
self.assertEqual(
|
||||||
|
date,
|
||||||
|
datetime.datetime(
|
||||||
|
2013, 12, 11, 0, 0,
|
||||||
|
tzinfo=tz.gettz(settings.TIME_ZONE)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@mock.patch(
|
@mock.patch(
|
||||||
|
|||||||
Reference in New Issue
Block a user