Compare commits
75 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9318c2c0bc | ||
|
|
e431a658cc | ||
|
|
b97fa9e3b9 | ||
|
|
1b0ddd6df6 | ||
|
|
daf54a334f | ||
|
|
183d432f84 | ||
|
|
e1853583b0 | ||
|
|
f156f05b37 | ||
|
|
b03d4c7646 | ||
|
|
1ef12d2cbc | ||
|
|
0d8688515c | ||
|
|
f51207fc32 | ||
|
|
64ee8eab2f | ||
|
|
2224540b71 | ||
|
|
fcc0cb7293 | ||
|
|
023aeea7ea | ||
|
|
a27daaebe9 | ||
|
|
5160ff9793 | ||
|
|
c2a86704eb | ||
|
|
8fbb31a928 | ||
|
|
ab41a708e9 | ||
|
|
5d5915c5d6 | ||
|
|
562d81e246 | ||
|
|
bddffbce50 | ||
|
|
6992ac6aa9 | ||
|
|
dddd6f5503 | ||
|
|
71fc785753 | ||
|
|
8361d15a70 | ||
|
|
52b3057640 | ||
|
|
5573a84335 | ||
|
|
a1f5ddede8 | ||
|
|
481b6c7cec | ||
|
|
bc4192e7d1 | ||
|
|
440a23a054 | ||
|
|
785577b2e8 | ||
|
|
6c308116d6 | ||
|
|
6834e563a8 | ||
|
|
938499706c | ||
|
|
42c9186e91 | ||
|
|
60ac1ddbb9 | ||
|
|
a4277706f2 | ||
|
|
35b2033949 | ||
|
|
20c1139632 | ||
|
|
d04b54140c | ||
|
|
6b3ec52ed4 | ||
|
|
a4bd2d687e | ||
|
|
db0f7649d1 | ||
|
|
6454df57bf | ||
|
|
b589b7a5dc | ||
|
|
4bf0d834a0 | ||
|
|
68e0c21eb0 | ||
|
|
d5ec762954 | ||
|
|
ae30fef641 | ||
|
|
75390693b9 | ||
|
|
43b473dc53 | ||
|
|
30acfdd3f1 | ||
|
|
2a4fe4dceb | ||
|
|
ef15de18a9 | ||
|
|
2163015d06 | ||
|
|
3b38ac0f9b | ||
|
|
97639508cb | ||
|
|
1987dccf48 | ||
|
|
d92214d412 | ||
|
|
751c2ac54b | ||
|
|
6aca09d485 | ||
|
|
dd83364326 | ||
|
|
977594fece | ||
|
|
cd6e7d9563 | ||
|
|
f4013b1343 | ||
|
|
09e419aeee | ||
|
|
d7160de9f1 | ||
|
|
ded8f865d8 | ||
|
|
1e95d22e1a | ||
|
|
49ff1984f0 | ||
|
|
f7fa110afe |
@@ -1,13 +1,26 @@
|
||||
# Contributing
|
||||
|
||||
If you feel that something is not working, please submit an issue. You can also ask questions on the issue tracker by tagging your question with the question tag.
|
||||
There's still lots of things to be done, just have a look at the issue log. If you feel like contributing to the project, please do! Bug fixes and improvements to the front end (I just can't seem to get some of these CSS things right) are always welcome.
|
||||
|
||||
Pull requests are welcome, however, I will be a little bit more strict about what goes into the code and what does not. If you want to make a big change, please ask me about it first.
|
||||
If you want to implement something big: Please start a discussion about that in the issues! Maybe I've already had something similar in mind and we can make it happen together. However, keep in mind that the general roadmap is to make the existing features stable and get them tested. See the roadmap in the readme.
|
||||
|
||||
* When making additions to the project, consider if the majority of users will benefit from your change. If not, you're probably better off forking the project.
|
||||
* Also consider if your change will get in the way of other users. A good change is a change that enhances the experience of some users who want that change and does not affect users who do not care about the change.
|
||||
|
||||
However:
|
||||
## Python
|
||||
|
||||
* Bug fixes are always welcome. Docker makes things easier; however, I alone cannot ensure that this runs on all platforms.
|
||||
* Improvements to the styling of the front-end are always welcome. I'm no expert in things UX, and simply copied one of the Bootstrap examples. I think it turned out rather good, but I just can't seem to get some things working properly.
|
||||
Use python 3.6 for development. Paperless supports python 3.6, 3.7 and 3.8.
|
||||
|
||||
## Branches
|
||||
|
||||
master always reflects the latest release.
|
||||
|
||||
dev contains all changes that will be part of the next release. Use this branch to start making your changes.
|
||||
|
||||
feature-X branches are for experimental stuff that will eventually be merged into dev, and then released as part of the next release.
|
||||
|
||||
## Testing:
|
||||
|
||||
I'm trying to get most of paperless tested, so please do the same for your code! I know it's a hassle, but it makes sure that your code works now and will allow us to detect regressions easily.
|
||||
|
||||
To test your code, execute `pytest` in the src/ directory. Executing that in the project root is no good. This also generates an HTML coverage report, which you can use to see if you missed anything important during testing.
|
||||
|
||||
4
Pipfile
@@ -8,6 +8,9 @@ url = "https://www.piwheels.org/simple"
|
||||
verify_ssl = true
|
||||
name = "piwheels"
|
||||
|
||||
[requires]
|
||||
python_version = "3.6"
|
||||
|
||||
[packages]
|
||||
dateparser = "~=0.7.6"
|
||||
django = "~=3.1.3"
|
||||
@@ -35,6 +38,7 @@ scikit-learn="~=0.23.2"
|
||||
whitenoise = "~=5.2.0"
|
||||
watchdog = "*"
|
||||
whoosh="~=2.7.4"
|
||||
inotify-simple = "*"
|
||||
|
||||
[dev-packages]
|
||||
coveralls = "*"
|
||||
|
||||
50
Pipfile.lock
generated
@@ -1,10 +1,12 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "ae2643b9cf0cf5741ae149fb6bc0c480de41329ce48e773eb4b5d760bc5e2244"
|
||||
"sha256": "d6432a18280c092c108e998f00bcd377c0c55ef18f26cb0b8eb64f9618b9f383"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {},
|
||||
"requires": {
|
||||
"python_version": "3.6"
|
||||
},
|
||||
"sources": [
|
||||
{
|
||||
"name": "pypi",
|
||||
@@ -129,6 +131,14 @@
|
||||
"index": "pypi",
|
||||
"version": "==0.32.0"
|
||||
},
|
||||
"inotify-simple": {
|
||||
"hashes": [
|
||||
"sha256:8440ffe49c4ae81a8df57c1ae1eb4b6bfa7acb830099bfb3e305b383005cc128",
|
||||
"sha256:854f9ac752cc1fcff6ca34e9d3d875c9a94c9b7d6eb377f63be2d481a566c6ee"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.3.5"
|
||||
},
|
||||
"joblib": {
|
||||
"hashes": [
|
||||
"sha256:698c311779f347cf6b7e6b8a39bb682277b8ee4aba8cf9507bc0cf4cd4737b72",
|
||||
@@ -663,11 +673,11 @@
|
||||
},
|
||||
"faker": {
|
||||
"hashes": [
|
||||
"sha256:3f5d379e4b5ce92a8afe3c2ce59d7c43886370dd3bf9495a936b91888debfc81",
|
||||
"sha256:8c0e8a06acef4b9312902e2ce18becabe62badd3a6632180bd0680c6ee111473"
|
||||
"sha256:5398268e1d751ffdb3ed36b8a790ed98659200599b368eec38a02eed15bce997",
|
||||
"sha256:d4183b8f57316de3be27cd6c3b40e9f9343d27c95c96179f027316c58c2c239e"
|
||||
],
|
||||
"markers": "python_version >= '3.5'",
|
||||
"version": "==4.17.0"
|
||||
"version": "==4.17.1"
|
||||
},
|
||||
"filelock": {
|
||||
"hashes": [
|
||||
@@ -693,6 +703,22 @@
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==1.2.0"
|
||||
},
|
||||
"importlib-metadata": {
|
||||
"hashes": [
|
||||
"sha256:030f3b1bdb823ecbe4a9659e14cc861ce5af403fe99863bae173ec5fe00ab132",
|
||||
"sha256:caeee3603f5dcf567864d1be9b839b0bcfdf1383e3e7be33ce2dead8144ff19c"
|
||||
],
|
||||
"markers": "python_version < '3.8'",
|
||||
"version": "==2.1.0"
|
||||
},
|
||||
"importlib-resources": {
|
||||
"hashes": [
|
||||
"sha256:7b51f0106c8ec564b1bef3d9c588bc694ce2b92125bbb6278f4f2f5b54ec3592",
|
||||
"sha256:a3d34a8464ce1d5d7c92b0ea4e921e696d86f2aa212e684451cb1482c8d84ed5"
|
||||
],
|
||||
"markers": "python_version < '3.7'",
|
||||
"version": "==3.3.0"
|
||||
},
|
||||
"iniconfig": {
|
||||
"hashes": [
|
||||
"sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
|
||||
@@ -999,11 +1025,19 @@
|
||||
},
|
||||
"virtualenv": {
|
||||
"hashes": [
|
||||
"sha256:b0011228208944ce71052987437d3843e05690b2f23d1c7da4263fde104c97a2",
|
||||
"sha256:b8d6110f493af256a40d65e29846c69340a947669eec8ce784fcf3dd3af28380"
|
||||
"sha256:07cff122e9d343140366055f31be4dcd61fd598c69d11cd33a9d9c8df4546dd7",
|
||||
"sha256:e0aac7525e880a429764cefd3aaaff54afb5d9f25c82627563603f5d7de5a6e5"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==20.1.0"
|
||||
"version": "==20.2.1"
|
||||
},
|
||||
"zipp": {
|
||||
"hashes": [
|
||||
"sha256:102c24ef8f171fd729d46599845e95c7ab894a4cf45f5de11a44cc7444fb1108",
|
||||
"sha256:ed5eee1974372595f9e416cc7bbeeb12335201d8081ca8a0743c954d4446e5cb"
|
||||
],
|
||||
"markers": "python_version < '3.8'",
|
||||
"version": "==3.4.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
40
README.md
@@ -21,6 +21,10 @@ Paperless does not control your scanner, it only helps you deal with what your s
|
||||
4. Use the web frontend to sift through the database and find what you want.
|
||||
5. Download the PDF you need/want via the web interface and do whatever you like with it. You can even print it and send it as if it's the original. In most cases, no one will care or notice.
|
||||
|
||||
Here's what you get:
|
||||
|
||||

|
||||
|
||||
# Why Paperless-ng?
|
||||
|
||||
I wanted to make big changes to the project that will impact the way it is used by its users greatly. Among the users who currently use paperless in production there are probably many that don't want these changes right away. I also wanted to have more control over what goes into the code and what does not. Therefore, paperless-ng was created. NG stands for both Angular (the framework used for the Frontend) and next-gen. Publishing this project under a different name also avoids confusion between paperless and paperless-ng.
|
||||
@@ -31,43 +35,61 @@ The gist of the changes is the following:
|
||||
* New full text search.
|
||||
* New email processing.
|
||||
* Machine learning powered document matching.
|
||||
* Code cleanup in many, MANY areas.
|
||||
* A task processor that processes documents in parallel and also tells you when something goes wrong.
|
||||
* Code cleanup in many, MANY areas. Some of the code was just overly complicated.
|
||||
* More tests, more stability.
|
||||
|
||||
If you want to see some screenshots of paperless-ng in action, [some are available in the documentation](https://paperless-ng.readthedocs.io/en/latest/screenshots.html).
|
||||
|
||||
For a complete list of changes, check out the [changelog](https://paperless-ng.readthedocs.io/en/latest/changelog.html)
|
||||
|
||||
## Planned
|
||||
# Roadmap for 1.0
|
||||
|
||||
These features will make it into the application at some point, sorted by priority.
|
||||
- Make the front end nice (except mobile).
|
||||
- Test coverage at 90%.
|
||||
- Store archived documents with an embedded OCR text layer, while keeping originals available. Making good progress in the `feature-ocrmypdf` branch.
|
||||
- Fix whatever bugs I and you find.
|
||||
|
||||
## Roadmap for versions beyond 1.0
|
||||
|
||||
- **More search.** The search backend is incredibly versatile and customizable. Searching is the most important feature of this project and thus, I want to implement things like:
|
||||
- Group and limit search results by correspondent, show “more from this” links in the results.
|
||||
- Ability to search for “Similar documents” in the search results
|
||||
- Provide corrections for misspelled queries
|
||||
- **More robust consumer** that shows its progress on the web page.
|
||||
- **An interactive consumer** that shows its progress for documents it processes on the web page.
|
||||
- With live updates and websockets. This already works on a dev branch, but requires a lot of new dependencies, which I'm not particularly happy about.
|
||||
- Notifications when a document was added with buttons to open the new document right away.
|
||||
- **Arbitrary tag colors**. Allow the selection of any color with a color picker.
|
||||
|
||||
## On the chopping block.
|
||||
|
||||
- **GnuPG encrypion.** Since its disabled by default and the website allows transparent access to encrypted documents anyway, this doesn’t really provide any benefit over having the application stored on an encrypted file system.
|
||||
- **GnuPG encryption.** [Here's a note about encryption in paperless](https://paperless-ng.readthedocs.io/en/latest/administration.html#managing-encryption). The gist of it is that I don't see which attacks this implementation protects against. It gives a false sense of security to users who don't care about how it works.
|
||||
|
||||
# Getting started
|
||||
|
||||
The recommended way to deploy paperless is docker-compose. Grab the latest release to get started. the dockerfiles archive contains just the docker files which will pull the image from docker hub. The source archive contains everything you need to build the docker image yourself.
|
||||
The recommended way to deploy paperless is docker-compose. Don't clone the repository, grab the latest release to get started instead. The dockerfiles archive contains just the docker files which will pull the image from docker hub. The source archive contains everything you need to build the docker image yourself (i.e. if you want to run on Raspberry Pi).
|
||||
|
||||
Read the [documentation](https://paperless-ng.readthedocs.io/en/latest/setup.html#installation) on how to get started.
|
||||
|
||||
Alternatively, you can install the dependencies and setup apache and a database server yourself. Details for that will be available in the documentation at some point.
|
||||
Alternatively, you can install the dependencies and set up Apache and a database server yourself. The documentation has information about the individual components of paperless that you need to take care of.
|
||||
|
||||
# Migrating to paperless-ng
|
||||
|
||||
Read the section about [migration](https://paperless-ng.readthedocs.io/en/latest/setup.html#migration-to-paperless-ng) in the documentation.
|
||||
Read the section about [migration](https://paperless-ng.readthedocs.io/en/latest/setup.html#migration-to-paperless-ng) in the documentation. It's also entirely possible to go back to paperless by reverting the database migrations.
|
||||
|
||||
# Documentation
|
||||
|
||||
The documentation for Paperless-ng is available on [ReadTheDocs](https://paperless-ng.readthedocs.io/).
|
||||
|
||||
# Suggestions? Questions? Something not working?
|
||||
|
||||
Please open an issue and start a discussion about it!
|
||||
|
||||
## Feel like helping out?
|
||||
|
||||
There's still lots of things to be done, just have a look at the issue log. If you feel like contributing to the project, please do! Bug fixes and improvements to the front end (I just can't seem to get some of these CSS things right) are always welcome.
|
||||
|
||||
If you want to implement something big: Please start a discussion about that in the issues! Maybe I've already had something similar in mind and we can make it happen together. However, keep in mind that the general roadmap is to make the existing features stable and get them tested. See the roadmap above.
|
||||
|
||||
# Affiliated Projects
|
||||
|
||||
Paperless has been around a while now, and people are starting to build stuff on top of it. If you're one of those people, we can add your project to this list:
|
||||
|
||||
@@ -15,7 +15,7 @@ services:
|
||||
POSTGRES_PASSWORD: paperless
|
||||
|
||||
webserver:
|
||||
image: jonaswinkler/paperless-ng:0.9.2
|
||||
image: jonaswinkler/paperless-ng:0.9.4
|
||||
restart: always
|
||||
depends_on:
|
||||
- db
|
||||
|
||||
@@ -5,7 +5,7 @@ services:
|
||||
restart: always
|
||||
|
||||
webserver:
|
||||
image: jonaswinkler/paperless-ng:0.9.2
|
||||
image: jonaswinkler/paperless-ng:0.9.4
|
||||
restart: always
|
||||
depends_on:
|
||||
- broker
|
||||
|
||||
BIN
docs/_static/paperless-0-dashboard.png
vendored
|
Before Width: | Height: | Size: 52 KiB |
BIN
docs/_static/paperless-1-list-table.png
vendored
|
Before Width: | Height: | Size: 62 KiB |
BIN
docs/_static/paperless-10-mobile.png
vendored
|
Before Width: | Height: | Size: 56 KiB |
BIN
docs/_static/paperless-11-mail-filters.png
vendored
|
Before Width: | Height: | Size: 70 KiB |
BIN
docs/_static/paperless-2-list-smallcards.png
vendored
|
Before Width: | Height: | Size: 256 KiB |
BIN
docs/_static/paperless-3-list-largecards.png
vendored
|
Before Width: | Height: | Size: 224 KiB |
BIN
docs/_static/paperless-4-filter.png
vendored
|
Before Width: | Height: | Size: 101 KiB |
BIN
docs/_static/paperless-5-editing.png
vendored
|
Before Width: | Height: | Size: 196 KiB |
BIN
docs/_static/paperless-6-tags.png
vendored
|
Before Width: | Height: | Size: 50 KiB |
BIN
docs/_static/paperless-7-autocomplete.png
vendored
|
Before Width: | Height: | Size: 53 KiB |
BIN
docs/_static/paperless-8-search-results.png
vendored
|
Before Width: | Height: | Size: 214 KiB |
BIN
docs/_static/paperless-9-admin.png
vendored
|
Before Width: | Height: | Size: 50 KiB |
BIN
docs/_static/screenshots/correspondents.png
vendored
Normal file
|
After Width: | Height: | Size: 106 KiB |
BIN
docs/_static/screenshots/dashboard.png
vendored
Normal file
|
After Width: | Height: | Size: 167 KiB |
BIN
docs/_static/screenshots/documents-filter.png
vendored
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
docs/_static/screenshots/documents-largecards.png
vendored
Normal file
|
After Width: | Height: | Size: 306 KiB |
BIN
docs/_static/screenshots/documents-smallcards.png
vendored
Normal file
|
After Width: | Height: | Size: 410 KiB |
BIN
docs/_static/screenshots/documents-table.png
vendored
Normal file
|
After Width: | Height: | Size: 137 KiB |
BIN
docs/_static/screenshots/editing.png
vendored
Normal file
|
After Width: | Height: | Size: 293 KiB |
BIN
docs/_static/screenshots/logs.png
vendored
Normal file
|
After Width: | Height: | Size: 260 KiB |
BIN
docs/_static/screenshots/mail-rules-edited.png
vendored
Normal file
|
After Width: | Height: | Size: 96 KiB |
BIN
docs/_static/screenshots/mobile.png
vendored
Normal file
|
After Width: | Height: | Size: 158 KiB |
BIN
docs/_static/screenshots/new-tag.png
vendored
Normal file
|
After Width: | Height: | Size: 32 KiB |
BIN
docs/_static/screenshots/search-preview.png
vendored
Normal file
|
After Width: | Height: | Size: 61 KiB |
BIN
docs/_static/screenshots/search-results.png
vendored
Normal file
|
After Width: | Height: | Size: 261 KiB |
@@ -30,7 +30,7 @@ Options available to docker installations:
|
||||
Paperless uses 3 volumes:
|
||||
|
||||
* ``paperless_media``: This is where your documents are stored.
|
||||
* ``paperless_data``: This is where auxilliary data is stored. This
|
||||
* ``paperless_data``: This is where auxiliary data is stored. This
|
||||
folder also contains the SQLite database, if you use it.
|
||||
* ``paperless_pgdata``: Exists only if you use PostgreSQL and contains
|
||||
the database.
|
||||
@@ -69,7 +69,7 @@ First of all, ensure that paperless is stopped.
|
||||
|
||||
After that, :ref:`make a backup <administration-backup>`.
|
||||
|
||||
A. If you used the docker-compose file, simply download the files of the new release,
|
||||
A. If you used the dockerfiles archive, simply download the files of the new release,
|
||||
adjust the settings in the files (i.e., the path to your consumption directory),
|
||||
and replace your existing docker-compose files. Then start paperless as usual,
|
||||
which will pull the new image, and update your database, if necessary:
|
||||
@@ -109,7 +109,7 @@ B. If you built the image yourself, grab the new archive and replace your curre
|
||||
.. hint::
|
||||
|
||||
You can usually keep your ``docker-compose.env`` file, since this file will
|
||||
never include mandantory configuration options. However, it is worth checking
|
||||
never include mandatory configuration options. However, it is worth checking
|
||||
out the new version of this file, since it might have new recommendations
|
||||
on what to configure.
|
||||
|
||||
@@ -126,8 +126,8 @@ After grabbing the new release and unpacking the contents, do the following:
|
||||
|
||||
$ pip install --upgrade pipenv
|
||||
$ cd /path/to/paperless
|
||||
$ pipenv install
|
||||
$ pipenv clean
|
||||
$ pipenv install
|
||||
|
||||
This creates a new virtual environment (or uses your existing environment)
|
||||
and installs all dependencies into it.
|
||||
@@ -247,12 +247,12 @@ your already processed documents.
|
||||
|
||||
When multiple document types or correspondents match a single document,
|
||||
the retagger won't assign these to the document. Specify ``--use-first``
|
||||
to override this behaviour and just use the first correspondent or type
|
||||
to override this behavior and just use the first correspondent or type
|
||||
it finds. This option does not apply to tags, since any amount of tags
|
||||
can be applied to a document.
|
||||
|
||||
Finally, ``-f`` specifies that you wish to overwrite already assigned
|
||||
correspondents, types and/or tags. The default behaviour is to not
|
||||
correspondents, types and/or tags. The default behavior is to not
|
||||
assign correspondents and types to documents that have this data already
|
||||
assigned. ``-f`` works differently for tags: By default, only additional tags get
|
||||
added to documents, no tags will be removed. With ``-f``, tags that don't
|
||||
@@ -274,6 +274,7 @@ management command:
|
||||
|
||||
This command takes no arguments.
|
||||
|
||||
.. _`administration-index`:
|
||||
|
||||
Managing the document search index
|
||||
==================================
|
||||
@@ -341,7 +342,7 @@ Documents can be stored in Paperless using GnuPG encryption.
|
||||
|
||||
.. danger::
|
||||
|
||||
Encryption is depreceated since paperless-ng 0.9 and doesn't really provide any
|
||||
Encryption is deprecated since paperless-ng 0.9 and doesn't really provide any
|
||||
additional security, since you have to store the passphrase in a configuration
|
||||
file on the same system as the encrypted documents for paperless to work.
|
||||
Furthermore, the entire text content of the documents is stored plain in the
|
||||
@@ -353,39 +354,23 @@ Documents can be stored in Paperless using GnuPG encryption.
|
||||
Consider running paperless on an encrypted filesystem instead, which will then
|
||||
at least provide security against physical hardware theft.
|
||||
|
||||
.. code::
|
||||
|
||||
change_storage_type [--passphrase PASSPHRASE] {gpg,unencrypted} {gpg,unencrypted}
|
||||
|
||||
positional arguments:
|
||||
{gpg,unencrypted} The state you want to change your documents from
|
||||
{gpg,unencrypted} The state you want to change your documents to
|
||||
|
||||
optional arguments:
|
||||
--passphrase PASSPHRASE
|
||||
|
||||
Enabling encryption
|
||||
-------------------
|
||||
|
||||
Basic usage to enable encryption of your document store (**USE A MORE SECURE PASSPHRASE**):
|
||||
|
||||
(Note: If ``PAPERLESS_PASSPHRASE`` isn't set already, you need to specify it here)
|
||||
|
||||
.. code::
|
||||
|
||||
change_storage_type [--passphrase SECR3TP4SSPHRA$E] unencrypted gpg
|
||||
Enabling encryption is no longer supported.
|
||||
|
||||
|
||||
Disabling encryption
|
||||
--------------------
|
||||
|
||||
Basic usage to enable encryption of your document store:
|
||||
Basic usage to disable encryption of your document store:
|
||||
|
||||
(Note: Again, if ``PAPERLESS_PASSPHRASE`` isn't set already, you need to specify it here)
|
||||
(Note: If ``PAPERLESS_PASSPHRASE`` isn't set already, you need to specify it here)
|
||||
|
||||
.. code::
|
||||
|
||||
change_storage_type [--passphrase SECR3TP4SSPHRA$E] gpg unencrypted
|
||||
decrypt_documents [--passphrase SECR3TP4SSPHRA$E]
|
||||
|
||||
|
||||
.. _Pipenv: https://pipenv.pypa.io/en/latest/
|
||||
@@ -84,6 +84,8 @@ to the filename.
|
||||
PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}, {"pattern":"^([a-z]+)_([0-9]+)\\.", "repl":" - \\2 - \\1."}]
|
||||
|
||||
|
||||
.. _advanced-matching:
|
||||
|
||||
Matching tags, correspondents and document types
|
||||
################################################
|
||||
|
||||
@@ -145,7 +147,9 @@ America are tagged with the tag "bofa_123" and the matching algorithm of this
|
||||
tag is set to *Auto*, this neural network will examine your documents and
|
||||
automatically learn when to assign this tag.
|
||||
|
||||
There are a couple caveats you need to keep in mind when using this feature:
|
||||
Paperless tries to hide much of the involved complexity with this approach.
|
||||
However, there are a couple caveats you need to keep in mind when using this
|
||||
feature:
|
||||
|
||||
* Changes to your documents are not immediately reflected by the matching
|
||||
algorithm. The neural network needs to be *trained* on your documents after
|
||||
@@ -165,6 +169,11 @@ There are a couple caveats you need to keep in mind when using this feature:
|
||||
has the correspondent "Very obscure web shop I bought something five years
|
||||
ago", it will probably not assign this correspondent automatically if you buy
|
||||
something from them again. The more documents, the better.
|
||||
* Paperless also needs a reasonable amount of negative examples to decide when
|
||||
not to assign a certain tag, correspondent or type. This will usually be the
|
||||
case as you start filling up paperless with documents. Example: If all your
|
||||
documents are either from "Webshop" and "Bank", paperless will assign one of
|
||||
these correspondents to ANY new document, if both are set to automatic matching.
|
||||
|
||||
Hooking into the consumption process
|
||||
####################################
|
||||
@@ -253,7 +262,7 @@ By default, paperless stores your documents in the media directory and renames t
|
||||
using the identifier which it has assigned to each document. You will end up getting
|
||||
files like ``0000123.pdf`` in your media directory. This isn't necessarily a bad
|
||||
thing, because you normally don't have to access these files manually. However, if
|
||||
you wish to name your files differently, you can do that by adjustng the
|
||||
you wish to name your files differently, you can do that by adjusting the
|
||||
``PAPERLESS_FILENAME_FORMAT`` settings variable.
|
||||
|
||||
This variable allows you to configure the filename (folders are allowed!) using
|
||||
@@ -278,7 +287,7 @@ will create a directory structure as follows:
|
||||
my_new_shoes-0000004.pdf
|
||||
|
||||
Paperless appends the unique identifier of each document to the filename. This
|
||||
avoides filename clashes.
|
||||
avoids filename clashes.
|
||||
|
||||
.. danger::
|
||||
|
||||
|
||||
@@ -94,7 +94,7 @@ Result object:
|
||||
}
|
||||
|
||||
* ``id``: the primary key of the found document
|
||||
* ``highlights``: an object containing parseable highlights for the result.
|
||||
* ``highlights``: an object containing parsable highlights for the result.
|
||||
See below.
|
||||
* ``score``: The score assigned to the document. A higher score indicates a
|
||||
better match with the query. Search results are sorted descending by score.
|
||||
|
||||
@@ -5,6 +5,52 @@
|
||||
Changelog
|
||||
*********
|
||||
|
||||
paperless-ng 0.9.4
|
||||
##################
|
||||
|
||||
* Searching:
|
||||
|
||||
* Paperless now supports searching by tags, types, dates and correspondents. In order to have this applied to your
|
||||
existing documents, you need to perform a ``document_index reindex`` management command
|
||||
(see :ref:`administration-index`)
|
||||
that adds the data to the search index. You only need to do this once, since the schema of the search index changed.
|
||||
Paperless keeps the index updated after that whenever something changes.
|
||||
* Paperless now has spelling corrections ("Did you mean") for mistyped queries.
|
||||
* The documentation contains :ref:`information about the query syntax <basic-searching>`.
|
||||
|
||||
* Front end:
|
||||
|
||||
* Clickable tags, correspondents and types allow quick filtering for related documents.
|
||||
* Saved views are now editable.
|
||||
* Preview documents directly in the browser.
|
||||
* Navigation from the dashboard to saved views.
|
||||
|
||||
* Fixes:
|
||||
|
||||
* A severe error when trying to use post consume scripts.
|
||||
* An error in the consumer that caused invalid messages of missing files to show up in the log.
|
||||
|
||||
* The documentation now contains information about bare metal installs and a section about
|
||||
how to set up the development environment.
|
||||
|
||||
paperless-ng 0.9.3
|
||||
##################
|
||||
|
||||
* Setting ``PAPERLESS_AUTO_LOGIN_USERNAME`` replaces ``PAPERLESS_DISABLE_LOGIN``.
|
||||
You have to specify your username.
|
||||
* Added a simple sanity checker that checks your documents for missing or orphaned files,
|
||||
files with wrong checksums, inaccessible files, and documents with empty content.
|
||||
* It is no longer possible to encrypt your documents. For the time being, paperless will
|
||||
continue to operate with already encrypted documents.
|
||||
* Fixes:
|
||||
|
||||
* Paperless now uses inotify again, since the watchdog was causing issues which I was not
|
||||
aware of.
|
||||
* Issue with the automatic classifier not working with only one tag.
|
||||
* A couple issues with the search index being opened too eagerly.
|
||||
|
||||
* Added lots of tests for various parts of the application.
|
||||
|
||||
paperless-ng 0.9.2
|
||||
##################
|
||||
|
||||
@@ -52,7 +98,7 @@ paperless-ng 0.9.0
|
||||
* **Added:** New frontend. Features:
|
||||
|
||||
* Single page application: It's much more responsive than the django admin pages.
|
||||
* Dashboard. Shows recently scanned documents, or todos, or other documents
|
||||
* Dashboard. Shows recently scanned documents, or todo notes, or other documents
|
||||
at wish. Allows uploading of documents. Shows basic statistics.
|
||||
* Better document list with multiple display options.
|
||||
* Full text search with result highlighting, auto completion and scoring based
|
||||
@@ -102,7 +148,7 @@ paperless-ng 0.9.0
|
||||
|
||||
* **Modified [breaking]:** PostgreSQL:
|
||||
|
||||
* If ``PAPERLESS_DBHOST`` is specified in the settings, paperless uses postgresql instead of sqlite.
|
||||
* If ``PAPERLESS_DBHOST`` is specified in the settings, paperless uses PostgreSQL instead of SQLite.
|
||||
Username, database and password all default to ``paperless`` if not specified.
|
||||
|
||||
* **Modified [breaking]:** document_retagger management command rework. See
|
||||
@@ -130,7 +176,7 @@ paperless-ng 0.9.0
|
||||
Certain language specifics such as umlauts may not get picked up properly.
|
||||
* ``PAPERLESS_DEBUG`` defaults to ``false``.
|
||||
* The presence of ``PAPERLESS_DBHOST`` now determines whether to use PostgreSQL or
|
||||
sqlite.
|
||||
SQLite.
|
||||
* ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and
|
||||
``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details.
|
||||
* ``PAPERLESS_OPTIMIZE_THUMBNAILS`` allows you to disable or enable thumbnail
|
||||
@@ -138,8 +184,11 @@ paperless-ng 0.9.0
|
||||
|
||||
* Many more small changes here and there. The usual stuff.
|
||||
|
||||
Paperless
|
||||
#########
|
||||
|
||||
2.7.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* `syntonym`_ submitted a pull request to catch IMAP connection errors `#475`_.
|
||||
* `Stéphane Brunner`_ added ``psycopg2`` to the Pipfile `#489`_. He also fixed
|
||||
@@ -156,7 +205,7 @@ paperless-ng 0.9.0
|
||||
|
||||
|
||||
2.6.1
|
||||
#####
|
||||
=====
|
||||
|
||||
* We now have a logo, complete with a favicon :-)
|
||||
* Removed some problematic tests.
|
||||
@@ -168,7 +217,7 @@ paperless-ng 0.9.0
|
||||
|
||||
|
||||
2.6.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Allow an infinite number of logs to be deleted. Thanks to `Ulli`_ for noting
|
||||
the problem in `#433`_.
|
||||
@@ -189,7 +238,7 @@ paperless-ng 0.9.0
|
||||
|
||||
|
||||
2.5.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* **New dependency**: Paperless now optimises thumbnail generation with
|
||||
`optipng`_, so you'll need to install that somewhere in your PATH or declare
|
||||
@@ -233,7 +282,7 @@ paperless-ng 0.9.0
|
||||
|
||||
|
||||
2.4.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* A new set of actions are now available thanks to `jonaswinkler`_'s very first
|
||||
pull request! You can now do nifty things like tag documents in bulk, or set
|
||||
@@ -254,7 +303,7 @@ paperless-ng 0.9.0
|
||||
|
||||
|
||||
2.3.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Support for consuming plain text & markdown documents was added by
|
||||
`Joshua Taillon`_! This was a long-requested feature, and its addition is
|
||||
@@ -272,14 +321,14 @@ paperless-ng 0.9.0
|
||||
|
||||
|
||||
2.2.1
|
||||
#####
|
||||
=====
|
||||
|
||||
* `Kyle Lucy`_ reported a bug quickly after the release of 2.2.0 where we broke
|
||||
the ``DISABLE_LOGIN`` feature: `#392`_.
|
||||
|
||||
|
||||
2.2.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Thanks to `dadosch`_, `Wolfgang Mader`_, and `Tim Brooks`_ this is the first
|
||||
version of Paperless that supports Django 2.0! As a result of their hard
|
||||
@@ -296,7 +345,7 @@ paperless-ng 0.9.0
|
||||
|
||||
|
||||
2.1.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* `Enno Lohmeier`_ added three simple features that make Paperless a lot more
|
||||
user (and developer) friendly:
|
||||
@@ -315,7 +364,7 @@ paperless-ng 0.9.0
|
||||
|
||||
|
||||
2.0.0
|
||||
#####
|
||||
=====
|
||||
|
||||
This is a big release as we've changed a core-functionality of Paperless: we no
|
||||
longer encrypt files with GPG by default.
|
||||
@@ -347,7 +396,7 @@ Special thanks to `erikarvstedt`_, `matthewmoto`_, and `mcronce`_ who did the
|
||||
bulk of the work on this big change.
|
||||
|
||||
1.4.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* `Quentin Dawans`_ has refactored the document consumer to allow for some
|
||||
command-line options. Notably, you can now direct it to consume from a
|
||||
@@ -382,7 +431,7 @@ bulk of the work on this big change.
|
||||
to some excellent work from `erikarvstedt`_ on `#351`_
|
||||
|
||||
1.3.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* You can now run Paperless without a login, though you'll still have to create
|
||||
at least one user. This is thanks to a pull-request from `matthewmoto`_:
|
||||
@@ -405,7 +454,7 @@ bulk of the work on this big change.
|
||||
problem and helping me find where to fix it.
|
||||
|
||||
1.2.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* New Docker image, now based on Alpine, thanks to the efforts of `addadi`_
|
||||
and `Pit`_. This new image is dramatically smaller than the Debian-based
|
||||
@@ -424,7 +473,7 @@ bulk of the work on this big change.
|
||||
in the document text.
|
||||
|
||||
1.1.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Fix for `#283`_, a redirect bug which broke interactions with
|
||||
paperless-desktop. Thanks to `chris-aeviator`_ for reporting it.
|
||||
@@ -434,7 +483,7 @@ bulk of the work on this big change.
|
||||
`Dan Panzarella`_
|
||||
|
||||
1.0.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Upgrade to Django 1.11. **You'll need to run
|
||||
``pip install -r requirements.txt`` after the usual ``git pull`` to
|
||||
@@ -453,14 +502,14 @@ bulk of the work on this big change.
|
||||
`Lukas Winkler`_'s issue `#278`_
|
||||
|
||||
0.8.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Paperless can now run in a subdirectory on a host (``/paperless``), rather
|
||||
than always running in the root (``/``) thanks to `maphy-psd`_'s work on
|
||||
`#255`_.
|
||||
|
||||
0.7.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* **Potentially breaking change**: As per `#235`_, Paperless will no longer
|
||||
automatically delete documents attached to correspondents when those
|
||||
@@ -472,7 +521,7 @@ bulk of the work on this big change.
|
||||
`Kusti Skytén`_ for posting the correct solution in the Github issue.
|
||||
|
||||
0.6.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Abandon the shared-secret trick we were using for the POST API in favour
|
||||
of BasicAuth or Django session.
|
||||
@@ -486,7 +535,7 @@ bulk of the work on this big change.
|
||||
the help with this feature.
|
||||
|
||||
0.5.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Support for fuzzy matching in the auto-tagger & auto-correspondent systems
|
||||
thanks to `Jake Gysland`_'s patch `#220`_.
|
||||
@@ -504,13 +553,13 @@ bulk of the work on this big change.
|
||||
* Amended the Django Admin configuration to have nice headers (`#230`_)
|
||||
|
||||
0.4.1
|
||||
#####
|
||||
=====
|
||||
|
||||
* Fix for `#206`_ wherein the pluggable parser didn't recognise files with
|
||||
all-caps suffixes like ``.PDF``
|
||||
|
||||
0.4.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Introducing reminders. See `#199`_ for more information, but the short
|
||||
explanation is that you can now attach simple notes & times to documents
|
||||
@@ -520,7 +569,7 @@ bulk of the work on this big change.
|
||||
like to make use of this feature in his project.
|
||||
|
||||
0.3.6
|
||||
#####
|
||||
=====
|
||||
|
||||
* Fix for `#200`_ (!!) where the API wasn't configured to allow updating the
|
||||
correspondent or the tags for a document.
|
||||
@@ -534,7 +583,7 @@ bulk of the work on this big change.
|
||||
documentation is on its way.
|
||||
|
||||
0.3.5
|
||||
#####
|
||||
=====
|
||||
|
||||
* A serious facelift for the documents listing page wherein we drop the
|
||||
tabular layout in favour of a tiled interface.
|
||||
@@ -545,7 +594,7 @@ bulk of the work on this big change.
|
||||
consumption.
|
||||
|
||||
0.3.4
|
||||
#####
|
||||
=====
|
||||
|
||||
* Removal of django-suit due to a licensing conflict I bumped into in 0.3.3.
|
||||
Note that you *can* use Django Suit with Paperless, but only in a
|
||||
@@ -558,26 +607,26 @@ bulk of the work on this big change.
|
||||
API thanks to @thomasbrueggemann. See `#179`_.
|
||||
|
||||
0.3.3
|
||||
#####
|
||||
=====
|
||||
|
||||
* Thumbnails in the UI and a Django-suit -based face-lift courtesy of @ekw!
|
||||
* Timezone, items per page, and default language are now all configurable,
|
||||
also thanks to @ekw.
|
||||
|
||||
0.3.2
|
||||
#####
|
||||
=====
|
||||
|
||||
* Fix for `#172`_: defaulting ALLOWED_HOSTS to ``["*"]`` and allowing the
|
||||
user to set her own value via ``PAPERLESS_ALLOWED_HOSTS`` should the need
|
||||
arise.
|
||||
|
||||
0.3.1
|
||||
#####
|
||||
=====
|
||||
|
||||
* Added a default value for ``CONVERT_BINARY``
|
||||
|
||||
0.3.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Updated to using django-filter 1.x
|
||||
* Added some system checks so new users aren't confused by misconfigurations.
|
||||
@@ -590,7 +639,7 @@ bulk of the work on this big change.
|
||||
``PAPERLESS_SHARED_SECRET`` respectively instead.
|
||||
|
||||
0.2.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* `#150`_: The media root is now a variable you can set in
|
||||
``paperless.conf``.
|
||||
@@ -618,7 +667,7 @@ bulk of the work on this big change.
|
||||
to `Martin Honermeyer`_ and `Tim White`_ for working with me on this.
|
||||
|
||||
0.1.1
|
||||
#####
|
||||
=====
|
||||
|
||||
* Potentially **Breaking Change**: All references to "sender" in the code
|
||||
have been renamed to "correspondent" to better reflect the nature of the
|
||||
@@ -642,7 +691,7 @@ bulk of the work on this big change.
|
||||
to be imported but made unavailable.
|
||||
|
||||
0.1.0
|
||||
#####
|
||||
=====
|
||||
|
||||
* Docker support! Big thanks to `Wayne Werner`_, `Brian Conn`_, and
|
||||
`Tikitu de Jager`_ for this one, and especially to `Pit`_
|
||||
@@ -661,14 +710,14 @@ bulk of the work on this big change.
|
||||
* Added tox with pep8 checking
|
||||
|
||||
0.0.6
|
||||
#####
|
||||
=====
|
||||
|
||||
* Added support for parallel OCR (significant work from `Pit`_)
|
||||
* Sped up the language detection (significant work from `Pit`_)
|
||||
* Added simple logging
|
||||
|
||||
0.0.5
|
||||
#####
|
||||
=====
|
||||
|
||||
* Added support for image files as documents (png, jpg, gif, tiff)
|
||||
* Added a crude means of HTTP POST for document imports
|
||||
@@ -677,7 +726,7 @@ bulk of the work on this big change.
|
||||
* Documentation for the above as well as data migration
|
||||
|
||||
0.0.4
|
||||
#####
|
||||
=====
|
||||
|
||||
* Added automated tagging based on keyword matching
|
||||
* Cleaned up the document listing page
|
||||
@@ -685,19 +734,19 @@ bulk of the work on this big change.
|
||||
* Added ``pytz`` to the list of requirements
|
||||
|
||||
0.0.3
|
||||
#####
|
||||
=====
|
||||
|
||||
* Added basic tagging
|
||||
|
||||
0.0.2
|
||||
#####
|
||||
=====
|
||||
|
||||
* Added language detection
|
||||
* Added datestamps to ``document_exporter``.
|
||||
* Changed ``settings.TESSERACT_LANGUAGE`` to ``settings.OCR_LANGUAGE``.
|
||||
|
||||
0.0.1
|
||||
#####
|
||||
=====
|
||||
|
||||
* Initial release
|
||||
|
||||
|
||||
@@ -35,22 +35,22 @@ PAPERLESS_DBHOST=<hostname>
|
||||
|
||||
PAPERLESS_DBPORT=<port>
|
||||
Adjust port if necessary.
|
||||
|
||||
|
||||
Default is 5432.
|
||||
|
||||
PAPERLESS_DBNAME=<name>
|
||||
Database name in PostgreSQL.
|
||||
|
||||
|
||||
Defaults to "paperless".
|
||||
|
||||
PAPERLESS_DBUSER=<name>
|
||||
Database user in PostgreSQL.
|
||||
|
||||
|
||||
Defaults to "paperless".
|
||||
|
||||
PAPERLESS_DBPASS=<password>
|
||||
Database password for PostgreSQL.
|
||||
|
||||
|
||||
Defaults to "paperless".
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ PAPERLESS_CONSUMPTION_DIR=<path>
|
||||
Defaults to "../consume", relative to the "src" directory.
|
||||
|
||||
PAPERLESS_DATA_DIR=<path>
|
||||
This is where paperless stores all its data (search index, sqlite database,
|
||||
This is where paperless stores all its data (search index, SQLite database,
|
||||
classification model, etc).
|
||||
|
||||
Defaults to "../data", relative to the "src" directory.
|
||||
@@ -100,7 +100,7 @@ Hosting & Security
|
||||
##################
|
||||
|
||||
PAPERLESS_SECRET_KEY=<key>
|
||||
Paperless uses this to make session tokens. If you exose paperless on the
|
||||
Paperless uses this to make session tokens. If you expose paperless on the
|
||||
internet, you need to change this, since the default secret is well known.
|
||||
|
||||
Use any sequence of characters. The more, the better. You don't need to
|
||||
@@ -113,7 +113,7 @@ PAPERLESS_ALLOWED_HOSTS<comma-separated-list>
|
||||
really should set this value to the domain name you're using. Failing to do
|
||||
so leaves you open to HTTP host header attacks:
|
||||
https://docs.djangoproject.com/en/3.1/topics/security/#host-header-validation
|
||||
|
||||
|
||||
Just remember that this is a comma-separated list, so "example.com" is fine,
|
||||
as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
|
||||
|
||||
@@ -132,15 +132,25 @@ PAPERLESS_FORCE_SCRIPT_NAME=<path>
|
||||
.. note::
|
||||
|
||||
I don't know if this works in paperless-ng. Probably not.
|
||||
|
||||
|
||||
Defaults to none, which hosts paperless at "/".
|
||||
|
||||
PAPERLESS_STATIC_URL=<path>
|
||||
Override the STATIC_URL here. Unless you're hosting Paperless off a
|
||||
subdomain like /paperless/, you probably don't need to change this.
|
||||
|
||||
|
||||
Defaults to "/static/".
|
||||
|
||||
PAPERLESS_AUTO_LOGIN_USERNAME=<username>
|
||||
Specify a username here so that paperless will automatically perform login
|
||||
with the selected user.
|
||||
|
||||
.. danger::
|
||||
|
||||
Do not use this when exposing paperless on the internet. There are no
|
||||
checks in place that would prevent you from doing this.
|
||||
|
||||
Defaults to none, which disables this feature.
|
||||
|
||||
Software tweaks
|
||||
###############
|
||||
@@ -156,11 +166,11 @@ PAPERLESS_THREADS_PER_WORKER=<num>
|
||||
in parallel on a single document.
|
||||
|
||||
.. caution::
|
||||
|
||||
|
||||
Ensure that the product
|
||||
|
||||
|
||||
PAPERLESS_TASK_WORKERS * PAPERLESS_THREADS_PER_WORKER
|
||||
|
||||
|
||||
does not exceed your CPU core count or else paperless will be extremely slow.
|
||||
If you want paperless to process many documents in parallel, choose a high
|
||||
worker count. If you want paperless to process very large documents faster,
|
||||
@@ -197,10 +207,10 @@ PAPERLESS_OCR_PAGES=<num>
|
||||
PAPERLESS_OCR_LANGUAGE=<lang>
|
||||
Customize the default language that tesseract will attempt to use when
|
||||
parsing documents. The default language is used whenever
|
||||
|
||||
|
||||
* No language could be detected on a document
|
||||
* No tesseract data files are available for the detected language
|
||||
|
||||
|
||||
It should be a 3-letter language code consistent with ISO
|
||||
639: https://www.loc.gov/standards/iso639-2/php/code_list.php
|
||||
|
||||
@@ -220,7 +230,7 @@ PAPERLESS_CONSUMER_POLLING=<num>
|
||||
specify a polling interval in seconds here, which will then cause paperless
|
||||
to periodically check your consumption directory for changes.
|
||||
|
||||
Defaults to 0, which disables polling and uses filesystem notifiactions.
|
||||
Defaults to 0, which disables polling and uses filesystem notifications.
|
||||
|
||||
PAPERLESS_CONSUMER_DELETE_DUPLICATES=<bool>
|
||||
When the consumer detects a duplicate document, it will not touch the
|
||||
@@ -234,7 +244,7 @@ PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
|
||||
such cases, try setting this to a reasonably low value, like 32. The
|
||||
default is to use whatever is necessary to do everything without writing to
|
||||
disk, and units are in megabytes.
|
||||
|
||||
|
||||
For more information on how to use this value, you should search
|
||||
the web for "MAGICK_MEMORY_LIMIT".
|
||||
|
||||
@@ -245,7 +255,7 @@ PAPERLESS_CONVERT_TMPDIR=<path>
|
||||
/tmp as tmpfs, you should set this to a path that's on a physical disk, like
|
||||
/home/your_user/tmp or something. ImageMagick will use this as scratch space
|
||||
when crunching through very large documents.
|
||||
|
||||
|
||||
For more information on how to use this value, you should search
|
||||
the web for "MAGICK_TMPDIR".
|
||||
|
||||
@@ -264,7 +274,7 @@ PAPERLESS_CONVERT_DENSITY=<num>
|
||||
Default is 300.
|
||||
|
||||
PAPERLESS_OPTIMIZE_THUMBNAILS=<bool>
|
||||
Use optipng to optimize thumbnails. This usually reduces the sice of
|
||||
Use optipng to optimize thumbnails. This usually reduces the size of
|
||||
thumbnails by about 20%, but uses considerable compute time during
|
||||
consumption.
|
||||
|
||||
@@ -282,7 +292,7 @@ PAPERLESS_FILENAME_DATE_ORDER=<format>
|
||||
Use this setting to enable checking the document filename for date
|
||||
information. The date order can be set to any option as specified in
|
||||
https://dateparser.readthedocs.io/en/latest/settings.html#date-order.
|
||||
The filename will be checked first, and if nothing is found, the document
|
||||
The filename will be checked first, and if nothing is found, the document
|
||||
text will be checked as normal.
|
||||
|
||||
Defaults to none, which disables this feature.
|
||||
|
||||
@@ -85,7 +85,7 @@ quoted, or triple-quoted string will do:
|
||||
problematic_string = 'This is a "string" with "quotes" in it'
|
||||
|
||||
In HTML templates, please use double-quotes for tag attributes, and single
|
||||
quotes for arguments passed to Django tempalte tags:
|
||||
quotes for arguments passed to Django template tags:
|
||||
|
||||
.. code:: html
|
||||
|
||||
|
||||
@@ -1,5 +1,120 @@
|
||||
.. _extending:
|
||||
|
||||
Paperless development
|
||||
#####################
|
||||
|
||||
This section describes the steps you need to take to start development on paperless-ng.
|
||||
|
||||
1. Check out the source from github. The repository is organized in the following way:
|
||||
|
||||
* ``master`` always represents the latest release and will only see changes
|
||||
when a new release is made.
|
||||
* ``dev`` contains the code that will be in the next release.
|
||||
* ``feature-X`` contain bigger changes that will be in some release, but not
|
||||
necessarily the next one.
|
||||
|
||||
Apart from that, the folder structure is as follows:
|
||||
|
||||
* ``docs/`` - Documentation.
|
||||
* ``src-ui/`` - Code of the front end.
|
||||
* ``src/`` - Code of the back end.
|
||||
* ``scripts/`` - Various scripts that help with different parts of development.
|
||||
* ``docker/`` - Files required to build the docker image.
|
||||
|
||||
2. Install some dependencies.
|
||||
|
||||
* Python 3.6.
|
||||
* All dependencies listed in the :ref:`Bare metal route <setup-bare_metal>`
|
||||
* redis. You can either install redis or use the included scripts/start-redis.sh
|
||||
to use docker to fire up a redis instance.
|
||||
|
||||
Back end development
|
||||
====================
|
||||
|
||||
The backend is a django application. I use PyCharm for development, but you can use whatever
|
||||
you want.
|
||||
|
||||
Install the python dependencies by performing ``pipenv install --dev`` in the src/ directory.
|
||||
This will also create a virtual environment, which you can enter with ``pipenv shell`` or
|
||||
execute one-shot commands in with ``pipenv run``.
|
||||
|
||||
In ``src/paperless.conf``, enable debug mode.
|
||||
|
||||
Configure the IDE to use the src/ folder as the base source folder. Configure the following
|
||||
launch configurations in your IDE:
|
||||
|
||||
* python3 manage.py runserver
|
||||
* python3 manage.py qcluster
|
||||
* python3 manage.py consumer
|
||||
|
||||
Depending on which part of paperless you're developing for, you need to have some or all of
|
||||
them running.
|
||||
|
||||
Testing and code style:
|
||||
|
||||
* Run ``pytest`` in the src/ directory to execute all tests. This also generates an HTML coverage
|
||||
report. When running tests, paperless.conf is loaded as well. However, the tests rely on the default
|
||||
configuration. This is not ideal. But for now, make sure no settings except for DEBUG are overridden when testing.
|
||||
* Run ``pycodestyle`` to test your code for issues with the configured code style settings.
|
||||
|
||||
.. note::
|
||||
|
||||
The line length rule E501 is generally useful for getting multiple source files
|
||||
next to each other on the screen. However, in some cases, it's just not possible
|
||||
to make some lines fit, especially complicated IF cases. Append `` # NOQA: E501``
|
||||
to disable this check for certain lines.
|
||||
|
||||
Front end development
|
||||
=====================
|
||||
|
||||
The front end is built using Angular. I use the ``Code - OSS`` IDE for development.
|
||||
|
||||
In order to get started, you need ``npm``. Install the Angular CLI interface with
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ npm install -g @angular/cli
|
||||
|
||||
and make sure that it's on your path. Next, in the src-ui/ directory, install the
|
||||
required dependencies of the project.
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ npm install
|
||||
|
||||
You can launch a development server by running
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ ng serve
|
||||
|
||||
This will automatically update whenever you save. However, in-place compilation might fail
|
||||
on syntax errors, in which case you need to restart it.
|
||||
|
||||
By default, the development server is available on ``http://localhost:4200/`` and is configured
|
||||
to access the API at ``http://localhost:8000/api/``, which is the default of the backend.
|
||||
If you enabled DEBUG on the back end, several security overrides for allowed hosts, CORS and
|
||||
X-Frame-Options are in place so that the front end behaves exactly as in production. This also
|
||||
relies on you being logged into the back end. Without a valid session, the front end will simply
|
||||
not work.
|
||||
|
||||
In order to build the front end and serve it as part of django, execute
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ ng build --prod --output-path ../src/documents/static/frontend/
|
||||
|
||||
This will build the front end and put it in a location from which the Django server will serve
|
||||
it as static content. This way, you can verify that authentication is working.
|
||||
|
||||
Making a release
|
||||
================
|
||||
|
||||
Execute the ``make-release.sh <ver>`` script.
|
||||
|
||||
This will test and assemble everything and also build and tag a docker image.
|
||||
|
||||
|
||||
Extending Paperless
|
||||
===================
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ is
|
||||
|
||||
.. caution::
|
||||
|
||||
Dont mess with this folder. Don't change permissions and don't move
|
||||
Do not mess with this folder. Don't change permissions and don't move
|
||||
files around manually. This folder is meant to be entirely managed by docker
|
||||
and paperless.
|
||||
|
||||
@@ -36,9 +36,9 @@ file extensions do not matter.
|
||||
|
||||
**A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B.
|
||||
The long answer is that certain parts of
|
||||
Paperless will run very slow, such as the tesseract OCR. On Rasperry Pi,
|
||||
Paperless will run very slow, such as the tesseract OCR. On Raspberry Pi,
|
||||
try to OCR documents before feeding them into paperless so that paperless can
|
||||
reuse the text. The web interface should be alot snappier, since it runs
|
||||
reuse the text. The web interface should be a lot snappier, since it runs
|
||||
in your browser and paperless has to do much less work to serve the data.
|
||||
|
||||
.. note::
|
||||
|
||||
@@ -8,7 +8,7 @@ Scanner recommendations
|
||||
As Paperless operates by watching a folder for new files, it doesn't care what
|
||||
scanner you use, but sometimes finding a scanner that will write to an FTP,
|
||||
NFS, or SMB server can be difficult. This page is here to help you find one
|
||||
that works right for you based on recommentations from other Paperless users.
|
||||
that works right for you based on recommendations from other Paperless users.
|
||||
|
||||
+---------+----------------+-----+-----+-----+----------------+
|
||||
| Brand | Model | Supports | Recommended By |
|
||||
|
||||
@@ -9,41 +9,37 @@ research papers though, its a horrible tool for that job.
|
||||
|
||||
The dashboard shows customizable views on your document and allows document uploads:
|
||||
|
||||
.. image:: _static/paperless-0-dashboard.png
|
||||
.. image:: _static/screenshots/dashboard.png
|
||||
|
||||
The document list provides three different styles to scroll through your documents:
|
||||
|
||||
.. image:: _static/paperless-1-list-table.png
|
||||
.. image:: _static/paperless-2-list-smallcards.png
|
||||
.. image:: _static/paperless-3-list-largecards.png
|
||||
.. image:: _static/screenshots/documents-table.png
|
||||
.. image:: _static/screenshots/documents-smallcards.png
|
||||
.. image:: _static/screenshots/documents-largecards.png
|
||||
|
||||
Extensive filtering mechanisms:
|
||||
|
||||
.. image:: _static/paperless-4-filter.png
|
||||
.. image:: _static/screenshots/documents-filter.png
|
||||
|
||||
Side-by-side editing of documents. Optmized for 1080p.
|
||||
Side-by-side editing of documents. Optimized for 1080p.
|
||||
|
||||
.. image:: _static/paperless-5-editing.png
|
||||
.. image:: _static/screenshots/editing.png
|
||||
|
||||
Tag editing. This looks about the same for correspondents and document types.
|
||||
|
||||
.. image:: _static/paperless-6-tags.png
|
||||
.. image:: _static/screenshots/new-tag.png
|
||||
|
||||
Searching provides auto complete and highlights the results.
|
||||
|
||||
.. image:: _static/paperless-7-autocomplete.png
|
||||
.. image:: _static/paperless-8-search-results.png
|
||||
|
||||
The old admin is still there and accessible!
|
||||
|
||||
.. image:: _static/paperless-9-admin.png
|
||||
.. image:: _static/screenshots/search-preview.png
|
||||
.. image:: _static/screenshots/search-results.png
|
||||
|
||||
Fancy mail filters!
|
||||
|
||||
.. image:: _static/paperless-11-mail-filters.png
|
||||
.. image:: _static/screenshots/mail-rules-edited.png
|
||||
|
||||
Mobile support in the future? This kinda works, however some layouts are still
|
||||
too wide.
|
||||
|
||||
.. image:: _static/paperless-10-mobile.png
|
||||
.. image:: _static/screenshots/mobile.png
|
||||
|
||||
|
||||
218
docs/setup.rst
@@ -85,7 +85,7 @@ Paperless consists of the following components:
|
||||
needs to do from time to time in order to operate properly.
|
||||
|
||||
This allows paperless to process multiple documents from your consumption folder in parallel! On
|
||||
a modern multicore system, consumption with full ocr is blazing fast.
|
||||
a modern multi-core system, consumption with full ocr is blazing fast.
|
||||
|
||||
The task processor comes with a built-in admin interface that you can use to see whenever any of the
|
||||
tasks fail and inspect the errors (i.e., wrong email credentials, errors during consuming a specific
|
||||
@@ -102,8 +102,7 @@ Paperless consists of the following components:
|
||||
for getting the tasks from the webserver and consumer to the task scheduler. These run in different
|
||||
processes (maybe even on different machines!), and therefore, this is necessary.
|
||||
|
||||
* A database server. Paperless supports PostgreSQL and sqlite for storing its data. However, with the
|
||||
added concurrency, it is strongly advised to use PostgreSQL, as sqlite has its limits in that regard.
|
||||
* Optional: A database server. Paperless supports both PostgreSQL and SQLite for storing its data.
|
||||
|
||||
|
||||
Installation
|
||||
@@ -146,10 +145,10 @@ Docker Route
|
||||
|
||||
.. hint::
|
||||
|
||||
For new installations, it is recommended to use postgresql as the database
|
||||
backend. This is due to the increased amount of concurrency in paperless-ng.
|
||||
For new installations, it is recommended to use PostgreSQL as the database
|
||||
backend.
|
||||
|
||||
2. Modify ``docker-compose.yml`` to your preferences. You should change the path
|
||||
2. Modify ``docker-compose.yml`` to your preferences. You may want to change the path
|
||||
to the consumption directory in this file. Find the line that specifies where
|
||||
to mount the consumption directory:
|
||||
|
||||
@@ -205,13 +204,152 @@ Docker Route
|
||||
simplifies deployment immensely. If you know your way around Docker, feel
|
||||
free to tinker around without using compose!
|
||||
|
||||
.. _`setup-bare_metal`:
|
||||
|
||||
Bare Metal Route
|
||||
================
|
||||
|
||||
.. warning::
|
||||
Paperless runs on linux only. The following procedure has been tested on a minimal
|
||||
installation of Debian/Buster, which is the current stable release at the time of
|
||||
writing. Windows is not and will never be supported.
|
||||
|
||||
TBD. User docker for now.
|
||||
1. Install dependencies. Paperless requires the following packages.
|
||||
|
||||
* ``python3`` 3.6, 3.7, 3.8 (3.9 is untested).
|
||||
* ``python3-pip``, optionally ``pipenv`` for package installation
|
||||
* ``python3-dev``
|
||||
|
||||
* ``imagemagick`` >= 6 for PDF conversion
|
||||
* ``unpaper`` for cleaning documents before OCR
|
||||
* ``ghostscript``
|
||||
* ``optipng`` for optimising thumbnails
|
||||
* ``tesseract-ocr`` >= 4.0.0 for OCR
|
||||
* ``tesseract-ocr`` language packs (``tesseract-ocr-eng``, ``tesseract-ocr-deu``, etc)
|
||||
* ``gnupg`` for handling encrypted documents
|
||||
* ``libpoppler-cpp-dev`` for PDF to text conversion
|
||||
* ``libmagic-dev`` for mime type detection
|
||||
* ``libpq-dev`` for PostgreSQL
|
||||
|
||||
You will also need ``build-essential``, ``python3-setuptools`` and ``python3-wheel``
|
||||
for installing some of the python dependencies. You can remove that
|
||||
again after installation.
|
||||
|
||||
2. Install ``redis`` >= 5.0 and configure it to start automatically.
|
||||
|
||||
3. Optional. Install ``postgresql`` and configure a database, user and password for paperless. If you do not wish
|
||||
to use PostgreSQL, SQLite is available as well.
|
||||
|
||||
4. Get the release archive. If you pull the git repo as it is, you also have to compile the front end by yourself.
|
||||
Extract the archive to a place from where you wish to execute it, such as ``/opt/paperless``.
|
||||
|
||||
5. Configure paperless. See :ref:`configuration` for details. Edit the included ``paperless.conf`` and adjust the
|
||||
settings to your needs. Required settings for getting paperless running are:
|
||||
|
||||
* ``PAPERLESS_REDIS`` should point to your redis server, such as redis://localhost:6379.
|
||||
* ``PAPERLESS_DBHOST`` should be the hostname on which your PostgreSQL server is running. Leave this unset
|
||||
to use SQLite instead. Also configure port, database name, user and password as necessary.
|
||||
* ``PAPERLESS_CONSUMPTION_DIR`` should point to a folder which paperless should watch for documents. You might
|
||||
want to have this somewhere else. Likewise, ``PAPERLESS_DATA_DIR`` and ``PAPERLESS_MEDIA_ROOT`` define where
|
||||
paperless stores its data. If you like, you can point both to the same directory.
|
||||
* ``PAPERLESS_SECRET_KEY`` should be a random sequence of characters. It's used for authentication. Failure
|
||||
to do so allows third parties to forge authentication credentials.
|
||||
|
||||
Many more adjustments can be made to paperless, especially the OCR part. The following options are recommended
|
||||
for everyone:
|
||||
|
||||
* Set ``PAPERLESS_OCR_LANGUAGE`` to the language most of your documents are written in.
|
||||
* Set ``PAPERLESS_TIME_ZONE`` to your local time zone.
|
||||
|
||||
6. Setup permissions. Create a system user under which you wish to run paperless. Ensure that these directories exist
|
||||
and that the user has write permissions to the following directories
|
||||
|
||||
* ``/opt/paperless/media``
|
||||
* ``/opt/paperless/data``
|
||||
* ``/opt/paperless/consume``
|
||||
|
||||
Adjust as necessary if you configured different folders.
|
||||
|
||||
7. Install python requirements. Paperless comes with both Pipfiles for ``pipenv`` as well as with a ``requirements.txt``.
|
||||
Both will install exactly the same requirements. It is up to you if you wish to use a virtual environment or not.
|
||||
|
||||
8. Go to ``/opt/paperless/src``, and execute the following commands:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
# This collects static files from paperless and django.
|
||||
python3 manage.py collectstatic --clear --no-input
|
||||
|
||||
# This creates the database schema.
|
||||
python3 manage.py migrate
|
||||
|
||||
# This creates your first paperless user
|
||||
python3 manage.py createsuperuser
|
||||
|
||||
9. Optional: Test that paperless is working by executing
|
||||
|
||||
.. code:: bash
|
||||
|
||||
# This starts the development web server.
|
||||
python3 manage.py runserver
|
||||
|
||||
and pointing your browser to http://localhost:8000/.
|
||||
|
||||
.. warning::
|
||||
|
||||
This is a development server which should not be used in
|
||||
production.
|
||||
|
||||
.. hint::
|
||||
|
||||
This will not start the consumer. Paperless does this in a
|
||||
separate process.
|
||||
|
||||
10. Setup systemd services to run paperless automatically. You may
|
||||
use the service definition files included in the ``scripts`` folder
|
||||
as a starting point.
|
||||
|
||||
Paperless needs the ``webserver`` script to run the webserver, the
|
||||
``consumer`` script to watch the input folder, and the ``scheduler``
|
||||
script to run tasks such as email checking and document consumption.
|
||||
|
||||
These services rely on redis and optionally the database server, but
|
||||
don't need to be started in any particular order. The example files
|
||||
depend on redis being started. If you use a database server, you should
|
||||
add additional dependencies.
|
||||
|
||||
.. hint::
|
||||
|
||||
You may optionally set up your preferred web server to serve
|
||||
paperless as a wsgi application directly instead of running the
|
||||
``webserver`` service. The module containing the wsgi application
|
||||
is named ``paperless.wsgi``.
|
||||
|
||||
.. caution::
|
||||
|
||||
The included scripts run a ``gunicorn`` standalone server,
|
||||
which is fine for running paperless. It does support SSL,
|
||||
however, the documentation of GUnicorn states that you should
|
||||
use a proxy server in front of gunicorn instead.
|
||||
|
||||
11. Optional: Install a samba server and make the consumption folder
|
||||
available as a network share.
|
||||
|
||||
12. Configure ImageMagick to allow processing of PDF documents. Most distributions have
|
||||
this disabled by default, since PDF documents can contain malware. If
|
||||
you don't do this, paperless will fall back to ghostscript for certain steps
|
||||
such as thumbnail generation.
|
||||
|
||||
Edit ``/etc/ImageMagick-6/policy.xml`` and adjust
|
||||
|
||||
.. code::
|
||||
|
||||
<policy domain="coder" rights="none" pattern="PDF" />
|
||||
|
||||
to
|
||||
|
||||
.. code::
|
||||
|
||||
<policy domain="coder" rights="read|write" pattern="PDF" />
|
||||
|
||||
Migration to paperless-ng
|
||||
#########################
|
||||
@@ -221,10 +359,10 @@ things have changed under the hood, so you need to adapt your setup depending on
|
||||
how you installed paperless. The important things to keep in mind are as follows.
|
||||
|
||||
* Read the :ref:`changelog <paperless_changelog>` and take note of breaking changes.
|
||||
* It is recommended to use postgresql as the database now. If you want to continue
|
||||
using SQLite, which is the default of paperless, use ``docker-compose.sqlite.yml``.
|
||||
See :ref:`setup-sqlite_to_psql` for details on how to move your data from
|
||||
sqlite to postgres.
|
||||
* You should decide if you want to stick with SQLite or want to migrate your database
|
||||
to PostgreSQL. See :ref:`setup-sqlite_to_psql` for details on how to move your data from
|
||||
SQLite to PostgreSQL. Both work fine with paperless. However, if you already have a
|
||||
database server running for other services, you might as well use it for paperless as well.
|
||||
* The task scheduler of paperless, which is used to execute periodic tasks
|
||||
such as email checking and maintenance, requires a `redis`_ message broker
|
||||
instance. The docker-compose route takes care of that.
|
||||
@@ -259,22 +397,24 @@ Migration to paperless-ng is then performed in a few simple steps:
|
||||
will be incompatible with the migrated volumes.
|
||||
|
||||
4. Copy the ``docker-compose.sqlite.yml`` file to ``docker-compose.yml``.
|
||||
If you want to migrate to PostgreSQL, do that after you migrated your existing
|
||||
If you want to switch to PostgreSQL, do that after you migrated your existing
|
||||
SQLite database.
|
||||
|
||||
5. Adjust ``docker-compose.yml`` and
|
||||
``docker-compose.env`` to your needs.
|
||||
See `docker route`_ for details on which edits are advised.
|
||||
|
||||
6. Start paperless-ng.
|
||||
6. In order to find your existing documents with the new search feature, you need
|
||||
to invoke a one-time operation that will create the search index:
|
||||
|
||||
.. code:: bash
|
||||
.. code:: shell-session
|
||||
|
||||
$ docker-compose up
|
||||
$ docker-compose run --rm webserver document_index reindex
|
||||
|
||||
This will migrate your database and create the search index. After that,
|
||||
paperless will take care of maintaining the index by itself.
|
||||
|
||||
If you see everything working (you should see some migrations getting
|
||||
applied, for instance), you can gracefully stop paperless-ng with Ctrl-C
|
||||
and then start paperless-ng as usual with
|
||||
7. Start paperless-ng.
|
||||
|
||||
.. code:: bash
|
||||
|
||||
@@ -282,11 +422,11 @@ Migration to paperless-ng is then performed in a few simple steps:
|
||||
|
||||
This will run paperless in the background and automatically start it on system boot.
|
||||
|
||||
7. Paperless installed a permanent redirect to ``admin/`` in your browser. This
|
||||
8. Paperless installed a permanent redirect to ``admin/`` in your browser. This
|
||||
redirect is still in place and prevents access to the new UI. Clear
|
||||
browsing cache in order to fix this.
|
||||
|
||||
8. Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
|
||||
9. Optionally, follow the instructions below to migrate your existing data to PostgreSQL.
|
||||
|
||||
|
||||
.. _setup-sqlite_to_psql:
|
||||
@@ -299,7 +439,7 @@ management commands as below.
|
||||
|
||||
.. caution::
|
||||
|
||||
Make sure that your sqlite database is migrated to the latest version.
|
||||
Make sure that your SQLite database is migrated to the latest version.
|
||||
Starting paperless will make sure that this is the case. If you try to
|
||||
load data from an old database schema in SQLite into a newer database
|
||||
schema in PostgreSQL, you will run into trouble.
|
||||
@@ -323,7 +463,7 @@ management commands as below.
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose run --rm webserver /bin/bash
|
||||
|
||||
This will lauch the container and initialize the PostgreSQL database.
|
||||
This will launch the container and initialize the PostgreSQL database.
|
||||
|
||||
b) Without docker, open a shell in your virtual environment, switch to
|
||||
the ``src`` directory and create the database schema:
|
||||
@@ -358,6 +498,35 @@ management commands as below.
|
||||
7. Start paperless.
|
||||
|
||||
|
||||
Moving back to paperless
|
||||
========================
|
||||
|
||||
Let's say you migrated to Paperless-ng and used it for a while, but decided that
|
||||
you don't like it and want to move back (If you do, send me a mail about what
|
||||
part you didn't like!), you can totally do that with a few simple steps.
|
||||
|
||||
Paperless-ng modified the database schema slightly, however, these changes can
|
||||
be reverted while keeping your current data, so that your current data will
|
||||
be compatible with original Paperless.
|
||||
|
||||
Execute this:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless
|
||||
$ docker-compose run --rm webserver migrate documents 0023
|
||||
|
||||
Or without docker:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ cd /path/to/paperless/src
|
||||
$ python3 manage.py migrate documents 0023
|
||||
|
||||
After that, you need to clear your cookies (Paperless-ng comes with updated
|
||||
dependencies that do cookie-processing differently) and probably your cache
|
||||
as well.
|
||||
|
||||
.. _setup-less_powerful_devices:
|
||||
|
||||
|
||||
@@ -367,12 +536,13 @@ Considerations for less powerful devices
|
||||
Paperless runs on Raspberry Pi. However, some things are rather slow on the Pi and
|
||||
configuring some options in paperless can help improve performance immensely:
|
||||
|
||||
* Stick with SQLite to save some resources.
|
||||
* Consider setting ``PAPERLESS_OCR_PAGES`` to 1, so that paperless will only OCR
|
||||
the first page of your documents.
|
||||
* ``PAPERLESS_TASK_WORKERS`` and ``PAPERLESS_THREADS_PER_WORKER`` are configured
|
||||
to use all cores. The Raspberry Pi models 3 and up have 4 cores, meaning that
|
||||
paperless will use 2 workers and 2 threads per worker. This may result in
|
||||
slugish response times during consumption, so you might want to lower these
|
||||
sluggish response times during consumption, so you might want to lower these
|
||||
settings (example: 2 workers and 1 thread to always have some computing power
|
||||
left for other tasks).
|
||||
* Keep ``PAPERLESS_OCR_ALWAYS`` at its default value 'false' and consider OCR'ing
|
||||
|
||||
@@ -5,13 +5,13 @@ Usage Overview
|
||||
Paperless is an application that manages your personal documents. With
|
||||
the help of a document scanner (see :ref:`scanners`), paperless transforms
|
||||
your unwieldy physical document binders into a searchable archive and
|
||||
provices many utilities for finding and managing your documents.
|
||||
provides many utilities for finding and managing your documents.
|
||||
|
||||
|
||||
Terms and definitions
|
||||
#####################
|
||||
|
||||
Paperless esentially consists of two different parts for managing your
|
||||
Paperless essentially consists of two different parts for managing your
|
||||
documents:
|
||||
|
||||
* The *consumer* watches a specified folder and adds all documents in that
|
||||
@@ -30,12 +30,12 @@ Each document has a couple of fields that you can assign to them:
|
||||
tag, however, a single document can also have multiple tags. This is not
|
||||
possible with folders. The reason folders are not implemented in paperless
|
||||
is simply that tags are much more versatile than folders.
|
||||
* A *document type* is used to demarkate the type of a document such as letter,
|
||||
* A *document type* is used to demarcate the type of a document such as letter,
|
||||
bank statement, invoice, contract, etc. It is used to identify what a document
|
||||
is about.
|
||||
* The *date added* of a document is the date the document was scanned into
|
||||
paperless. You cannot and should not change this date.
|
||||
* The *date created* of a document is the date the document was intially issued.
|
||||
* The *date created* of a document is the date the document was initially issued.
|
||||
This can be the date you bought a product, the date you signed a contract, or
|
||||
the date a letter was sent to you.
|
||||
* The *archive serial number* (short: ASN) of a document is the identifier of
|
||||
@@ -131,7 +131,7 @@ These are as follows:
|
||||
|
||||
With the correct set of rules, you can completely automate your email documents.
|
||||
Create rules for every correspondent you receive digital documents from and
|
||||
paperless will read them automatically. The default acion "mark as read" is
|
||||
paperless will read them automatically. The default action "mark as read" is
|
||||
pretty tame and will not cause any damage or data loss whatsoever.
|
||||
|
||||
You can also set up a special folder in your mail account for paperless and use
|
||||
@@ -156,6 +156,62 @@ REST API
|
||||
|
||||
You can also submit a document using the REST API, see :ref:`api-file_uploads` for details.
|
||||
|
||||
.. _basic-searching:
|
||||
|
||||
Searching
|
||||
#########
|
||||
|
||||
Paperless offers an extensive searching mechanism that is designed to allow you to quickly
|
||||
find a document you're looking for (for example, that thing that just broke and you bought
|
||||
a couple months ago, that contract you signed 8 years ago).
|
||||
|
||||
When you search paperless for a document, it tries to match this query against your documents.
|
||||
Paperless will look for matching documents by inspecting their content, title, correspondent,
|
||||
type and tags. Paperless returns a scored list of results, so that documents matching your query
|
||||
better will appear further up in the search results.
|
||||
|
||||
By default, paperless returns only documents which contain all words typed in the search bar.
|
||||
However, paperless also offers advanced search syntax if you want to drill down the results
|
||||
further.
|
||||
|
||||
Matching documents with logical expressions:
|
||||
|
||||
.. code:: none
|
||||
|
||||
shopname AND (product1 OR product2)
|
||||
|
||||
Matching specific tags, correspondents or types:
|
||||
|
||||
.. code:: none
|
||||
|
||||
type:invoice tag:unpaid
|
||||
correspondent:university certificate
|
||||
|
||||
Matching dates:
|
||||
|
||||
.. code:: none
|
||||
|
||||
created:[2005 to 2009]
|
||||
added:yesterday
|
||||
modified:today
|
||||
|
||||
Matching inexact words:
|
||||
|
||||
.. code:: none
|
||||
|
||||
produ*name
|
||||
|
||||
.. note::
|
||||
|
||||
Inexact terms are hard for search indexes. These queries might take a while to execute. That's why paperless offers
|
||||
auto complete and query correction.
|
||||
|
||||
All of these constructs can be combined as you see fit.
|
||||
If you want to learn more about the query language used by paperless, paperless uses Whoosh's default query language.
|
||||
Head over to `Whoosh query language <https://whoosh.readthedocs.io/en/latest/querylang.html>`_.
|
||||
For details on what date parsing utilities are available, see
|
||||
`Date parsing <https://whoosh.readthedocs.io/en/latest/dates.html#parsing-date-queries>`_.
|
||||
|
||||
|
||||
.. _usage-recommended_workflow:
|
||||
|
||||
@@ -182,7 +238,7 @@ Processing of the physical documents
|
||||
====================================
|
||||
|
||||
Keep a physical inbox. Whenever you receive a document that you need to
|
||||
archive, put it into your inbox. Regulary, do the following for all documents
|
||||
archive, put it into your inbox. Regularly, do the following for all documents
|
||||
in your inbox:
|
||||
|
||||
1. For each document, decide if you need to keep the document in physical
|
||||
@@ -217,18 +273,24 @@ Once you have scanned in a document, proceed in paperless as follows.
|
||||
|
||||
1. If the document has an ASN, assign the ASN to the document.
|
||||
2. Assign a correspondent to the document (e.g., your employer, bank, etc)
|
||||
This isnt strictly necessary but helps in finding a document when you need
|
||||
This isn't strictly necessary but helps in finding a document when you need
|
||||
it.
|
||||
3. Assign a document type (e.g., invoice, bank statement, etc) to the document
|
||||
This isnt strictly necessary but helps in finding a document when you need
|
||||
This isn't strictly necessary but helps in finding a document when you need
|
||||
it.
|
||||
4. Assign a proper title to the document (the name of an item you bought, the
|
||||
subject of the letter, etc)
|
||||
5. Check that the date of the document is corrent. Paperless tries to read
|
||||
5. Check that the date of the document is correct. Paperless tries to read
|
||||
the date from the content of the document, but this fails sometimes if the
|
||||
OCR is bad or multiple dates appear on the document.
|
||||
6. Remove inbox tags from the documents.
|
||||
|
||||
.. hint::
|
||||
|
||||
You can set up manual matching rules for your correspondents and tags and
|
||||
paperless will assign them automatically. After consuming a couple documents,
|
||||
you can even ask paperless to *learn* when to assign tags and correspondents
|
||||
by itself. For details on this feature, see :ref:`advanced-matching`.
|
||||
|
||||
Task management
|
||||
===============
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#PAPERLESS_CORS_ALLOWED_HOSTS=localhost:8080,example.com,localhost:8000
|
||||
#PAPERLESS_FORCE_SCRIPT_NAME=
|
||||
#PAPERLESS_STATIC_URL=/static/
|
||||
#PAPERLESS_AUTO_LOGIN_USERNAME=
|
||||
|
||||
# Software tweaks
|
||||
|
||||
|
||||
@@ -1,5 +1,19 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Release checklist
|
||||
# - wait for travis build.
|
||||
# adjust src/paperless/version.py
|
||||
# changelog in the documentation
|
||||
# adjust versions in docker/hub/*
|
||||
# If docker-compose was modified: all compose files are the same.
|
||||
|
||||
# Steps:
|
||||
# run release script "dev", push
|
||||
# if it works: new tag, merge into master
|
||||
# on master: make release "latest", push
|
||||
# on master: make release "version-tag", push
|
||||
# publish release files
|
||||
|
||||
set -e
|
||||
|
||||
|
||||
@@ -28,6 +42,7 @@ fi
|
||||
mkdir "$PAPERLESS_DIST"
|
||||
mkdir "$PAPERLESS_DIST_APP"
|
||||
mkdir "$PAPERLESS_DIST_APP/docker"
|
||||
mkdir "$PAPERLESS_DIST_APP/scripts"
|
||||
mkdir "$PAPERLESS_DIST_DOCKERFILES"
|
||||
|
||||
# setup dependencies.
|
||||
@@ -90,6 +105,11 @@ cp "$PAPERLESS_ROOT/docker/gunicorn.conf.py" "$PAPERLESS_DIST_APP/docker/"
|
||||
cp "$PAPERLESS_ROOT/docker/imagemagick-policy.xml" "$PAPERLESS_DIST_APP/docker/"
|
||||
cp "$PAPERLESS_ROOT/docker/supervisord.conf" "$PAPERLESS_DIST_APP/docker/"
|
||||
|
||||
# auxiliary files for bare metal installs
|
||||
cp "$PAPERLESS_ROOT/scripts/paperless-webserver.service" "$PAPERLESS_DIST_APP/scripts/"
|
||||
cp "$PAPERLESS_ROOT/scripts/paperless-consumer.service" "$PAPERLESS_DIST_APP/scripts/"
|
||||
cp "$PAPERLESS_ROOT/scripts/paperless-scheduler.service" "$PAPERLESS_DIST_APP/scripts/"
|
||||
|
||||
# try to make the docker build.
|
||||
|
||||
cd "$PAPERLESS_DIST_APP"
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
[Unit]
|
||||
Description=Paperless consumer
|
||||
Requires=redis.service
|
||||
|
||||
[Service]
|
||||
User=paperless
|
||||
Group=paperless
|
||||
ExecStart=/home/paperless/project/virtualenv/bin/python /home/paperless/project/src/manage.py document_consumer
|
||||
WorkingDirectory=/opt/paperless/src
|
||||
ExecStart=python3 manage.py document_consumer
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
12
scripts/paperless-scheduler.service
Normal file
@@ -0,0 +1,12 @@
|
||||
[Unit]
|
||||
Description=Paperless scheduler
|
||||
Requires=redis.service
|
||||
|
||||
[Service]
|
||||
User=paperless
|
||||
Group=paperless
|
||||
WorkingDirectory=/opt/paperless/src
|
||||
ExecStart=python3 manage.py qcluster
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -2,11 +2,13 @@
|
||||
Description=Paperless webserver
|
||||
After=network.target
|
||||
Wants=network.target
|
||||
Requires=redis.service
|
||||
|
||||
[Service]
|
||||
User=paperless
|
||||
Group=paperless
|
||||
ExecStart=/home/paperless/project/virtualenv/bin/gunicorn --pythonpath=/home/paperless/project/src paperless.wsgi -w 2
|
||||
WorkingDirectory=/opt/paperless/src
|
||||
ExecStart=/opt/paperless/.local/bin/gunicorn paperless.wsgi -w 2 -b 0.0.0.0:8000
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
@@ -23,7 +23,7 @@ import { TagEditDialogComponent } from './components/manage/tag-list/tag-edit-di
|
||||
import { DocumentTypeEditDialogComponent } from './components/manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component';
|
||||
import { TagComponent } from './components/common/tag/tag.component';
|
||||
import { SearchComponent } from './components/search/search.component';
|
||||
import { ResultHightlightComponent } from './components/search/result-hightlight/result-hightlight.component';
|
||||
import { ResultHighlightComponent } from './components/search/result-highlight/result-highlight.component';
|
||||
import { PageHeaderComponent } from './components/common/page-header/page-header.component';
|
||||
import { AppFrameComponent } from './components/app-frame/app-frame.component';
|
||||
import { ToastsComponent } from './components/common/toasts/toasts.component';
|
||||
@@ -65,7 +65,7 @@ import { WidgetFrameComponent } from './components/dashboard/widgets/widget-fram
|
||||
DocumentTypeEditDialogComponent,
|
||||
TagComponent,
|
||||
SearchComponent,
|
||||
ResultHightlightComponent,
|
||||
ResultHighlightComponent,
|
||||
PageHeaderComponent,
|
||||
AppFrameComponent,
|
||||
ToastsComponent,
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
<span *ngIf="!clickable" class="badge" [style.background]="getColour().value" [style.color]="getColour().textColor">{{tag.name}}</span>
|
||||
<a [routerLink]="" *ngIf="clickable" class="badge" [style.background]="getColour().value" [style.color]="getColour().textColor">{{tag.name}}</a>
|
||||
<a [routerLink]="" [title]="linkTitle" *ngIf="clickable" class="badge" [style.background]="getColour().value" [style.color]="getColour().textColor">{{tag.name}}</a>
|
||||
@@ -14,10 +14,10 @@ export class TagComponent implements OnInit {
|
||||
tag: PaperlessTag
|
||||
|
||||
@Input()
|
||||
clickable: boolean = false
|
||||
linkTitle: string = ""
|
||||
|
||||
@Output()
|
||||
click = new EventEmitter()
|
||||
@Input()
|
||||
clickable: boolean = false
|
||||
|
||||
ngOnInit(): void {
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
<app-widget-frame [title]="savedView.title">
|
||||
|
||||
<table class="table table-sm table-hover table-borderless">
|
||||
<a header-buttons [routerLink]="" (click)="showAll()">Show all</a>
|
||||
|
||||
|
||||
<table content class="table table-sm table-hover table-borderless">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Created</th>
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { Component, Input, OnInit } from '@angular/core';
|
||||
import { Router } from '@angular/router';
|
||||
import { PaperlessDocument } from 'src/app/data/paperless-document';
|
||||
import { SavedViewConfig } from 'src/app/data/saved-view-config';
|
||||
import { DocumentListViewService } from 'src/app/services/document-list-view.service';
|
||||
import { DocumentService } from 'src/app/services/rest/document.service';
|
||||
|
||||
@Component({
|
||||
@@ -10,7 +12,10 @@ import { DocumentService } from 'src/app/services/rest/document.service';
|
||||
})
|
||||
export class SavedViewWidgetComponent implements OnInit {
|
||||
|
||||
constructor(private documentService: DocumentService) { }
|
||||
constructor(
|
||||
private documentService: DocumentService,
|
||||
private router: Router,
|
||||
private list: DocumentListViewService) { }
|
||||
|
||||
@Input()
|
||||
savedView: SavedViewConfig
|
||||
@@ -23,4 +28,9 @@ export class SavedViewWidgetComponent implements OnInit {
|
||||
})
|
||||
}
|
||||
|
||||
showAll() {
|
||||
this.list.load(this.savedView)
|
||||
this.router.navigate(["documents"])
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
<app-widget-frame title="Statistics">
|
||||
<p class="card-text">Documents in inbox: {{statistics.documents_inbox}}</p>
|
||||
<p class="card-text">Total documents: {{statistics.documents_total}}</p>
|
||||
<ng-container content>
|
||||
<p class="card-text">Documents in inbox: {{statistics.documents_inbox}}</p>
|
||||
<p class="card-text">Total documents: {{statistics.documents_total}}</p>
|
||||
</ng-container>
|
||||
</app-widget-frame>
|
||||
@@ -1,6 +1,6 @@
|
||||
<app-widget-frame title="Upload new documents">
|
||||
|
||||
<form>
|
||||
<form content>
|
||||
<ngx-file-drop
|
||||
dropZoneLabel="Drop documents here or" (onFileDrop)="dropped($event)"
|
||||
(onFileOver)="fileOver($event)" (onFileLeave)="fileLeave($event)"
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
<div class="card mb-3 shadow">
|
||||
<div class="card-header">
|
||||
<h5 class="card-title mb-0">{{title}}</h5>
|
||||
<div class="d-flex justify-content-between align-items-center">
|
||||
<h5 class="card-title mb-0">{{title}}</h5>
|
||||
<ng-content select ="[header-buttons]"></ng-content>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="card-body text-dark">
|
||||
<ng-content></ng-content>
|
||||
<ng-content select ="[content]"></ng-content>
|
||||
</div>
|
||||
</div>
|
||||
@@ -1,4 +1,3 @@
|
||||
import { DatePipe, formatDate } from '@angular/common';
|
||||
import { Component, OnInit } from '@angular/core';
|
||||
import { FormControl, FormGroup } from '@angular/forms';
|
||||
import { ActivatedRoute, Router } from '@angular/router';
|
||||
@@ -6,17 +5,14 @@ import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
|
||||
import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent';
|
||||
import { PaperlessDocument } from 'src/app/data/paperless-document';
|
||||
import { PaperlessDocumentType } from 'src/app/data/paperless-document-type';
|
||||
import { TAG_COLOURS, PaperlessTag } from 'src/app/data/paperless-tag';
|
||||
import { DocumentListViewService } from 'src/app/services/document-list-view.service';
|
||||
import { OpenDocumentsService } from 'src/app/services/open-documents.service';
|
||||
import { CorrespondentService } from 'src/app/services/rest/correspondent.service';
|
||||
import { DocumentTypeService } from 'src/app/services/rest/document-type.service';
|
||||
import { DocumentService } from 'src/app/services/rest/document.service';
|
||||
import { TagService } from 'src/app/services/rest/tag.service';
|
||||
import { DeleteDialogComponent } from '../common/delete-dialog/delete-dialog.component';
|
||||
import { CorrespondentEditDialogComponent } from '../manage/correspondent-list/correspondent-edit-dialog/correspondent-edit-dialog.component';
|
||||
import { DocumentTypeEditDialogComponent } from '../manage/document-type-list/document-type-edit-dialog/document-type-edit-dialog.component';
|
||||
import { TagEditDialogComponent } from '../manage/tag-list/tag-edit-dialog/tag-edit-dialog.component';
|
||||
|
||||
@Component({
|
||||
selector: 'app-document-detail',
|
||||
@@ -133,8 +129,8 @@ export class DocumentDetailComponent implements OnInit {
|
||||
|
||||
close() {
|
||||
this.openDocumentService.closeDocument(this.document)
|
||||
if (this.documentListViewService.viewId) {
|
||||
this.router.navigate(['view', this.documentListViewService.viewId])
|
||||
if (this.documentListViewService.savedViewId) {
|
||||
this.router.navigate(['view', this.documentListViewService.savedViewId])
|
||||
} else {
|
||||
this.router.navigate(['documents'])
|
||||
}
|
||||
|
||||
@@ -7,11 +7,18 @@
|
||||
<div class="card-body">
|
||||
|
||||
<div class="d-flex justify-content-between align-items-center">
|
||||
<h5 class="card-title">{{document.correspondent ? document.correspondent.name + ': ' : ''}}{{document.title}}<app-tag [tag]="t" *ngFor="let t of document.tags" class="ml-1"></app-tag></h5>
|
||||
<h5 class="card-title">
|
||||
<ng-container *ngIf="document.correspondent">
|
||||
<a *ngIf="clickCorrespondent.observers.length ; else nolink" [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{document.correspondent.name}}</a>
|
||||
<ng-template #nolink>{{document.correspondent.name}}</ng-template>:
|
||||
</ng-container>
|
||||
{{document.title}}
|
||||
<app-tag [tag]="t" linkTitle="Filter by tag" *ngFor="let t of document.tags" class="ml-1" (click)="clickTag.emit(t)" [clickable]="clickTag.observers.length"></app-tag>
|
||||
</h5>
|
||||
<h5 class="card-title" *ngIf="document.archive_serial_number">#{{document.archive_serial_number}}</h5>
|
||||
</div>
|
||||
<p class="card-text">
|
||||
<app-result-hightlight *ngIf="getDetailsAsHighlight()" class="result-content" [highlights]="getDetailsAsHighlight()"></app-result-hightlight>
|
||||
<app-result-highlight *ngIf="getDetailsAsHighlight()" class="result-content" [highlights]="getDetailsAsHighlight()"></app-result-highlight>
|
||||
<span *ngIf="getDetailsAsString()" class="result-content">{{getDetailsAsString()}}</span>
|
||||
</p>
|
||||
|
||||
@@ -24,6 +31,13 @@
|
||||
</svg>
|
||||
Edit
|
||||
</a>
|
||||
<a type="button" class="btn btn-sm btn-outline-secondary" [href]="getPreviewUrl()">
|
||||
<svg width="1em" height="1em" viewBox="0 0 16 16" class="bi bi-search" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" d="M10.442 10.442a1 1 0 0 1 1.415 0l3.85 3.85a1 1 0 0 1-1.414 1.415l-3.85-3.85a1 1 0 0 1 0-1.415z"/>
|
||||
<path fill-rule="evenodd" d="M6.5 12a5.5 5.5 0 1 0 0-11 5.5 5.5 0 0 0 0 11zM13 6.5a6.5 6.5 0 1 1-13 0 6.5 6.5 0 0 1 13 0z"/>
|
||||
</svg>
|
||||
View
|
||||
</a>
|
||||
<a type="button" class="btn btn-sm btn-outline-secondary" [href]="getDownloadUrl()">
|
||||
<svg width="1em" height="1em" viewBox="0 0 16 16" class="bi bi-download" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" d="M.5 9.9a.5.5 0 0 1 .5.5v2.5a1 1 0 0 0 1 1h12a1 1 0 0 0 1-1v-2.5a.5.5 0 0 1 1 0v2.5a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2v-2.5a.5.5 0 0 1 .5-.5z"/>
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { Component, Input, OnInit } from '@angular/core';
|
||||
import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core';
|
||||
import { DomSanitizer } from '@angular/platform-browser';
|
||||
import { PaperlessDocument } from 'src/app/data/paperless-document';
|
||||
import { PaperlessTag } from 'src/app/data/paperless-tag';
|
||||
import { DocumentService } from 'src/app/services/rest/document.service';
|
||||
|
||||
@Component({
|
||||
@@ -18,6 +19,12 @@ export class DocumentCardLargeComponent implements OnInit {
|
||||
@Input()
|
||||
details: any
|
||||
|
||||
@Output()
|
||||
clickTag = new EventEmitter<PaperlessTag>()
|
||||
|
||||
@Output()
|
||||
clickCorrespondent = new EventEmitter<PaperlessDocument>()
|
||||
|
||||
ngOnInit(): void {
|
||||
}
|
||||
|
||||
@@ -41,4 +48,8 @@ export class DocumentCardLargeComponent implements OnInit {
|
||||
getDownloadUrl() {
|
||||
return this.documentService.getDownloadUrl(this.document.id)
|
||||
}
|
||||
|
||||
getPreviewUrl() {
|
||||
return this.documentService.getPreviewUrl(this.document.id)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,26 +2,34 @@
|
||||
<div class="card h-100 shadow-sm">
|
||||
<div class=" border-bottom doc-img pr-1" [ngStyle]="{'background-image': 'url(' + getThumbUrl() + ')'}">
|
||||
<div class="row" *ngFor="let t of document.tags">
|
||||
<app-tag [tag]="t" class="col text-right"></app-tag>
|
||||
<app-tag style="font-size: large;" [tag]="t" class="col text-right" (click)="clickTag.emit(t)" [clickable]="true" linkTitle="Filter by tag"></app-tag>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="card-body p-2">
|
||||
<p class="card-text">
|
||||
<span class="font-weight-bold">{{document.correspondent? document.correspondent.name + ': ' : ''}}</span> {{document.title}}
|
||||
<ng-container *ngIf="document.correspondent">
|
||||
<a [routerLink]="" title="Filter by correspondent" (click)="clickCorrespondent.emit(document.correspondent)" class="font-weight-bold">{{document.correspondent.name}}</a>:
|
||||
</ng-container>
|
||||
{{document.title}}
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-footer">
|
||||
|
||||
<div class="d-flex justify-content-between align-items-center ml-n2">
|
||||
<div class="btn-group">
|
||||
<a routerLink="/documents/{{document.id}}" class="btn btn-sm btn-outline-secondary">
|
||||
<a routerLink="/documents/{{document.id}}" class="btn btn-sm btn-outline-secondary" title="Edit">
|
||||
<svg width="1em" height="1em" viewBox="0 0 16 16" class="bi bi-pencil" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" d="M12.146.146a.5.5 0 0 1 .708 0l3 3a.5.5 0 0 1 0 .708l-10 10a.5.5 0 0 1-.168.11l-5 2a.5.5 0 0 1-.65-.65l2-5a.5.5 0 0 1 .11-.168l10-10zM11.207 2.5L13.5 4.793 14.793 3.5 12.5 1.207 11.207 2.5zm1.586 3L10.5 3.207 4 9.707V10h.5a.5.5 0 0 1 .5.5v.5h.5a.5.5 0 0 1 .5.5v.5h.293l6.5-6.5zm-9.761 5.175l-.106.106-1.528 3.821 3.821-1.528.106-.106A.5.5 0 0 1 5 12.5V12h-.5a.5.5 0 0 1-.5-.5V11h-.5a.5.5 0 0 1-.468-.325z"/>
|
||||
</svg>
|
||||
</a>
|
||||
<a [href]="getDownloadUrl()" class="btn btn-sm btn-outline-secondary">
|
||||
<a [href]="getPreviewUrl()" class="btn btn-sm btn-outline-secondary" title="View in browser">
|
||||
<svg width="1em" height="1em" viewBox="0 0 16 16" class="bi bi-search" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" d="M10.442 10.442a1 1 0 0 1 1.415 0l3.85 3.85a1 1 0 0 1-1.414 1.415l-3.85-3.85a1 1 0 0 1 0-1.415z"/>
|
||||
<path fill-rule="evenodd" d="M6.5 12a5.5 5.5 0 1 0 0-11 5.5 5.5 0 0 0 0 11zM13 6.5a6.5 6.5 0 1 1-13 0 6.5 6.5 0 0 1 13 0z"/>
|
||||
</svg>
|
||||
</a>
|
||||
<a [href]="getDownloadUrl()" class="btn btn-sm btn-outline-secondary" title="Download">
|
||||
<svg width="1em" height="1em" viewBox="0 0 16 16" class="bi bi-download" fill="currentColor" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" d="M.5 9.9a.5.5 0 0 1 .5.5v2.5a1 1 0 0 0 1 1h12a1 1 0 0 0 1-1v-2.5a.5.5 0 0 1 1 0v2.5a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2v-2.5a.5.5 0 0 1 .5-.5z"/>
|
||||
<path fill-rule="evenodd" d="M7.646 11.854a.5.5 0 0 0 .708 0l3-3a.5.5 0 0 0-.708-.708L8.5 10.293V1.5a.5.5 0 0 0-1 0v8.793L5.354 8.146a.5.5 0 1 0-.708.708l3 3z"/>
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { Component, Input, OnInit } from '@angular/core';
|
||||
import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core';
|
||||
import { PaperlessDocument } from 'src/app/data/paperless-document';
|
||||
import { PaperlessTag } from 'src/app/data/paperless-tag';
|
||||
import { DocumentService } from 'src/app/services/rest/document.service';
|
||||
|
||||
@Component({
|
||||
@@ -14,6 +15,12 @@ export class DocumentCardSmallComponent implements OnInit {
|
||||
@Input()
|
||||
document: PaperlessDocument
|
||||
|
||||
@Output()
|
||||
clickTag = new EventEmitter<PaperlessTag>()
|
||||
|
||||
@Output()
|
||||
clickCorrespondent = new EventEmitter<PaperlessDocument>()
|
||||
|
||||
ngOnInit(): void {
|
||||
}
|
||||
|
||||
@@ -24,4 +31,8 @@ export class DocumentCardSmallComponent implements OnInit {
|
||||
getDownloadUrl() {
|
||||
return this.documentService.getDownloadUrl(this.document.id)
|
||||
}
|
||||
|
||||
getPreviewUrl() {
|
||||
return this.documentService.getPreviewUrl(this.document.id)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,13 +21,12 @@
|
||||
</svg>
|
||||
</label>
|
||||
</div>
|
||||
<div class="btn-group btn-group-toggle ml-2" ngbRadioGroup [(ngModel)]="docs.sortDirection"
|
||||
*ngIf="!docs.viewId">
|
||||
<div class="btn-group btn-group-toggle ml-2" ngbRadioGroup [(ngModel)]="list.sortDirection">
|
||||
<div ngbDropdown class="btn-group">
|
||||
<button class="btn btn-outline-primary btn-sm" id="dropdownBasic1" ngbDropdownToggle>Sort by</button>
|
||||
<div ngbDropdownMenu aria-labelledby="dropdownBasic1">
|
||||
<button *ngFor="let f of getSortFields()" ngbDropdownItem (click)="setSort(f.field)"
|
||||
[class.active]="docs.sortField == f.field">{{f.name}}</button>
|
||||
<button *ngFor="let f of getSortFields()" ngbDropdownItem (click)="list.sortField = f.field"
|
||||
[class.active]="list.sortField == f.field">{{f.name}}</button>
|
||||
</div>
|
||||
</div>
|
||||
<label ngbButtonLabel class="btn-outline-primary btn-sm">
|
||||
@@ -43,7 +42,7 @@
|
||||
</svg>
|
||||
</label>
|
||||
</div>
|
||||
<div class="btn-group ml-2" *ngIf="!docs.viewId">
|
||||
<div class="btn-group ml-2">
|
||||
|
||||
<button type="button" class="btn btn-sm btn-outline-primary" (click)="showFilter=!showFilter">
|
||||
<svg class="toolbaricon" fill="currentColor">
|
||||
@@ -55,9 +54,13 @@
|
||||
<div class="btn-group" ngbDropdown role="group">
|
||||
<button class="btn btn-sm btn-outline-primary dropdown-toggle-split" ngbDropdownToggle></button>
|
||||
<div class="dropdown-menu" ngbDropdownMenu>
|
||||
<button ngbDropdownItem *ngFor="let config of savedViewConfigService.getConfigs()" (click)="loadViewConfig(config)">{{config.title}}</button>
|
||||
<div class="dropdown-divider" *ngIf="savedViewConfigService.getConfigs().length > 0"></div>
|
||||
<button ngbDropdownItem (click)="saveViewConfig()">Save current view</button>
|
||||
<ng-container *ngIf="!list.savedViewId" >
|
||||
<button ngbDropdownItem *ngFor="let config of savedViewConfigService.getConfigs()" (click)="loadViewConfig(config)">{{config.title}}</button>
|
||||
<div class="dropdown-divider" *ngIf="savedViewConfigService.getConfigs().length > 0"></div>
|
||||
</ng-container>
|
||||
|
||||
<button ngbDropdownItem (click)="saveViewConfig()" *ngIf="list.savedViewId">Save "{{list.savedViewTitle}}"</button>
|
||||
<button ngbDropdownItem (click)="saveViewConfigAs()">Save as...</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -72,16 +75,16 @@
|
||||
</div>
|
||||
|
||||
<div class="row m-0 justify-content-end">
|
||||
<ngb-pagination [pageSize]="docs.currentPageSize" [collectionSize]="docs.collectionSize" [(page)]="docs.currentPage" [maxSize]="5"
|
||||
[rotate]="true" (pageChange)="reload()" aria-label="Default pagination"></ngb-pagination>
|
||||
<ngb-pagination [pageSize]="list.currentPageSize" [collectionSize]="list.collectionSize" [(page)]="list.currentPage" [maxSize]="5"
|
||||
[rotate]="true" (pageChange)="list.reload()" aria-label="Default pagination"></ngb-pagination>
|
||||
</div>
|
||||
|
||||
<div *ngIf="displayMode == 'largeCards'">
|
||||
<app-document-card-large *ngFor="let d of docs.documents" [document]="d" [details]="d.content">
|
||||
<app-document-card-large *ngFor="let d of list.documents" [document]="d" [details]="d.content" (clickTag)="filterByTag($event)" (clickCorrespondent)="filterByCorrespondent($event)">
|
||||
</app-document-card-large>
|
||||
</div>
|
||||
|
||||
<table class="table table-hover table-sm border shadow" *ngIf="displayMode == 'details'">
|
||||
<table class="table table-sm border shadow" *ngIf="displayMode == 'details'">
|
||||
<thead>
|
||||
<th class="d-none d-lg-table-cell">ASN</th>
|
||||
<th class="d-none d-md-table-cell">Correspondent</th>
|
||||
@@ -91,20 +94,37 @@
|
||||
<th class="d-none d-xl-table-cell">Added</th>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr *ngFor="let d of docs.documents" routerLink="/documents/{{d.id}}">
|
||||
<td class="d-none d-lg-table-cell">{{d.archive_serial_number}}</td>
|
||||
<td class="d-none d-md-table-cell">{{d.correspondent ? d.correspondent.name : ''}}</td>
|
||||
<td>{{d.title}}<app-tag [tag]="t" *ngFor="let t of d.tags" class="ml-1"></app-tag></td>
|
||||
<td class="d-none d-xl-table-cell">{{d.document_type ? d.document_type.name : ''}}</td>
|
||||
<td>{{d.created | date}}</td>
|
||||
<td class="d-none d-xl-table-cell">{{d.added | date}}</td>
|
||||
<tr *ngFor="let d of list.documents">
|
||||
<td class="d-none d-lg-table-cell">
|
||||
{{d.archive_serial_number}}
|
||||
</td>
|
||||
<td class="d-none d-md-table-cell">
|
||||
<ng-container *ngIf="d.correspondent">
|
||||
<a [routerLink]="" (click)="filterByCorrespondent(d.correspondent)" title="Filter by correspondent">{{d.correspondent.name}}</a>
|
||||
</ng-container>
|
||||
</td>
|
||||
<td>
|
||||
<a routerLink="/documents/{{d.id}}" title="Edit document">{{d.title}}</a>
|
||||
<app-tag [tag]="t" *ngFor="let t of d.tags" class="ml-1" clickable="true" linkTitle="Filter by tag" (click)="filterByTag(t)"></app-tag>
|
||||
</td>
|
||||
<td class="d-none d-xl-table-cell">
|
||||
<ng-container *ngIf="d.document_type">
|
||||
<a [routerLink]="" (click)="filterByDocumentType(d.document_type)" title="Filter by document type">{{d.document_type.name}}</a>
|
||||
</ng-container>
|
||||
</td>
|
||||
<td>
|
||||
{{d.created | date}}
|
||||
</td>
|
||||
<td class="d-none d-xl-table-cell">
|
||||
{{d.added | date}}
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
<div class=" m-n2 row" *ngIf="displayMode == 'smallCards'">
|
||||
<app-document-card-small [document]="d" *ngFor="let d of docs.documents"></app-document-card-small>
|
||||
<app-document-card-small [document]="d" *ngFor="let d of list.documents" (clickTag)="filterByTag($event)" (clickCorrespondent)="filterByCorrespondent($event)"></app-document-card-small>
|
||||
</div>
|
||||
|
||||
<p *ngIf="docs.documents.length == 0" class="mx-auto">No results</p>
|
||||
<p *ngIf="list.documents.length == 0" class="mx-auto">No results</p>
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
import { Component, OnInit } from '@angular/core';
|
||||
import { ActivatedRoute, Router } from '@angular/router';
|
||||
import { ActivatedRoute } from '@angular/router';
|
||||
import { NgbModal } from '@ng-bootstrap/ng-bootstrap';
|
||||
import { cloneFilterRules, FilterRule } from 'src/app/data/filter-rule';
|
||||
import { FILTER_CORRESPONDENT, FILTER_DOCUMENT_TYPE, FILTER_HAS_TAG, FILTER_RULE_TYPES } from 'src/app/data/filter-rule-type';
|
||||
import { PaperlessCorrespondent } from 'src/app/data/paperless-correspondent';
|
||||
import { PaperlessDocumentType } from 'src/app/data/paperless-document-type';
|
||||
import { PaperlessTag } from 'src/app/data/paperless-tag';
|
||||
import { SavedViewConfig } from 'src/app/data/saved-view-config';
|
||||
import { DocumentListViewService } from 'src/app/services/document-list-view.service';
|
||||
import { DOCUMENT_SORT_FIELDS } from 'src/app/services/rest/document.service';
|
||||
import { SavedViewConfigService } from 'src/app/services/saved-view-config.service';
|
||||
import { Toast, ToastService } from 'src/app/services/toast.service';
|
||||
import { SaveViewConfigDialogComponent } from './save-view-config-dialog/save-view-config-dialog.component';
|
||||
|
||||
@Component({
|
||||
@@ -16,9 +21,10 @@ import { SaveViewConfigDialogComponent } from './save-view-config-dialog/save-vi
|
||||
export class DocumentListComponent implements OnInit {
|
||||
|
||||
constructor(
|
||||
public docs: DocumentListViewService,
|
||||
public list: DocumentListViewService,
|
||||
public savedViewConfigService: SavedViewConfigService,
|
||||
public route: ActivatedRoute,
|
||||
private toastService: ToastService,
|
||||
public modalService: NgbModal) { }
|
||||
|
||||
displayMode = 'smallCards' // largeCards, smallCards, details
|
||||
@@ -27,17 +33,13 @@ export class DocumentListComponent implements OnInit {
|
||||
showFilter = false
|
||||
|
||||
getTitle() {
|
||||
return this.docs.viewConfigOverride ? this.docs.viewConfigOverride.title : "Documents"
|
||||
return this.list.savedViewTitle || "Documents"
|
||||
}
|
||||
|
||||
getSortFields() {
|
||||
return DOCUMENT_SORT_FIELDS
|
||||
}
|
||||
|
||||
setSort(field: string) {
|
||||
this.docs.sortField = field
|
||||
}
|
||||
|
||||
saveDisplayMode() {
|
||||
localStorage.setItem('document-list:displayMode', this.displayMode)
|
||||
}
|
||||
@@ -48,41 +50,74 @@ export class DocumentListComponent implements OnInit {
|
||||
}
|
||||
this.route.paramMap.subscribe(params => {
|
||||
if (params.has('id')) {
|
||||
this.docs.viewConfigOverride = this.savedViewConfigService.getConfig(params.get('id'))
|
||||
this.list.savedView = this.savedViewConfigService.getConfig(params.get('id'))
|
||||
} else {
|
||||
this.filterRules = this.docs.filterRules
|
||||
this.showFilter = this.filterRules.length > 0
|
||||
this.docs.viewConfigOverride = null
|
||||
this.list.savedView = null
|
||||
}
|
||||
this.reload()
|
||||
this.filterRules = this.list.filterRules
|
||||
//this.showFilter = this.filterRules.length > 0
|
||||
// prevents temporarily visible results from previous views
|
||||
this.list.documents = []
|
||||
this.list.reload()
|
||||
})
|
||||
}
|
||||
|
||||
reload() {
|
||||
this.docs.reload()
|
||||
}
|
||||
|
||||
applyFilterRules() {
|
||||
this.docs.filterRules = this.filterRules
|
||||
this.list.filterRules = this.filterRules
|
||||
}
|
||||
|
||||
loadViewConfig(config: SavedViewConfig) {
|
||||
this.filterRules = cloneFilterRules(config.filterRules)
|
||||
this.docs.loadViewConfig(config)
|
||||
this.list.load(config)
|
||||
}
|
||||
|
||||
saveViewConfig() {
|
||||
this.savedViewConfigService.updateConfig(this.list.savedView)
|
||||
this.toastService.showToast(Toast.make("Information", `View "${this.list.savedView.title}" saved successfully.`))
|
||||
}
|
||||
|
||||
saveViewConfigAs() {
|
||||
let modal = this.modalService.open(SaveViewConfigDialogComponent, {backdrop: 'static'})
|
||||
modal.componentInstance.saveClicked.subscribe(formValue => {
|
||||
this.savedViewConfigService.saveConfig({
|
||||
this.savedViewConfigService.newConfig({
|
||||
title: formValue.title,
|
||||
showInDashboard: formValue.showInDashboard,
|
||||
showInSideBar: formValue.showInSideBar,
|
||||
filterRules: this.docs.filterRules,
|
||||
sortDirection: this.docs.sortDirection,
|
||||
sortField: this.docs.sortField
|
||||
filterRules: this.list.filterRules,
|
||||
sortDirection: this.list.sortDirection,
|
||||
sortField: this.list.sortField
|
||||
})
|
||||
modal.close()
|
||||
})
|
||||
}
|
||||
|
||||
filterByTag(t: PaperlessTag) {
|
||||
if (this.filterRules.find(rule => rule.type.id == FILTER_HAS_TAG && rule.value == t.id)) {
|
||||
return
|
||||
}
|
||||
|
||||
this.filterRules.push({type: FILTER_RULE_TYPES.find(t => t.id == FILTER_HAS_TAG), value: t.id})
|
||||
this.applyFilterRules()
|
||||
}
|
||||
|
||||
filterByCorrespondent(c: PaperlessCorrespondent) {
|
||||
let existing_rule = this.filterRules.find(rule => rule.type.id == FILTER_CORRESPONDENT)
|
||||
if (existing_rule) {
|
||||
existing_rule.value = c.id
|
||||
} else {
|
||||
this.filterRules.push({type: FILTER_RULE_TYPES.find(t => t.id == FILTER_CORRESPONDENT), value: c.id})
|
||||
}
|
||||
this.applyFilterRules()
|
||||
}
|
||||
|
||||
filterByDocumentType(dt: PaperlessDocumentType) {
|
||||
let existing_rule = this.filterRules.find(rule => rule.type.id == FILTER_DOCUMENT_TYPE)
|
||||
if (existing_rule) {
|
||||
existing_rule.value = dt.id
|
||||
} else {
|
||||
this.filterRules.push({type: FILTER_RULE_TYPES.find(t => t.id == FILTER_DOCUMENT_TYPE), value: dt.id})
|
||||
}
|
||||
this.applyFilterRules()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
import { ComponentFixture, TestBed } from '@angular/core/testing';
|
||||
|
||||
import { ResultHightlightComponent } from './result-hightlight.component';
|
||||
import { ResultHighlightComponent } from './result-highlight.component';
|
||||
|
||||
describe('ResultHightlightComponent', () => {
|
||||
let component: ResultHightlightComponent;
|
||||
let fixture: ComponentFixture<ResultHightlightComponent>;
|
||||
describe('ResultHighlightComponent', () => {
|
||||
let component: ResultHighlightComponent;
|
||||
let fixture: ComponentFixture<ResultHighlightComponent>;
|
||||
|
||||
beforeEach(async () => {
|
||||
await TestBed.configureTestingModule({
|
||||
declarations: [ ResultHightlightComponent ]
|
||||
declarations: [ ResultHighlightComponent ]
|
||||
})
|
||||
.compileComponents();
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
fixture = TestBed.createComponent(ResultHightlightComponent);
|
||||
fixture = TestBed.createComponent(ResultHighlightComponent);
|
||||
component = fixture.componentInstance;
|
||||
fixture.detectChanges();
|
||||
});
|
||||
@@ -2,11 +2,11 @@ import { Component, Input, OnInit } from '@angular/core';
|
||||
import { SearchHitHighlight } from 'src/app/data/search-result';
|
||||
|
||||
@Component({
|
||||
selector: 'app-result-hightlight',
|
||||
templateUrl: './result-hightlight.component.html',
|
||||
styleUrls: ['./result-hightlight.component.scss']
|
||||
selector: 'app-result-highlight',
|
||||
templateUrl: './result-highlight.component.html',
|
||||
styleUrls: ['./result-highlight.component.scss']
|
||||
})
|
||||
export class ResultHightlightComponent implements OnInit {
|
||||
export class ResultHighlightComponent implements OnInit {
|
||||
|
||||
constructor() { }
|
||||
|
||||
@@ -1,13 +1,21 @@
|
||||
<app-page-header title="Search results">
|
||||
</app-page-header>
|
||||
|
||||
<p>Search string: <i>{{query}}</i></p>
|
||||
<div *ngIf="errorMessage" class="alert alert-danger">Invalid search query: {{errorMessage}}</div>
|
||||
|
||||
<div [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
|
||||
<p>
|
||||
Search string: <i>{{query}}</i>
|
||||
<ng-container *ngIf="correctedQuery">
|
||||
- Did you mean "<a [routerLink]="" (click)="searchCorrectedQuery()">{{correctedQuery}}</a>"?
|
||||
</ng-container>
|
||||
|
||||
</p>
|
||||
|
||||
<div *ngIf="!errorMessage" [class.result-content-searching]="searching" infiniteScroll (scrolled)="onScroll()">
|
||||
<p>{{resultCount}} result(s)</p>
|
||||
<app-document-card-large *ngFor="let result of results"
|
||||
[document]="result.document"
|
||||
[details]="result.highlights">
|
||||
|
||||
</app-document-card-large>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -11,5 +11,5 @@
|
||||
}
|
||||
|
||||
.result-content-searching {
|
||||
opacity: 0.2;
|
||||
opacity: 0.3;
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Component, OnInit } from '@angular/core';
|
||||
import { ActivatedRoute } from '@angular/router';
|
||||
import { ActivatedRoute, Router } from '@angular/router';
|
||||
import { SearchHit } from 'src/app/data/search-result';
|
||||
import { SearchService } from 'src/app/services/rest/search.service';
|
||||
|
||||
@@ -9,7 +9,7 @@ import { SearchService } from 'src/app/services/rest/search.service';
|
||||
styleUrls: ['./search.component.scss']
|
||||
})
|
||||
export class SearchComponent implements OnInit {
|
||||
|
||||
|
||||
results: SearchHit[] = []
|
||||
|
||||
query: string = ""
|
||||
@@ -22,7 +22,11 @@ export class SearchComponent implements OnInit {
|
||||
|
||||
resultCount
|
||||
|
||||
constructor(private searchService: SearchService, private route: ActivatedRoute) { }
|
||||
correctedQuery: string = null
|
||||
|
||||
errorMessage: string
|
||||
|
||||
constructor(private searchService: SearchService, private route: ActivatedRoute, private router: Router) { }
|
||||
|
||||
ngOnInit(): void {
|
||||
this.route.queryParamMap.subscribe(paramMap => {
|
||||
@@ -31,10 +35,16 @@ export class SearchComponent implements OnInit {
|
||||
this.currentPage = 1
|
||||
this.loadPage()
|
||||
})
|
||||
|
||||
|
||||
}
|
||||
|
||||
searchCorrectedQuery() {
|
||||
this.router.navigate(["search"], {queryParams: {query: this.correctedQuery}})
|
||||
}
|
||||
|
||||
loadPage(append: boolean = false) {
|
||||
this.errorMessage = null
|
||||
this.correctedQuery = null
|
||||
this.searchService.search(this.query, this.currentPage).subscribe(result => {
|
||||
if (append) {
|
||||
this.results.push(...result.results)
|
||||
@@ -44,12 +54,17 @@ export class SearchComponent implements OnInit {
|
||||
this.pageCount = result.page_count
|
||||
this.searching = false
|
||||
this.resultCount = result.count
|
||||
this.correctedQuery = result.corrected_query
|
||||
}, error => {
|
||||
this.searching = false
|
||||
this.resultCount = 1
|
||||
this.pageCount = 1
|
||||
this.results = []
|
||||
this.errorMessage = error.error
|
||||
})
|
||||
}
|
||||
|
||||
onScroll() {
|
||||
console.log(this.currentPage)
|
||||
console.log(this.pageCount)
|
||||
if (this.currentPage < this.pageCount) {
|
||||
this.currentPage += 1
|
||||
this.loadPage(true)
|
||||
|
||||
@@ -1,31 +1,51 @@
|
||||
export const FILTER_TITLE = 0
|
||||
export const FILTER_CONTENT = 1
|
||||
export const FILTER_ASN = 2
|
||||
export const FILTER_CORRESPONDENT = 3
|
||||
export const FILTER_DOCUMENT_TYPE = 4
|
||||
export const FILTER_IS_IN_INBOX = 5
|
||||
export const FILTER_HAS_TAG = 6
|
||||
export const FILTER_HAS_ANY_TAG = 7
|
||||
export const FILTER_CREATED_BEFORE = 8
|
||||
export const FILTER_CREATED_AFTER = 9
|
||||
export const FILTER_CREATED_YEAR = 10
|
||||
export const FILTER_CREATED_MONTH = 11
|
||||
export const FILTER_CREATED_DAY = 12
|
||||
export const FILTER_ADDED_BEFORE = 13
|
||||
export const FILTER_ADDED_AFTER = 14
|
||||
export const FILTER_MODIFIED_BEFORE = 15
|
||||
export const FILTER_MODIFIED_AFTER = 16
|
||||
|
||||
export const FILTER_RULE_TYPES: FilterRuleType[] = [
|
||||
{name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false},
|
||||
{name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false},
|
||||
|
||||
{id: FILTER_TITLE, name: "Title contains", filtervar: "title__icontains", datatype: "string", multi: false},
|
||||
{id: FILTER_CONTENT, name: "Content contains", filtervar: "content__icontains", datatype: "string", multi: false},
|
||||
|
||||
{name: "ASN is", filtervar: "archive_serial_number", datatype: "number", multi: false},
|
||||
{id: FILTER_ASN, name: "ASN is", filtervar: "archive_serial_number", datatype: "number", multi: false},
|
||||
|
||||
{name: "Correspondent is", filtervar: "correspondent__id", datatype: "correspondent", multi: false},
|
||||
{name: "Document type is", filtervar: "document_type__id", datatype: "document_type", multi: false},
|
||||
{id: FILTER_CORRESPONDENT, name: "Correspondent is", filtervar: "correspondent__id", datatype: "correspondent", multi: false},
|
||||
{id: FILTER_DOCUMENT_TYPE, name: "Document type is", filtervar: "document_type__id", datatype: "document_type", multi: false},
|
||||
|
||||
{name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false},
|
||||
{name: "Has tag", filtervar: "tags__id__all", datatype: "tag", multi: true},
|
||||
{name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false},
|
||||
{id: FILTER_IS_IN_INBOX, name: "Is in Inbox", filtervar: "is_in_inbox", datatype: "boolean", multi: false},
|
||||
{id: FILTER_HAS_TAG, name: "Has tag", filtervar: "tags__id__all", datatype: "tag", multi: true},
|
||||
{id: FILTER_HAS_ANY_TAG, name: "Has any tag", filtervar: "is_tagged", datatype: "boolean", multi: false},
|
||||
|
||||
{name: "Created before", filtervar: "created__date__lt", datatype: "date", multi: false},
|
||||
{name: "Created after", filtervar: "created__date__gt", datatype: "date", multi: false},
|
||||
{id: FILTER_CREATED_BEFORE, name: "Created before", filtervar: "created__date__lt", datatype: "date", multi: false},
|
||||
{id: FILTER_CREATED_AFTER, name: "Created after", filtervar: "created__date__gt", datatype: "date", multi: false},
|
||||
|
||||
{name: "Year created is", filtervar: "created__year", datatype: "number", multi: false},
|
||||
{name: "Month created is", filtervar: "created__month", datatype: "number", multi: false},
|
||||
{name: "Day created is", filtervar: "created__day", datatype: "number", multi: false},
|
||||
{id: FILTER_CREATED_YEAR, name: "Year created is", filtervar: "created__year", datatype: "number", multi: false},
|
||||
{id: FILTER_CREATED_MONTH, name: "Month created is", filtervar: "created__month", datatype: "number", multi: false},
|
||||
{id: FILTER_CREATED_DAY, name: "Day created is", filtervar: "created__day", datatype: "number", multi: false},
|
||||
|
||||
{name: "Added before", filtervar: "added__date__lt", datatype: "date", multi: false},
|
||||
{name: "Added after", filtervar: "added__date__gt", datatype: "date", multi: false},
|
||||
{id: FILTER_ADDED_BEFORE, name: "Added before", filtervar: "added__date__lt", datatype: "date", multi: false},
|
||||
{id: FILTER_ADDED_AFTER, name: "Added after", filtervar: "added__date__gt", datatype: "date", multi: false},
|
||||
|
||||
{name: "Modified before", filtervar: "modified__date__lt", datatype: "date", multi: false},
|
||||
{name: "Modified after", filtervar: "modified__date__gt", datatype: "date", multi: false},
|
||||
{id: FILTER_MODIFIED_BEFORE, name: "Modified before", filtervar: "modified__date__lt", datatype: "date", multi: false},
|
||||
{id: FILTER_MODIFIED_AFTER, name: "Modified after", filtervar: "modified__date__gt", datatype: "date", multi: false},
|
||||
]
|
||||
|
||||
export interface FilterRuleType {
|
||||
id: number
|
||||
name: string
|
||||
filtervar: string
|
||||
datatype: string //number, string, boolean, date
|
||||
|
||||
@@ -21,7 +21,9 @@ export interface SearchResult {
|
||||
page?: number
|
||||
page_count?: number
|
||||
|
||||
corrected_query?: string
|
||||
|
||||
results?: SearchHit[]
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,12 @@ import { DOCUMENT_LIST_SERVICE, GENERAL_SETTINGS } from '../data/storage-keys';
|
||||
import { DocumentService } from './rest/document.service';
|
||||
|
||||
|
||||
/**
|
||||
* This service manages the document list which is displayed using the document list view.
|
||||
*
|
||||
* This service also serves saved views by transparently switching between the document list
|
||||
* and saved views on request. See below.
|
||||
*/
|
||||
@Injectable({
|
||||
providedIn: 'root'
|
||||
})
|
||||
@@ -14,80 +20,127 @@ export class DocumentListViewService {
|
||||
|
||||
static DEFAULT_SORT_FIELD = 'created'
|
||||
|
||||
isReloading: boolean = false
|
||||
documents: PaperlessDocument[] = []
|
||||
currentPage = 1
|
||||
currentPageSize: number = +localStorage.getItem(GENERAL_SETTINGS.DOCUMENT_LIST_SIZE) || GENERAL_SETTINGS.DOCUMENT_LIST_SIZE_DEFAULT
|
||||
collectionSize: number
|
||||
|
||||
private currentViewConfig: SavedViewConfig
|
||||
//TODO: make private
|
||||
viewConfigOverride: SavedViewConfig
|
||||
/**
|
||||
* This is the current config for the document list. The service will always remember the last settings used for the document list.
|
||||
*/
|
||||
private _documentListViewConfig: SavedViewConfig
|
||||
/**
|
||||
* Optionally, this is the currently selected saved view, which might be null.
|
||||
*/
|
||||
private _savedViewConfig: SavedViewConfig
|
||||
|
||||
get viewId() {
|
||||
return this.viewConfigOverride?.id
|
||||
get savedView() {
|
||||
return this._savedViewConfig
|
||||
}
|
||||
|
||||
set savedView(value) {
|
||||
if (value) {
|
||||
//this is here so that we don't modify value, which might be the actual instance of the saved view.
|
||||
this._savedViewConfig = Object.assign({}, value)
|
||||
} else {
|
||||
this._savedViewConfig = null
|
||||
}
|
||||
}
|
||||
|
||||
get savedViewId() {
|
||||
return this.savedView?.id
|
||||
}
|
||||
|
||||
get savedViewTitle() {
|
||||
return this.savedView?.title
|
||||
}
|
||||
|
||||
get documentListView() {
|
||||
return this._documentListViewConfig
|
||||
}
|
||||
|
||||
set documentListView(value) {
|
||||
if (value) {
|
||||
this._documentListViewConfig = Object.assign({}, value)
|
||||
this.saveDocumentListView()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This is what switches between the saved views and the document list view. Everything on the document list uses
|
||||
* this property to determine the settings for the currently displayed document list.
|
||||
*/
|
||||
get view() {
|
||||
return this.savedView || this.documentListView
|
||||
}
|
||||
|
||||
load(config: SavedViewConfig) {
|
||||
this.view.filterRules = cloneFilterRules(config.filterRules)
|
||||
this.view.sortDirection = config.sortDirection
|
||||
this.view.sortField = config.sortField
|
||||
this.reload()
|
||||
}
|
||||
|
||||
reload(onFinish?) {
|
||||
let viewConfig = this.viewConfigOverride || this.currentViewConfig
|
||||
|
||||
this.isReloading = true
|
||||
this.documentService.list(
|
||||
this.currentPage,
|
||||
this.currentPageSize,
|
||||
viewConfig.sortField,
|
||||
viewConfig.sortDirection,
|
||||
viewConfig.filterRules).subscribe(
|
||||
this.view.sortField,
|
||||
this.view.sortDirection,
|
||||
this.view.filterRules).subscribe(
|
||||
result => {
|
||||
this.collectionSize = result.count
|
||||
this.documents = result.results
|
||||
if (onFinish) {
|
||||
onFinish()
|
||||
}
|
||||
this.isReloading = false
|
||||
},
|
||||
error => {
|
||||
if (error.error['detail'] == 'Invalid page.') {
|
||||
this.currentPage = 1
|
||||
this.reload()
|
||||
}
|
||||
this.isReloading = false
|
||||
})
|
||||
}
|
||||
|
||||
set filterRules(filterRules: FilterRule[]) {
|
||||
this.currentViewConfig.filterRules = cloneFilterRules(filterRules)
|
||||
this.saveCurrentViewConfig()
|
||||
//we're going to clone the filterRules object, since we don't
|
||||
//want changes in the filter editor to propagate into here right away.
|
||||
this.view.filterRules = cloneFilterRules(filterRules)
|
||||
this.reload()
|
||||
this.saveDocumentListView()
|
||||
}
|
||||
|
||||
get filterRules(): FilterRule[] {
|
||||
return cloneFilterRules(this.currentViewConfig.filterRules)
|
||||
return cloneFilterRules(this.view.filterRules)
|
||||
}
|
||||
|
||||
set sortField(field: string) {
|
||||
this.currentViewConfig.sortField = field
|
||||
this.saveCurrentViewConfig()
|
||||
this.view.sortField = field
|
||||
this.saveDocumentListView()
|
||||
this.reload()
|
||||
}
|
||||
|
||||
get sortField(): string {
|
||||
return this.currentViewConfig.sortField
|
||||
return this.view.sortField
|
||||
}
|
||||
|
||||
set sortDirection(direction: string) {
|
||||
this.currentViewConfig.sortDirection = direction
|
||||
this.saveCurrentViewConfig()
|
||||
this.view.sortDirection = direction
|
||||
this.saveDocumentListView()
|
||||
this.reload()
|
||||
}
|
||||
|
||||
get sortDirection(): string {
|
||||
return this.currentViewConfig.sortDirection
|
||||
return this.view.sortDirection
|
||||
}
|
||||
|
||||
loadViewConfig(config: SavedViewConfig) {
|
||||
Object.assign(this.currentViewConfig, config)
|
||||
this.reload()
|
||||
}
|
||||
|
||||
private saveCurrentViewConfig() {
|
||||
sessionStorage.setItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG, JSON.stringify(this.currentViewConfig))
|
||||
private saveDocumentListView() {
|
||||
sessionStorage.setItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG, JSON.stringify(this.documentListView))
|
||||
}
|
||||
|
||||
getLastPage(): number {
|
||||
@@ -134,21 +187,21 @@ export class DocumentListViewService {
|
||||
}
|
||||
|
||||
constructor(private documentService: DocumentService) {
|
||||
let currentViewConfigJson = sessionStorage.getItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG)
|
||||
if (currentViewConfigJson) {
|
||||
let documentListViewConfigJson = sessionStorage.getItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG)
|
||||
if (documentListViewConfigJson) {
|
||||
try {
|
||||
this.currentViewConfig = JSON.parse(currentViewConfigJson)
|
||||
this.documentListView = JSON.parse(documentListViewConfigJson)
|
||||
} catch (e) {
|
||||
sessionStorage.removeItem(DOCUMENT_LIST_SERVICE.CURRENT_VIEW_CONFIG)
|
||||
this.currentViewConfig = null
|
||||
this.documentListView = null
|
||||
}
|
||||
}
|
||||
if (!this.currentViewConfig) {
|
||||
this.currentViewConfig = {
|
||||
if (!this.documentListView) {
|
||||
this.documentListView = {
|
||||
filterRules: [],
|
||||
sortDirection: 'des',
|
||||
sortField: 'created'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,13 +36,21 @@ export class SavedViewConfigService {
|
||||
return this.configs.find(sf => sf.id == id)
|
||||
}
|
||||
|
||||
saveConfig(config: SavedViewConfig) {
|
||||
newConfig(config: SavedViewConfig) {
|
||||
config.id = uuidv4()
|
||||
this.configs.push(config)
|
||||
|
||||
this.save()
|
||||
}
|
||||
|
||||
updateConfig(config: SavedViewConfig) {
|
||||
let savedConfig = this.configs.find(c => c.id == config.id)
|
||||
if (savedConfig) {
|
||||
Object.assign(savedConfig, config)
|
||||
this.save()
|
||||
}
|
||||
}
|
||||
|
||||
private save() {
|
||||
localStorage.setItem('saved-view-config-service:savedConfigs', JSON.stringify(this.configs))
|
||||
}
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
from .checks import changed_password_check
|
||||
# this is here so that django finds the checks.
|
||||
from .checks import *
|
||||
|
||||
@@ -4,12 +4,13 @@ import os
|
||||
import pickle
|
||||
import re
|
||||
|
||||
from django.conf import settings
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
from sklearn.neural_network import MLPClassifier
|
||||
from sklearn.preprocessing import MultiLabelBinarizer
|
||||
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
|
||||
from sklearn.utils.multiclass import type_of_target
|
||||
|
||||
from documents.models import Document, MatchingModel
|
||||
from paperless import settings
|
||||
|
||||
|
||||
class IncompatibleClassifierVersionError(Exception):
|
||||
@@ -27,7 +28,7 @@ def preprocess_content(content):
|
||||
|
||||
class DocumentClassifier(object):
|
||||
|
||||
FORMAT_VERSION = 5
|
||||
FORMAT_VERSION = 6
|
||||
|
||||
def __init__(self):
|
||||
# mtime of the model file on disk. used to prevent reloading when
|
||||
@@ -54,6 +55,8 @@ class DocumentClassifier(object):
|
||||
"Cannor load classifier, incompatible versions.")
|
||||
else:
|
||||
if self.classifier_version > 0:
|
||||
# Don't be confused by this check. It's simply here
|
||||
# so that we won't log anything on initial reload.
|
||||
logger.info("Classifier updated on disk, "
|
||||
"reloading classifier models")
|
||||
self.data_hash = pickle.load(f)
|
||||
@@ -122,9 +125,14 @@ class DocumentClassifier(object):
|
||||
labels_tags_unique = set([tag for tags in labels_tags for tag in tags])
|
||||
|
||||
num_tags = len(labels_tags_unique)
|
||||
|
||||
# subtract 1 since -1 (null) is also part of the classes.
|
||||
num_correspondents = len(set(labels_correspondent)) - 1
|
||||
num_document_types = len(set(labels_document_type)) - 1
|
||||
|
||||
# union with {-1} accounts for cases where all documents have
|
||||
# correspondents and types assigned, so -1 isn't part of labels_x, which
|
||||
# it usually is.
|
||||
num_correspondents = len(set(labels_correspondent) | {-1}) - 1
|
||||
num_document_types = len(set(labels_document_type) | {-1}) - 1
|
||||
|
||||
logging.getLogger(__name__).debug(
|
||||
"{} documents, {} tag(s), {} correspondent(s), "
|
||||
@@ -145,12 +153,23 @@ class DocumentClassifier(object):
|
||||
)
|
||||
data_vectorized = self.data_vectorizer.fit_transform(data)
|
||||
|
||||
self.tags_binarizer = MultiLabelBinarizer()
|
||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(labels_tags)
|
||||
|
||||
# Step 3: train the classifiers
|
||||
if num_tags > 0:
|
||||
logging.getLogger(__name__).debug("Training tags classifier...")
|
||||
|
||||
if num_tags == 1:
|
||||
# Special case where only one tag has auto:
|
||||
# Fallback to binary classification.
|
||||
labels_tags = [label[0] if len(label) == 1 else -1
|
||||
for label in labels_tags]
|
||||
self.tags_binarizer = LabelBinarizer()
|
||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(
|
||||
labels_tags).ravel()
|
||||
else:
|
||||
self.tags_binarizer = MultiLabelBinarizer()
|
||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(
|
||||
labels_tags)
|
||||
|
||||
self.tags_classifier = MLPClassifier(tol=0.01)
|
||||
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
|
||||
else:
|
||||
@@ -222,6 +241,16 @@ class DocumentClassifier(object):
|
||||
X = self.data_vectorizer.transform([preprocess_content(content)])
|
||||
y = self.tags_classifier.predict(X)
|
||||
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
|
||||
return tags_ids
|
||||
if type_of_target(y).startswith('multilabel'):
|
||||
# the usual case when there are multiple tags.
|
||||
return list(tags_ids)
|
||||
elif type_of_target(y) == 'binary' and tags_ids != -1:
|
||||
# This is for when we have binary classification with only one
|
||||
# tag and the result is to assign this tag.
|
||||
return [tags_ids]
|
||||
else:
|
||||
# Usually binary as well with -1 as the result, but we're
|
||||
# going to catch everything else here as well.
|
||||
return []
|
||||
else:
|
||||
return []
|
||||
|
||||
@@ -8,12 +8,12 @@ from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
|
||||
from paperless.db import GnuPG
|
||||
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
|
||||
from .file_handling import generate_filename, create_source_path_directory
|
||||
from .file_handling import create_source_path_directory
|
||||
from .loggers import LoggingMixin
|
||||
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
|
||||
from .parsers import ParseError, get_parser_class_for_mime_type
|
||||
from .parsers import ParseError, get_parser_class_for_mime_type, \
|
||||
get_supported_file_extensions
|
||||
from .signals import (
|
||||
document_consumption_finished,
|
||||
document_consumption_started
|
||||
@@ -40,16 +40,20 @@ class Consumer(LoggingMixin):
|
||||
raise ConsumerError("Cannot consume {}: It is not a file".format(
|
||||
self.path))
|
||||
|
||||
def pre_check_consumption_dir(self):
|
||||
if not settings.CONSUMPTION_DIR:
|
||||
raise ConsumerError(
|
||||
"The CONSUMPTION_DIR settings variable does not appear to be "
|
||||
"set.")
|
||||
def pre_check_file_extension(self):
|
||||
extensions = get_supported_file_extensions()
|
||||
_, ext = os.path.splitext(self.filename)
|
||||
|
||||
if not os.path.isdir(settings.CONSUMPTION_DIR):
|
||||
if not ext:
|
||||
raise ConsumerError(
|
||||
"Consumption directory {} does not exist".format(
|
||||
settings.CONSUMPTION_DIR))
|
||||
f"Not consuming {self.filename}: File type unknown."
|
||||
)
|
||||
|
||||
if ext not in extensions:
|
||||
raise ConsumerError(
|
||||
f"Not consuming {self.filename}: File extension {ext} does "
|
||||
f"not map to any known file type ({str(extensions)})"
|
||||
)
|
||||
|
||||
def pre_check_duplicate(self):
|
||||
with open(self.path, "rb") as f:
|
||||
@@ -92,7 +96,7 @@ class Consumer(LoggingMixin):
|
||||
# Make sure that preconditions for consuming the file are met.
|
||||
|
||||
self.pre_check_file_exists()
|
||||
self.pre_check_consumption_dir()
|
||||
self.pre_check_file_extension()
|
||||
self.pre_check_directories()
|
||||
self.pre_check_duplicate()
|
||||
|
||||
@@ -180,6 +184,13 @@ class Consumer(LoggingMixin):
|
||||
self._write(document, self.path, document.source_path)
|
||||
self._write(document, thumbnail, document.thumbnail_path)
|
||||
|
||||
# After performing all database operations and moving files
|
||||
# into place, tell paperless where the file is.
|
||||
document.filename = os.path.basename(document.source_path)
|
||||
# Saving the document now will trigger the filename handling
|
||||
# logic.
|
||||
document.save()
|
||||
|
||||
# Delete the file only if it was successfully consumed
|
||||
self.log("debug", "Deleting file {}".format(self.path))
|
||||
os.unlink(self.path)
|
||||
@@ -208,10 +219,7 @@ class Consumer(LoggingMixin):
|
||||
created = file_info.created or date or timezone.make_aware(
|
||||
datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||
|
||||
if settings.PASSPHRASE:
|
||||
storage_type = Document.STORAGE_TYPE_GPG
|
||||
else:
|
||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
with open(self.path, "rb") as f:
|
||||
document = Document.objects.create(
|
||||
@@ -233,12 +241,6 @@ class Consumer(LoggingMixin):
|
||||
|
||||
self.apply_overrides(document)
|
||||
|
||||
document.filename = generate_filename(document)
|
||||
|
||||
# We need to save the document twice, since we need the PK of the
|
||||
# document in order to create its filename above.
|
||||
document.save()
|
||||
|
||||
return document
|
||||
|
||||
def apply_overrides(self, document):
|
||||
@@ -260,8 +262,4 @@ class Consumer(LoggingMixin):
|
||||
def _write(self, document, source, target):
|
||||
with open(source, "rb") as read_file:
|
||||
with open(target, "wb") as write_file:
|
||||
if document.storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
|
||||
write_file.write(read_file.read())
|
||||
return
|
||||
self.log("debug", "Encrypting")
|
||||
write_file.write(GnuPG.encrypted(read_file))
|
||||
write_file.write(read_file.read())
|
||||
|
||||
@@ -4,10 +4,11 @@ from contextlib import contextmanager
|
||||
|
||||
from django.conf import settings
|
||||
from whoosh import highlight
|
||||
from whoosh.fields import Schema, TEXT, NUMERIC
|
||||
from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME
|
||||
from whoosh.highlight import Formatter, get_text
|
||||
from whoosh.index import create_in, exists_in, open_dir
|
||||
from whoosh.qparser import MultifieldParser
|
||||
from whoosh.qparser.dateparse import DateParserPlugin
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
|
||||
@@ -59,29 +60,40 @@ def get_schema():
|
||||
id=NUMERIC(stored=True, unique=True, numtype=int),
|
||||
title=TEXT(stored=True),
|
||||
content=TEXT(),
|
||||
correspondent=TEXT(stored=True)
|
||||
correspondent=TEXT(stored=True),
|
||||
tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True),
|
||||
type=TEXT(stored=True),
|
||||
created=DATETIME(stored=True, sortable=True),
|
||||
modified=DATETIME(stored=True, sortable=True),
|
||||
added=DATETIME(stored=True, sortable=True),
|
||||
)
|
||||
|
||||
|
||||
def open_index(recreate=False):
|
||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||
return open_dir(settings.INDEX_DIR)
|
||||
else:
|
||||
# TODO: this is not thread safe. If 2 instances try to create the index
|
||||
# at the same time, this fails. This currently prevents parallel
|
||||
# tests.
|
||||
if not os.path.isdir(settings.INDEX_DIR):
|
||||
os.makedirs(settings.INDEX_DIR, exist_ok=True)
|
||||
return create_in(settings.INDEX_DIR, get_schema())
|
||||
try:
|
||||
if exists_in(settings.INDEX_DIR) and not recreate:
|
||||
return open_dir(settings.INDEX_DIR, schema=get_schema())
|
||||
except Exception as e:
|
||||
logger.error(f"Error while opening the index: {e}, recreating.")
|
||||
|
||||
if not os.path.isdir(settings.INDEX_DIR):
|
||||
os.makedirs(settings.INDEX_DIR, exist_ok=True)
|
||||
return create_in(settings.INDEX_DIR, get_schema())
|
||||
|
||||
|
||||
def update_document(writer, doc):
|
||||
logger.debug("Indexing {}...".format(doc))
|
||||
tags = ",".join([t.name for t in doc.tags.all()])
|
||||
writer.update_document(
|
||||
id=doc.pk,
|
||||
title=doc.title,
|
||||
content=doc.content,
|
||||
correspondent=doc.correspondent.name if doc.correspondent else None
|
||||
correspondent=doc.correspondent.name if doc.correspondent else None,
|
||||
tag=tags if tags else None,
|
||||
type=doc.document_type.name if doc.document_type else None,
|
||||
created=doc.created,
|
||||
added=doc.added,
|
||||
modified=doc.modified,
|
||||
)
|
||||
|
||||
|
||||
@@ -103,16 +115,27 @@ def remove_document_from_index(document):
|
||||
|
||||
|
||||
@contextmanager
|
||||
def query_page(ix, query, page):
|
||||
def query_page(ix, querystring, page):
|
||||
searcher = ix.searcher()
|
||||
try:
|
||||
query_parser = MultifieldParser(["content", "title", "correspondent"],
|
||||
ix.schema).parse(query)
|
||||
result_page = searcher.search_page(query_parser, page)
|
||||
qp = MultifieldParser(
|
||||
["content", "title", "correspondent", "tag", "type"],
|
||||
ix.schema)
|
||||
qp.add_plugin(DateParserPlugin())
|
||||
|
||||
q = qp.parse(querystring)
|
||||
result_page = searcher.search_page(q, page)
|
||||
result_page.results.fragmenter = highlight.ContextFragmenter(
|
||||
surround=50)
|
||||
result_page.results.formatter = JsonFormatter()
|
||||
yield result_page
|
||||
|
||||
corrected = searcher.correct_query(q, querystring)
|
||||
if corrected.query != q:
|
||||
corrected_query = corrected.string
|
||||
else:
|
||||
corrected_query = None
|
||||
|
||||
yield result_page, corrected_query
|
||||
finally:
|
||||
searcher.close()
|
||||
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class PaperlessHandler(logging.Handler):
|
||||
def emit(self, record):
|
||||
if settings.DISABLE_DBHANDLER:
|
||||
return
|
||||
|
||||
# We have to do the import here or Django will barf when it tries to
|
||||
# load this because the apps aren't loaded at that point
|
||||
from .models import Log
|
||||
|
||||
@@ -17,16 +17,6 @@ class Command(BaseCommand):
|
||||
|
||||
def add_arguments(self, parser):
|
||||
|
||||
parser.add_argument(
|
||||
"from",
|
||||
choices=("gpg", "unencrypted"),
|
||||
help="The state you want to change your documents from"
|
||||
)
|
||||
parser.add_argument(
|
||||
"to",
|
||||
choices=("gpg", "unencrypted"),
|
||||
help="The state you want to change your documents to"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--passphrase",
|
||||
help="If PAPERLESS_PASSPHRASE isn't set already, you need to "
|
||||
@@ -50,11 +40,6 @@ class Command(BaseCommand):
|
||||
except KeyboardInterrupt:
|
||||
return
|
||||
|
||||
if options["from"] == options["to"]:
|
||||
raise CommandError(
|
||||
'The "from" and "to" values can\'t be the same.'
|
||||
)
|
||||
|
||||
passphrase = options["passphrase"] or settings.PASSPHRASE
|
||||
if not passphrase:
|
||||
raise CommandError(
|
||||
@@ -62,10 +47,7 @@ class Command(BaseCommand):
|
||||
"by declaring it in your environment or your config."
|
||||
)
|
||||
|
||||
if options["from"] == "gpg" and options["to"] == "unencrypted":
|
||||
self.__gpg_to_unencrypted(passphrase)
|
||||
elif options["from"] == "unencrypted" and options["to"] == "gpg":
|
||||
self.__unencrypted_to_gpg(passphrase)
|
||||
self.__gpg_to_unencrypted(passphrase)
|
||||
|
||||
@staticmethod
|
||||
def __gpg_to_unencrypted(passphrase):
|
||||
@@ -79,42 +61,28 @@ class Command(BaseCommand):
|
||||
document).encode('utf-8'), "green"))
|
||||
|
||||
old_paths = [document.source_path, document.thumbnail_path]
|
||||
|
||||
raw_document = GnuPG.decrypted(document.source_file, passphrase)
|
||||
raw_thumb = GnuPG.decrypted(document.thumbnail_file, passphrase)
|
||||
|
||||
document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
||||
ext = os.path.splitext(document.filename)[1]
|
||||
|
||||
if not ext == '.gpg':
|
||||
raise CommandError(
|
||||
f"Abort: encrypted file {document.source_path} does not "
|
||||
f"end with .gpg")
|
||||
|
||||
document.filename = os.path.splitext(document.filename)[0]
|
||||
|
||||
with open(document.source_path, "wb") as f:
|
||||
f.write(raw_document)
|
||||
|
||||
with open(document.thumbnail_path, "wb") as f:
|
||||
f.write(raw_thumb)
|
||||
|
||||
document.save(update_fields=("storage_type",))
|
||||
|
||||
for path in old_paths:
|
||||
os.unlink(path)
|
||||
|
||||
@staticmethod
|
||||
def __unencrypted_to_gpg(passphrase):
|
||||
|
||||
unencrypted_files = Document.objects.filter(
|
||||
storage_type=Document.STORAGE_TYPE_UNENCRYPTED)
|
||||
|
||||
for document in unencrypted_files:
|
||||
|
||||
print(coloured("Encrypting {}".format(document), "green"))
|
||||
|
||||
old_paths = [document.source_path, document.thumbnail_path]
|
||||
with open(document.source_path, "rb") as raw_document:
|
||||
with open(document.thumbnail_path, "rb") as raw_thumb:
|
||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
||||
with open(document.source_path, "wb") as f:
|
||||
f.write(GnuPG.encrypted(raw_document, passphrase))
|
||||
with open(document.thumbnail_path, "wb") as f:
|
||||
f.write(GnuPG.encrypted(raw_thumb, passphrase))
|
||||
|
||||
document.save(update_fields=("storage_type",))
|
||||
document.save(update_fields=("storage_type", "filename"))
|
||||
|
||||
for path in old_paths:
|
||||
os.unlink(path)
|
||||
@@ -1,11 +1,11 @@
|
||||
import logging
|
||||
import os
|
||||
from time import sleep
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
from django_q.tasks import async_task
|
||||
from watchdog.events import FileSystemEventHandler
|
||||
from watchdog.observers import Observer
|
||||
from watchdog.observers.polling import PollingObserver
|
||||
|
||||
try:
|
||||
@@ -13,25 +13,54 @@ try:
|
||||
except ImportError:
|
||||
INotify = flags = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _consume(file):
    """Queue ``file`` for consumption as a background task.

    Paths that are not regular files are only logged at debug level. Any
    exception raised while checking or queueing is logged as an error and
    swallowed, so a single bad file cannot take the consumer down.
    """
    try:
        if not os.path.isfile(file):
            logger.debug(
                f"Not consuming file {file}: File has moved.")
            return

        # The task name is the file's base name, capped at 100 characters.
        task_label = os.path.basename(file)[:100]
        async_task("documents.tasks.consume_file",
                   file,
                   task_name=task_label)

    except Exception as e:
        # Deliberate catch-all: the consumer must keep running. The test
        # suite also listens for this log entry to detect errors.
        logger.error(
            "Error while consuming document: {}".format(e))
|
||||
|
||||
|
||||
def _consume_wait_unmodified(file, num_tries=20, wait_time=1):
    """Consume ``file`` once its modification time stops changing.

    The file's mtime is sampled up to ``num_tries`` times, sleeping
    ``wait_time`` seconds between samples. As soon as two consecutive
    samples are equal the file is handed to ``_consume``. If the file
    disappears in the meantime, or the mtime never settles, nothing is
    consumed and the outcome is only logged.
    """
    previous_mtime = -1

    for _ in range(num_tries):
        try:
            latest_mtime = os.stat(file).st_mtime
        except FileNotFoundError:
            logger.debug(f"File {file} moved while waiting for it to remain "
                         f"unmodified.")
            return

        if latest_mtime == previous_mtime:
            # Unchanged since the previous sample: safe to consume.
            _consume(file)
            return

        previous_mtime = latest_mtime
        sleep(wait_time)

    logger.error(f"Timeout while waiting on file {file} to remain unmodified.")
|
||||
|
||||
|
||||
class Handler(FileSystemEventHandler):
|
||||
|
||||
def _consume(self, file):
|
||||
if os.path.isfile(file):
|
||||
try:
|
||||
async_task("documents.tasks.consume_file",
|
||||
file,
|
||||
task_name=os.path.basename(file)[:100])
|
||||
except Exception as e:
|
||||
# Catch all so that the consumer won't crash.
|
||||
logging.getLogger(__name__).error(
|
||||
"Error while consuming document: {}".format(e))
|
||||
|
||||
def on_created(self, event):
|
||||
self._consume(event.src_path)
|
||||
_consume_wait_unmodified(event.src_path)
|
||||
|
||||
def on_moved(self, event):
|
||||
self._consume(event.src_path)
|
||||
_consume_wait_unmodified(event.dest_path)
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
@@ -40,12 +69,15 @@ class Command(BaseCommand):
|
||||
consumption directory.
|
||||
"""
|
||||
|
||||
# This is here primarily for the tests and is irrelevant in production.
|
||||
stop_flag = False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
|
||||
self.verbosity = 0
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
BaseCommand.__init__(self, *args, **kwargs)
|
||||
self.observer = None
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument(
|
||||
@@ -54,38 +86,66 @@ class Command(BaseCommand):
|
||||
nargs="?",
|
||||
help="The consumption directory."
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
|
||||
self.verbosity = options["verbosity"]
|
||||
directory = options["directory"]
|
||||
|
||||
logging.getLogger(__name__).info(
|
||||
"Starting document consumer at {}".format(
|
||||
directory
|
||||
)
|
||||
parser.add_argument(
|
||||
"--oneshot",
|
||||
action="store_true",
|
||||
help="Run only once."
|
||||
)
|
||||
|
||||
# Consume all files as this is not done initially by the watchdog
|
||||
for entry in os.scandir(directory):
|
||||
if entry.is_file():
|
||||
async_task("documents.tasks.consume_file",
|
||||
entry.path,
|
||||
task_name=os.path.basename(entry.path)[:100])
|
||||
def handle(self, *args, **options):
|
||||
directory = options["directory"]
|
||||
|
||||
# Start the watchdog. Woof!
|
||||
if settings.CONSUMER_POLLING > 0:
|
||||
logging.getLogger(__name__).info(
|
||||
"Using polling instead of file system notifications.")
|
||||
observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
|
||||
if not directory:
|
||||
raise CommandError(
|
||||
"CONSUMPTION_DIR does not appear to be set."
|
||||
)
|
||||
|
||||
if not os.path.isdir(directory):
|
||||
raise CommandError(
|
||||
f"Consumption directory {directory} does not exist")
|
||||
|
||||
for entry in os.scandir(directory):
|
||||
_consume(entry.path)
|
||||
|
||||
if options["oneshot"]:
|
||||
return
|
||||
|
||||
if settings.CONSUMER_POLLING == 0 and INotify:
|
||||
self.handle_inotify(directory)
|
||||
else:
|
||||
observer = Observer()
|
||||
event_handler = Handler()
|
||||
observer.schedule(event_handler, directory, recursive=True)
|
||||
observer.start()
|
||||
self.handle_polling(directory)
|
||||
|
||||
logger.debug("Consumer exiting.")
|
||||
|
||||
def handle_polling(self, directory):
|
||||
logging.getLogger(__name__).info(
|
||||
f"Polling directory for changes: {directory}")
|
||||
self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
|
||||
self.observer.schedule(Handler(), directory, recursive=False)
|
||||
self.observer.start()
|
||||
try:
|
||||
while observer.is_alive():
|
||||
observer.join(1)
|
||||
while self.observer.is_alive():
|
||||
self.observer.join(1)
|
||||
if self.stop_flag:
|
||||
self.observer.stop()
|
||||
except KeyboardInterrupt:
|
||||
observer.stop()
|
||||
observer.join()
|
||||
self.observer.stop()
|
||||
self.observer.join()
|
||||
|
||||
def handle_inotify(self, directory):
    """Watch ``directory`` with inotify and consume files as they arrive.

    A watch for ``CLOSE_WRITE`` and ``MOVED_TO`` events is registered on
    ``directory``. Each reported event name is joined with ``directory``
    and passed to ``_consume``. The loop ends when ``self.stop_flag``
    becomes true or on ``KeyboardInterrupt``.

    The watch is removed and the inotify handle closed on every exit
    path, including unexpected exceptions, so the descriptor is never
    leaked.
    """
    logging.getLogger(__name__).info(
        f"Using inotify to watch directory for changes: {directory}")

    inotify = INotify()
    try:
        descriptor = inotify.add_watch(
            directory, flags.CLOSE_WRITE | flags.MOVED_TO)
        try:
            while not self.stop_flag:
                # NOTE(review): timeout/read_delay are presumably in
                # milliseconds -- confirm against the inotify library
                # docs. The finite timeout lets the loop re-check
                # stop_flag regularly.
                for event in inotify.read(timeout=1000, read_delay=1000):
                    _consume(os.path.join(directory, event.name))
        except KeyboardInterrupt:
            pass
        finally:
            inotify.rm_watch(descriptor)
    finally:
        inotify.close()
|
||||
|
||||
@@ -22,13 +22,6 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument("target")
|
||||
parser.add_argument(
|
||||
"--legacy",
|
||||
action="store_true",
|
||||
help="Don't try to export all of the document data, just dump the "
|
||||
"original document files out in a format that makes "
|
||||
"re-consuming them easy."
|
||||
)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
BaseCommand.__init__(self, *args, **kwargs)
|
||||
@@ -44,10 +37,7 @@ class Command(Renderable, BaseCommand):
|
||||
if not os.access(self.target, os.W_OK):
|
||||
raise CommandError("That path doesn't appear to be writable")
|
||||
|
||||
if options["legacy"]:
|
||||
self.dump_legacy()
|
||||
else:
|
||||
self.dump()
|
||||
self.dump()
|
||||
|
||||
def dump(self):
|
||||
|
||||
@@ -102,33 +92,3 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
with open(os.path.join(self.target, "manifest.json"), "w") as f:
|
||||
json.dump(manifest, f, indent=2)
|
||||
|
||||
def dump_legacy(self):
|
||||
|
||||
for document in Document.objects.all():
|
||||
|
||||
target = os.path.join(
|
||||
self.target, self._get_legacy_file_name(document))
|
||||
|
||||
print("Exporting: {}".format(target))
|
||||
|
||||
with open(target, "wb") as f:
|
||||
f.write(GnuPG.decrypted(document.source_file))
|
||||
t = int(time.mktime(document.created.timetuple()))
|
||||
os.utime(target, times=(t, t))
|
||||
|
||||
@staticmethod
|
||||
def _get_legacy_file_name(doc):
|
||||
|
||||
if not doc.correspondent and not doc.title:
|
||||
return os.path.basename(doc.source_path)
|
||||
|
||||
created = doc.created.strftime("%Y%m%d%H%M%SZ")
|
||||
tags = ",".join([t.slug for t in doc.tags.all()])
|
||||
|
||||
if tags:
|
||||
return "{} - {} - {} - {}{}".format(
|
||||
created, doc.correspondent, doc.title, tags, doc.file_type)
|
||||
|
||||
return "{} - {} - {}{}".format(
|
||||
created, doc.correspondent, doc.title, doc.file_type)
|
||||
|
||||
@@ -82,8 +82,6 @@ class Command(Renderable, BaseCommand):
|
||||
def _import_files_from_manifest(self):
|
||||
|
||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
if settings.PASSPHRASE:
|
||||
storage_type = Document.STORAGE_TYPE_GPG
|
||||
|
||||
for record in self.manifest:
|
||||
|
||||
@@ -105,23 +103,8 @@ class Command(Renderable, BaseCommand):
|
||||
|
||||
create_source_path_directory(document.source_path)
|
||||
|
||||
if settings.PASSPHRASE:
|
||||
|
||||
with open(document_path, "rb") as unencrypted:
|
||||
with open(document.source_path, "wb") as encrypted:
|
||||
print("Encrypting {} and saving it to {}".format(
|
||||
doc_file, document.source_path))
|
||||
encrypted.write(GnuPG.encrypted(unencrypted))
|
||||
|
||||
with open(thumbnail_path, "rb") as unencrypted:
|
||||
with open(document.thumbnail_path, "wb") as encrypted:
|
||||
print("Encrypting {} and saving it to {}".format(
|
||||
thumb_file, document.thumbnail_path))
|
||||
encrypted.write(GnuPG.encrypted(unencrypted))
|
||||
|
||||
else:
|
||||
print(f"Moving {document_path} to {document.source_path}")
|
||||
shutil.copy(document_path, document.source_path)
|
||||
shutil.copy(thumbnail_path, document.thumbnail_path)
|
||||
print(f"Moving {document_path} to {document.source_path}")
|
||||
shutil.copy(document_path, document.source_path)
|
||||
shutil.copy(thumbnail_path, document.thumbnail_path)
|
||||
|
||||
document.save()
|
||||
|
||||
@@ -5,23 +5,6 @@ from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
def make_index(apps, schema_editor):
|
||||
Document = apps.get_model("documents", "Document")
|
||||
documents = Document.objects.all()
|
||||
print()
|
||||
try:
|
||||
print(" --> Creating document index...")
|
||||
from whoosh.writing import AsyncWriter
|
||||
from documents import index
|
||||
ix = index.open_index(recreate=True)
|
||||
with AsyncWriter(ix) as writer:
|
||||
for document in documents:
|
||||
index.update_document(writer, document)
|
||||
except ImportError:
|
||||
# index may not be relevant anymore
|
||||
print(" --> Cannot create document index.")
|
||||
|
||||
|
||||
def logs_set_default_group(apps, schema_editor):
|
||||
Log = apps.get_model('documents', 'Log')
|
||||
for log in Log.objects.all():
|
||||
@@ -99,8 +82,4 @@ class Migration(migrations.Migration):
|
||||
code=django.db.migrations.operations.special.RunPython.noop,
|
||||
reverse_code=logs_set_default_group
|
||||
),
|
||||
migrations.RunPython(
|
||||
code=make_index,
|
||||
reverse_code=django.db.migrations.operations.special.RunPython.noop,
|
||||
),
|
||||
]
|
||||
|
||||
26
src/documents/migrations/1004_sanity_check_schedule.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# Generated by Django 3.1.3 on 2020-11-25 14:53
|
||||
|
||||
from django.db import migrations
|
||||
from django.db.migrations import RunPython
|
||||
from django_q.models import Schedule
|
||||
from django_q.tasks import schedule
|
||||
|
||||
|
||||
def add_schedules(apps, schema_editor):
    """Create the weekly django-q schedule for the sanity check task."""
    schedule(
        'documents.tasks.sanity_check',
        name="Perform sanity check",
        schedule_type=Schedule.WEEKLY,
    )
|
||||
|
||||
|
||||
def remove_schedules(apps, schema_editor):
    """Delete every schedule that runs the sanity check task."""
    sanity_check_schedules = Schedule.objects.filter(
        func='documents.tasks.sanity_check')
    sanity_check_schedules.delete()
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Data migration that installs the sanity check schedule.

    Applying it runs ``add_schedules``; reverting it runs
    ``remove_schedules``.
    """

    dependencies = [
        ('documents', '1003_mime_types'),
        ('django_q', '0013_task_attempt_count'),
    ]

    operations = [
        migrations.RunPython(
            code=add_schedules,
            reverse_code=remove_schedules,
        ),
    ]
|
||||
@@ -1,7 +1,6 @@
|
||||
# coding=utf-8
|
||||
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
@@ -12,6 +11,8 @@ from django.db import models
|
||||
from django.utils import timezone
|
||||
from django.utils.text import slugify
|
||||
|
||||
from documents.parsers import get_default_file_extension
|
||||
|
||||
|
||||
class MatchingModel(models.Model):
|
||||
|
||||
@@ -198,7 +199,7 @@ class Document(models.Model):
|
||||
ordering = ("correspondent", "title")
|
||||
|
||||
def __str__(self):
|
||||
created = self.created.strftime("%Y%m%d%H%M%S")
|
||||
created = self.created.strftime("%Y%m%d")
|
||||
if self.correspondent and self.title:
|
||||
return "{}: {} - {}".format(
|
||||
created, self.correspondent, self.title)
|
||||
@@ -230,7 +231,7 @@ class Document(models.Model):
|
||||
|
||||
@property
|
||||
def file_type(self):
|
||||
return mimetypes.guess_extension(str(self.mime_type))
|
||||
return get_default_file_extension(self.mime_type)
|
||||
|
||||
@property
|
||||
def thumbnail_path(self):
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
@@ -42,6 +43,29 @@ def is_mime_type_supported(mime_type):
|
||||
return get_parser_class_for_mime_type(mime_type) is not None
|
||||
|
||||
|
||||
def get_default_file_extension(mime_type):
    """Return the extension a parser declares for ``mime_type``.

    Every receiver of ``document_consumer_declaration`` answers with a
    declaration whose ``"mime_types"`` entry is looked up by mime type.
    The value from the first declaration containing ``mime_type`` is
    returned, or ``None`` when no declaration contains it.
    """
    declared_mappings = (
        reply[1]["mime_types"]
        for reply in document_consumer_declaration.send(None)
    )

    for extension_by_mime_type in declared_mappings:
        if mime_type in extension_by_mime_type:
            return extension_by_mime_type[mime_type]

    return None
|
||||
|
||||
|
||||
def get_supported_file_extensions():
    """Return the set of file extensions covered by the declared parsers.

    For each mime type listed in any ``document_consumer_declaration``
    response, all extensions that ``mimetypes`` associates with it are
    added to the result.
    """
    found = set()

    for reply in document_consumer_declaration.send(None):
        for declared_type in reply[1]["mime_types"]:
            found |= set(mimetypes.guess_all_extensions(declared_type))

    return found
|
||||
|
||||
|
||||
def get_parser_class_for_mime_type(mime_type):
|
||||
|
||||
options = []
|
||||
|
||||
94
src/documents/sanity_checker.py
Normal file
@@ -0,0 +1,94 @@
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
class SanityMessage:
    """Base class for a single finding reported by the sanity checker.

    Subclasses only decide how the finding is rendered by ``str()``.
    """

    # Human-readable description of the finding. Kept as a class-level
    # default so the attribute exists even on a bare instance.
    message = None

    def __init__(self, message=None):
        self.message = message


class SanityWarning(SanityMessage):
    """A finding rendered with a ``Warning:`` prefix."""

    def __str__(self):
        return f"Warning: {self.message}"


class SanityError(SanityMessage):
    """A finding rendered with an ``ERROR:`` prefix."""

    def __str__(self):
        return f"ERROR: {self.message}"
|
||||
|
||||
|
||||
class SanityFailedError(Exception):
    """Raised when the sanity checker reported at least one finding.

    ``messages`` holds the findings; ``str()`` of the exception renders
    each of them on its own line below a heading.
    """

    def __init__(self, messages):
        super().__init__(messages)
        self.messages = messages

    def __str__(self):
        message_string = "\n".join(str(m) for m in self.messages)
        return (
            "The following issues were found by the sanity checker:\n"
            f"{message_string}\n\n===============\n\n")
|
||||
|
||||
|
||||
def check_sanity():
    """Check stored documents against the files below ``MEDIA_ROOT``.

    For every document this verifies that its thumbnail and original
    exist and can be read, that the MD5 checksum of the original matches
    the stored checksum, and that the document has content. Files found
    below ``settings.MEDIA_ROOT`` that belong to no document are reported
    as orphans.

    Returns:
        A list of ``SanityError`` / ``SanityWarning`` objects; empty when
        nothing was found.
    """
    messages = []

    # Every file below MEDIA_ROOT starts out as a potential orphan. A set
    # gives cheap removal, and discard() tolerates document files that
    # were never collected (e.g. stored outside MEDIA_ROOT or referenced
    # by more than one document) instead of raising ValueError.
    present_files = set()
    for root, _subdirs, files in os.walk(settings.MEDIA_ROOT):
        for name in files:
            present_files.add(os.path.normpath(os.path.join(root, name)))

    for doc in Document.objects.all():
        # Check thumbnail
        if not os.path.isfile(doc.thumbnail_path):
            messages.append(SanityError(
                f"Thumbnail of document {doc.pk} does not exist."))
        else:
            present_files.discard(os.path.normpath(doc.thumbnail_path))
            try:
                # Reading the file is the actual readability check.
                with doc.thumbnail_file as f:
                    f.read()
            except OSError as e:
                messages.append(SanityError(
                    f"Cannot read thumbnail file of document {doc.pk}: {e}"
                ))

        # Check document
        if not os.path.isfile(doc.source_path):
            messages.append(SanityError(
                f"Original of document {doc.pk} does not exist."))
        else:
            present_files.discard(os.path.normpath(doc.source_path))
            checksum = None
            try:
                with doc.source_file as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
            except OSError as e:
                messages.append(SanityError(
                    f"Cannot read original file of document {doc.pk}: {e}"))

            # Only compare when the file could actually be read.
            if checksum and not checksum == doc.checksum:
                messages.append(SanityError(
                    f"Checksum mismatch of document {doc.pk}. "
                    f"Stored: {doc.checksum}, actual: {checksum}."
                ))

        if not doc.content:
            messages.append(SanityWarning(
                f"Document {doc.pk} has no content."
            ))

    # Sorted so the report is stable between runs.
    for extra_file in sorted(present_files):
        messages.append(SanityWarning(
            f"Orphaned file in media dir: {extra_file}"
        ))

    return messages
|
||||
@@ -93,14 +93,11 @@ class DocumentSerializer(serializers.ModelSerializer):
|
||||
"document_type_id",
|
||||
"title",
|
||||
"content",
|
||||
"mime_type",
|
||||
"tags",
|
||||
"tags_id",
|
||||
"checksum",
|
||||
"created",
|
||||
"modified",
|
||||
"added",
|
||||
"file_name",
|
||||
"archive_serial_number"
|
||||
)
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ from django.contrib.contenttypes.models import ContentType
|
||||
from django.db import models, DatabaseError
|
||||
from django.dispatch import receiver
|
||||
from django.utils import timezone
|
||||
from rest_framework.reverse import reverse
|
||||
|
||||
from .. import index, matching
|
||||
from ..file_handling import delete_empty_directories, generate_filename, \
|
||||
@@ -157,10 +158,10 @@ def run_post_consume_script(sender, document, **kwargs):
|
||||
settings.POST_CONSUME_SCRIPT,
|
||||
str(document.pk),
|
||||
document.file_name,
|
||||
document.source_path,
|
||||
document.thumbnail_path,
|
||||
None,
|
||||
None,
|
||||
os.path.normpath(document.source_path),
|
||||
os.path.normpath(document.thumbnail_path),
|
||||
reverse("document-download", kwargs={"pk": document.pk}),
|
||||
reverse("document-thumb", kwargs={"pk": document.pk}),
|
||||
str(document.correspondent),
|
||||
str(",".join(document.tags.all().values_list("slug", flat=True)))
|
||||
)).wait()
|
||||
@@ -217,7 +218,11 @@ def update_filename_and_move_files(sender, instance, **kwargs):
|
||||
try:
|
||||
os.rename(old_path, new_path)
|
||||
instance.filename = new_filename
|
||||
instance.save()
|
||||
# Don't save here to prevent infinite recursion.
|
||||
Document.objects.filter(pk=instance.pk).update(filename=new_filename)
|
||||
|
||||
logging.getLogger(__name__).debug(
|
||||
f"Moved file {old_path} to {new_path}.")
|
||||
|
||||
except OSError as e:
|
||||
instance.filename = old_filename
|
||||
|
||||
@@ -3,11 +3,12 @@ import logging
|
||||
from django.conf import settings
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents import index
|
||||
from documents import index, sanity_checker
|
||||
from documents.classifier import DocumentClassifier, \
|
||||
IncompatibleClassifierVersionError
|
||||
from documents.consumer import Consumer, ConsumerError
|
||||
from documents.models import Document
|
||||
from documents.sanity_checker import SanityFailedError
|
||||
|
||||
|
||||
def index_optimize():
|
||||
@@ -74,3 +75,12 @@ def consume_file(path,
|
||||
else:
|
||||
raise ConsumerError("Unknown error: Returned document was null, but "
|
||||
"no error message was given.")
|
||||
|
||||
|
||||
def sanity_check():
|
||||
messages = sanity_checker.check_sanity()
|
||||
|
||||
if len(messages) > 0:
|
||||
raise SanityFailedError(messages)
|
||||
else:
|
||||
return "No issues detected."
|
||||
|
||||
BIN
src/documents/tests/samples/documents/originals/0000001.pdf
Normal file
BIN
src/documents/tests/samples/documents/originals/0000002.pdf.gpg
Normal file
BIN
src/documents/tests/samples/documents/thumbnails/0000001.png
Normal file
|
After Width: | Height: | Size: 7.7 KiB |
BIN
src/documents/tests/samples/documents/thumbnails/0000002.png.gpg
Normal file
BIN
src/documents/tests/samples/simple.pdf
Normal file
BIN
src/documents/tests/samples/simple.zip
Normal file
@@ -1,40 +1,25 @@
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from unittest import mock
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
from django.test import override_settings
|
||||
from pathvalidate import ValidationError
|
||||
from rest_framework.test import APITestCase
|
||||
from whoosh.writing import AsyncWriter
|
||||
|
||||
from documents import index
|
||||
from documents.models import Document, Correspondent, DocumentType, Tag
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class DocumentApiTest(APITestCase):
|
||||
class TestDocumentApi(DirectoriesMixin, APITestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.scratch_dir = tempfile.mkdtemp()
|
||||
self.media_dir = tempfile.mkdtemp()
|
||||
self.originals_dir = os.path.join(self.media_dir, "documents", "originals")
|
||||
self.thumbnail_dir = os.path.join(self.media_dir, "documents", "thumbnails")
|
||||
|
||||
os.makedirs(self.originals_dir, exist_ok=True)
|
||||
os.makedirs(self.thumbnail_dir, exist_ok=True)
|
||||
|
||||
override_settings(
|
||||
SCRATCH_DIR=self.scratch_dir,
|
||||
MEDIA_ROOT=self.media_dir,
|
||||
ORIGINALS_DIR=self.originals_dir,
|
||||
THUMBNAIL_DIR=self.thumbnail_dir
|
||||
).enable()
|
||||
super(TestDocumentApi, self).setUp()
|
||||
|
||||
user = User.objects.create_superuser(username="temp_admin")
|
||||
self.client.force_login(user=user)
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.scratch_dir, ignore_errors=True)
|
||||
shutil.rmtree(self.media_dir, ignore_errors=True)
|
||||
|
||||
def testDocuments(self):
|
||||
|
||||
response = self.client.get("/api/documents/").data
|
||||
@@ -87,7 +72,7 @@ class DocumentApiTest(APITestCase):
|
||||
|
||||
def test_document_actions(self):
|
||||
|
||||
_, filename = tempfile.mkstemp(dir=self.originals_dir)
|
||||
_, filename = tempfile.mkstemp(dir=self.dirs.originals_dir)
|
||||
|
||||
content = b"This is a test"
|
||||
content_thumbnail = b"thumbnail content"
|
||||
@@ -97,7 +82,7 @@ class DocumentApiTest(APITestCase):
|
||||
|
||||
doc = Document.objects.create(title="none", filename=os.path.basename(filename), mime_type="application/pdf")
|
||||
|
||||
with open(os.path.join(self.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f:
|
||||
with open(os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f:
|
||||
f.write(content_thumbnail)
|
||||
|
||||
response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
|
||||
@@ -179,6 +164,109 @@ class DocumentApiTest(APITestCase):
|
||||
results = response.data['results']
|
||||
self.assertEqual(len(results), 3)
|
||||
|
||||
def test_search_no_query(self):
|
||||
response = self.client.get("/api/search/")
|
||||
results = response.data['results']
|
||||
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
def test_search(self):
|
||||
d1=Document.objects.create(title="invoice", content="the thing i bought at a shop and paid with bank account", checksum="A", pk=1)
|
||||
d2=Document.objects.create(title="bank statement 1", content="things i paid for in august", pk=2, checksum="B")
|
||||
d3=Document.objects.create(title="bank statement 3", content="things i paid for in september", pk=3, checksum="C")
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
# Note to future self: there is a reason we dont use a model signal handler to update the index: some operations edit many documents at once
|
||||
# (retagger, renamer) and we don't want to open a writer for each of these, but rather perform the entire operation with one writer.
|
||||
# That's why we cant open the writer in a model on_save handler or something.
|
||||
index.update_document(writer, d1)
|
||||
index.update_document(writer, d2)
|
||||
index.update_document(writer, d3)
|
||||
response = self.client.get("/api/search/?query=bank")
|
||||
results = response.data['results']
|
||||
self.assertEqual(response.data['count'], 3)
|
||||
self.assertEqual(response.data['page'], 1)
|
||||
self.assertEqual(response.data['page_count'], 1)
|
||||
self.assertEqual(len(results), 3)
|
||||
|
||||
response = self.client.get("/api/search/?query=september")
|
||||
results = response.data['results']
|
||||
self.assertEqual(response.data['count'], 1)
|
||||
self.assertEqual(response.data['page'], 1)
|
||||
self.assertEqual(response.data['page_count'], 1)
|
||||
self.assertEqual(len(results), 1)
|
||||
|
||||
response = self.client.get("/api/search/?query=statement")
|
||||
results = response.data['results']
|
||||
self.assertEqual(response.data['count'], 2)
|
||||
self.assertEqual(response.data['page'], 1)
|
||||
self.assertEqual(response.data['page_count'], 1)
|
||||
self.assertEqual(len(results), 2)
|
||||
|
||||
response = self.client.get("/api/search/?query=sfegdfg")
|
||||
results = response.data['results']
|
||||
self.assertEqual(response.data['count'], 0)
|
||||
self.assertEqual(response.data['page'], 0)
|
||||
self.assertEqual(response.data['page_count'], 0)
|
||||
self.assertEqual(len(results), 0)
|
||||
|
||||
def test_search_multi_page(self):
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(55):
|
||||
doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content="content")
|
||||
index.update_document(writer, doc)
|
||||
|
||||
# This is here so that we test that no document gets returned twice (might happen if the paging is not working)
|
||||
seen_ids = []
|
||||
|
||||
for i in range(1, 6):
|
||||
response = self.client.get(f"/api/search/?query=content&page={i}")
|
||||
results = response.data['results']
|
||||
self.assertEqual(response.data['count'], 55)
|
||||
self.assertEqual(response.data['page'], i)
|
||||
self.assertEqual(response.data['page_count'], 6)
|
||||
self.assertEqual(len(results), 10)
|
||||
|
||||
for result in results:
|
||||
self.assertNotIn(result['id'], seen_ids)
|
||||
seen_ids.append(result['id'])
|
||||
|
||||
response = self.client.get(f"/api/search/?query=content&page=6")
|
||||
results = response.data['results']
|
||||
self.assertEqual(response.data['count'], 55)
|
||||
self.assertEqual(response.data['page'], 6)
|
||||
self.assertEqual(response.data['page_count'], 6)
|
||||
self.assertEqual(len(results), 5)
|
||||
|
||||
for result in results:
|
||||
self.assertNotIn(result['id'], seen_ids)
|
||||
seen_ids.append(result['id'])
|
||||
|
||||
response = self.client.get(f"/api/search/?query=content&page=7")
|
||||
results = response.data['results']
|
||||
self.assertEqual(response.data['count'], 55)
|
||||
self.assertEqual(response.data['page'], 6)
|
||||
self.assertEqual(response.data['page_count'], 6)
|
||||
self.assertEqual(len(results), 5)
|
||||
|
||||
def test_search_invalid_page(self):
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(15):
|
||||
doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content="content")
|
||||
index.update_document(writer, doc)
|
||||
|
||||
first_page = self.client.get(f"/api/search/?query=content&page=1").data
|
||||
second_page = self.client.get(f"/api/search/?query=content&page=2").data
|
||||
should_be_first_page_1 = self.client.get(f"/api/search/?query=content&page=0").data
|
||||
should_be_first_page_2 = self.client.get(f"/api/search/?query=content&page=dgfd").data
|
||||
should_be_first_page_3 = self.client.get(f"/api/search/?query=content&page=").data
|
||||
should_be_first_page_4 = self.client.get(f"/api/search/?query=content&page=-7868").data
|
||||
|
||||
self.assertDictEqual(first_page, should_be_first_page_1)
|
||||
self.assertDictEqual(first_page, should_be_first_page_2)
|
||||
self.assertDictEqual(first_page, should_be_first_page_3)
|
||||
self.assertDictEqual(first_page, should_be_first_page_4)
|
||||
self.assertNotEqual(len(first_page['results']), len(second_page['results']))
|
||||
|
||||
@mock.patch("documents.index.autocomplete")
|
||||
def test_search_autocomplete(self, m):
|
||||
m.side_effect = lambda ix, term, limit: [term for _ in range(limit)]
|
||||
@@ -201,6 +289,22 @@ class DocumentApiTest(APITestCase):
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(len(response.data), 10)
|
||||
|
||||
def test_search_spelling_correction(self):
|
||||
with AsyncWriter(index.open_index()) as writer:
|
||||
for i in range(55):
|
||||
doc = Document.objects.create(checksum=str(i), pk=i+1, title=f"Document {i+1}", content=f"Things document {i+1}")
|
||||
index.update_document(writer, doc)
|
||||
|
||||
response = self.client.get("/api/search/?query=thing")
|
||||
correction = response.data['corrected_query']
|
||||
|
||||
self.assertEqual(correction, "things")
|
||||
|
||||
response = self.client.get("/api/search/?query=things")
|
||||
correction = response.data['corrected_query']
|
||||
|
||||
self.assertEqual(correction, None)
|
||||
|
||||
def test_statistics(self):
|
||||
|
||||
doc1 = Document.objects.create(title="none1", checksum="A")
|
||||
@@ -215,3 +319,42 @@ class DocumentApiTest(APITestCase):
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(response.data['documents_total'], 3)
|
||||
self.assertEqual(response.data['documents_inbox'], 1)
|
||||
|
||||
@mock.patch("documents.forms.async_task")
|
||||
def test_upload(self, m):
|
||||
|
||||
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
|
||||
response = self.client.post("/api/documents/post_document/", {"document": f})
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
|
||||
m.assert_called_once()
|
||||
|
||||
args, kwargs = m.call_args
|
||||
self.assertEqual(kwargs['override_filename'], "simple.pdf")
|
||||
|
||||
@mock.patch("documents.forms.async_task")
|
||||
def test_upload_invalid_form(self, m):
|
||||
|
||||
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
|
||||
response = self.client.post("/api/documents/post_document/", {"documenst": f})
|
||||
self.assertEqual(response.status_code, 400)
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.forms.async_task")
|
||||
def test_upload_invalid_file(self, m):
|
||||
|
||||
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb") as f:
|
||||
response = self.client.post("/api/documents/post_document/", {"document": f})
|
||||
self.assertEqual(response.status_code, 400)
|
||||
m.assert_not_called()
|
||||
|
||||
@mock.patch("documents.forms.async_task")
|
||||
@mock.patch("documents.forms.validate_filename")
|
||||
def test_upload_invalid_filename(self, validate_filename, async_task):
|
||||
validate_filename.side_effect = ValidationError()
|
||||
with open(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb") as f:
|
||||
response = self.client.post("/api/documents/post_document/", {"document": f})
|
||||
self.assertEqual(response.status_code, 400)
|
||||
|
||||
async_task.assert_not_called()
|
||||
|
||||
@@ -1,24 +1,29 @@
|
||||
import tempfile
|
||||
from time import sleep
|
||||
from unittest import mock
|
||||
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from documents.classifier import DocumentClassifier
|
||||
from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError
|
||||
from documents.models import Correspondent, Document, Tag, DocumentType
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
|
||||
|
||||
class TestClassifier(TestCase):
|
||||
class TestClassifier(DirectoriesMixin, TestCase):
|
||||
|
||||
def setUp(self):
|
||||
|
||||
super(TestClassifier, self).setUp()
|
||||
self.classifier = DocumentClassifier()
|
||||
|
||||
def generate_test_data(self):
|
||||
self.c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||
self.c2 = Correspondent.objects.create(name="c2")
|
||||
self.c3 = Correspondent.objects.create(name="c3", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||
self.t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
self.t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True)
|
||||
self.t3 = Tag.objects.create(name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45)
|
||||
self.dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||
self.dt2 = DocumentType.objects.create(name="dt2", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||
|
||||
self.doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=self.c1, checksum="A", document_type=self.dt)
|
||||
self.doc2 = Document.objects.create(title="doc1", content="this is another document, but from c2", correspondent=self.c2, checksum="B")
|
||||
@@ -59,8 +64,8 @@ class TestClassifier(TestCase):
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_correspondent(self.doc1.content), self.c1.pk)
|
||||
self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
|
||||
self.assertTupleEqual(self.classifier.predict_tags(self.doc1.content), (self.t1.pk,))
|
||||
self.assertTupleEqual(self.classifier.predict_tags(self.doc2.content), (self.t1.pk, self.t3.pk))
|
||||
self.assertListEqual(self.classifier.predict_tags(self.doc1.content), [self.t1.pk])
|
||||
self.assertListEqual(self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk])
|
||||
self.assertEqual(self.classifier.predict_document_type(self.doc1.content), self.dt.pk)
|
||||
self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
|
||||
|
||||
@@ -71,6 +76,44 @@ class TestClassifier(TestCase):
|
||||
self.assertTrue(self.classifier.train())
|
||||
self.assertFalse(self.classifier.train())
|
||||
|
||||
def testVersionIncreased(self):
|
||||
|
||||
self.generate_test_data()
|
||||
self.assertTrue(self.classifier.train())
|
||||
self.assertFalse(self.classifier.train())
|
||||
|
||||
self.classifier.save_classifier()
|
||||
|
||||
classifier2 = DocumentClassifier()
|
||||
|
||||
current_ver = DocumentClassifier.FORMAT_VERSION
|
||||
with mock.patch("documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver+1):
|
||||
# assure that we won't load old classifiers.
|
||||
self.assertRaises(IncompatibleClassifierVersionError, classifier2.reload)
|
||||
|
||||
self.classifier.save_classifier()
|
||||
|
||||
# assure that we can load the classifier after saving it.
|
||||
classifier2.reload()
|
||||
|
||||
def testReload(self):
|
||||
|
||||
self.generate_test_data()
|
||||
self.assertTrue(self.classifier.train())
|
||||
self.classifier.save_classifier()
|
||||
|
||||
classifier2 = DocumentClassifier()
|
||||
classifier2.reload()
|
||||
v1 = classifier2.classifier_version
|
||||
|
||||
# change the classifier after some time.
|
||||
sleep(1)
|
||||
self.classifier.save_classifier()
|
||||
|
||||
classifier2.reload()
|
||||
v2 = classifier2.classifier_version
|
||||
self.assertNotEqual(v1, v2)
|
||||
|
||||
@override_settings(DATA_DIR=tempfile.mkdtemp())
|
||||
def testSaveClassifier(self):
|
||||
|
||||
@@ -83,3 +126,112 @@ class TestClassifier(TestCase):
|
||||
new_classifier = DocumentClassifier()
|
||||
new_classifier.reload()
|
||||
self.assertFalse(new_classifier.train())
|
||||
|
||||
def test_one_correspondent_predict(self):
|
||||
c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=c1, checksum="A")
|
||||
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
||||
|
||||
def test_one_correspondent_predict_manydocs(self):
|
||||
c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=c1, checksum="A")
|
||||
doc2 = Document.objects.create(title="doc2", content="this is a document from noone", checksum="B")
|
||||
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
||||
self.assertIsNone(self.classifier.predict_correspondent(doc2.content))
|
||||
|
||||
def test_one_type_predict(self):
|
||||
dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1",
|
||||
checksum="A", document_type=dt)
|
||||
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
||||
|
||||
def test_one_type_predict_manydocs(self):
|
||||
dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1",
|
||||
checksum="A", document_type=dt)
|
||||
|
||||
doc2 = Document.objects.create(title="doc1", content="this is a document from c2",
|
||||
checksum="B")
|
||||
|
||||
self.classifier.train()
|
||||
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
||||
self.assertIsNone(self.classifier.predict_document_type(doc2.content))
|
||||
|
||||
def test_one_tag_predict(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
|
||||
doc1.tags.add(t1)
|
||||
self.classifier.train()
|
||||
self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk])
|
||||
|
||||
def test_one_tag_predict_unassigned(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
|
||||
self.classifier.train()
|
||||
self.assertListEqual(self.classifier.predict_tags(doc1.content), [])
|
||||
|
||||
def test_two_tags_predict_singledoc(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
||||
|
||||
doc4 = Document.objects.create(title="doc1", content="this is a document from c4", checksum="D")
|
||||
|
||||
doc4.tags.add(t1)
|
||||
doc4.tags.add(t2)
|
||||
self.classifier.train()
|
||||
self.assertListEqual(self.classifier.predict_tags(doc4.content), [t1.pk, t2.pk])
|
||||
|
||||
def test_two_tags_predict(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
doc2 = Document.objects.create(title="doc1", content="this is a document from c2", checksum="B")
|
||||
doc3 = Document.objects.create(title="doc1", content="this is a document from c3", checksum="C")
|
||||
doc4 = Document.objects.create(title="doc1", content="this is a document from c4", checksum="D")
|
||||
|
||||
doc1.tags.add(t1)
|
||||
doc2.tags.add(t2)
|
||||
|
||||
doc4.tags.add(t1)
|
||||
doc4.tags.add(t2)
|
||||
self.classifier.train()
|
||||
self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk])
|
||||
self.assertListEqual(self.classifier.predict_tags(doc2.content), [t2.pk])
|
||||
self.assertListEqual(self.classifier.predict_tags(doc3.content), [])
|
||||
self.assertListEqual(self.classifier.predict_tags(doc4.content), [t1.pk, t2.pk])
|
||||
|
||||
def test_one_tag_predict_multi(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
doc2 = Document.objects.create(title="doc2", content="this is a document from c2", checksum="B")
|
||||
|
||||
doc1.tags.add(t1)
|
||||
doc2.tags.add(t1)
|
||||
self.classifier.train()
|
||||
self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk])
|
||||
self.assertListEqual(self.classifier.predict_tags(doc2.content), [t1.pk])
|
||||
|
||||
def test_one_tag_predict_multi_2(self):
|
||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||
|
||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
||||
doc2 = Document.objects.create(title="doc2", content="this is a document from c2", checksum="B")
|
||||
|
||||
doc1.tags.add(t1)
|
||||
self.classifier.train()
|
||||
self.assertListEqual(self.classifier.predict_tags(doc1.content), [t1.pk])
|
||||
self.assertListEqual(self.classifier.predict_tags(doc2.content), [])
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
from unittest import mock
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
from .utils import DirectoriesMixin
|
||||
from ..consumer import Consumer, ConsumerError
|
||||
from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
|
||||
from ..parsers import DocumentParser, ParseError
|
||||
@@ -408,32 +408,22 @@ def fake_magic_from_file(file, mime=False):
|
||||
|
||||
|
||||
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
|
||||
class TestConsumer(TestCase):
|
||||
class TestConsumer(DirectoriesMixin, TestCase):
|
||||
|
||||
def make_dummy_parser(self, path, logging_group):
|
||||
return DummyParser(path, logging_group, self.scratch_dir)
|
||||
return DummyParser(path, logging_group, self.dirs.scratch_dir)
|
||||
|
||||
def make_faulty_parser(self, path, logging_group):
|
||||
return FaultyParser(path, logging_group, self.scratch_dir)
|
||||
return FaultyParser(path, logging_group, self.dirs.scratch_dir)
|
||||
|
||||
def setUp(self):
|
||||
self.scratch_dir = tempfile.mkdtemp()
|
||||
self.media_dir = tempfile.mkdtemp()
|
||||
self.consumption_dir = tempfile.mkdtemp()
|
||||
|
||||
override_settings(
|
||||
SCRATCH_DIR=self.scratch_dir,
|
||||
MEDIA_ROOT=self.media_dir,
|
||||
ORIGINALS_DIR=os.path.join(self.media_dir, "documents", "originals"),
|
||||
THUMBNAIL_DIR=os.path.join(self.media_dir, "documents", "thumbnails"),
|
||||
CONSUMPTION_DIR=self.consumption_dir
|
||||
).enable()
|
||||
super(TestConsumer, self).setUp()
|
||||
|
||||
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||
m = patcher.start()
|
||||
m.return_value = [(None, {
|
||||
"parser": self.make_dummy_parser,
|
||||
"mime_types": ["application/pdf"],
|
||||
"mime_types": {"application/pdf": ".pdf"},
|
||||
"weight": 0
|
||||
})]
|
||||
|
||||
@@ -441,15 +431,11 @@ class TestConsumer(TestCase):
|
||||
|
||||
self.consumer = Consumer()
|
||||
|
||||
def tearDown(self):
|
||||
shutil.rmtree(self.scratch_dir, ignore_errors=True)
|
||||
shutil.rmtree(self.media_dir, ignore_errors=True)
|
||||
shutil.rmtree(self.consumption_dir, ignore_errors=True)
|
||||
|
||||
def get_test_file(self):
|
||||
fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.scratch_dir)
|
||||
fd, f = tempfile.mkstemp(suffix=".pdf", dir=self.dirs.scratch_dir)
|
||||
return f
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
|
||||
def testNormalOperation(self):
|
||||
|
||||
filename = self.get_test_file()
|
||||
@@ -516,26 +502,6 @@ class TestConsumer(TestCase):
|
||||
|
||||
self.fail("Should throw exception")
|
||||
|
||||
@override_settings(CONSUMPTION_DIR=None)
|
||||
def testConsumptionDirUnset(self):
|
||||
try:
|
||||
self.consumer.try_consume_file(self.get_test_file())
|
||||
except ConsumerError as e:
|
||||
self.assertEqual(str(e), "The CONSUMPTION_DIR settings variable does not appear to be set.")
|
||||
return
|
||||
|
||||
self.fail("Should throw exception")
|
||||
|
||||
@override_settings(CONSUMPTION_DIR="asd")
|
||||
def testNoConsumptionDir(self):
|
||||
try:
|
||||
self.consumer.try_consume_file(self.get_test_file())
|
||||
except ConsumerError as e:
|
||||
self.assertEqual(str(e), "Consumption directory asd does not exist")
|
||||
return
|
||||
|
||||
self.fail("Should throw exception")
|
||||
|
||||
def testDuplicates(self):
|
||||
self.consumer.try_consume_file(self.get_test_file())
|
||||
|
||||
@@ -554,7 +520,7 @@ class TestConsumer(TestCase):
|
||||
try:
|
||||
self.consumer.try_consume_file(self.get_test_file())
|
||||
except ConsumerError as e:
|
||||
self.assertTrue(str(e).startswith("No parsers abvailable"))
|
||||
self.assertTrue("File extension .pdf does not map to any" in str(e))
|
||||
return
|
||||
|
||||
self.fail("Should throw exception")
|
||||
@@ -563,7 +529,7 @@ class TestConsumer(TestCase):
|
||||
def testFaultyParser(self, m):
|
||||
m.return_value = [(None, {
|
||||
"parser": self.make_faulty_parser,
|
||||
"mime_types": ["application/pdf"],
|
||||
"mime_types": {"application/pdf": ".pdf"},
|
||||
"weight": 0
|
||||
})]
|
||||
|
||||
@@ -598,13 +564,34 @@ class TestConsumer(TestCase):
|
||||
|
||||
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
|
||||
|
||||
print(document.source_path)
|
||||
print("===")
|
||||
|
||||
self.assertEqual(document.title, "new docs")
|
||||
self.assertEqual(document.correspondent.name, "Bank")
|
||||
self.assertEqual(document.filename, "bank/new-docs-0000001.pdf")
|
||||
|
||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||
@mock.patch("documents.signals.handlers.generate_filename")
|
||||
def testFilenameHandlingUnstableFormat(self, m):
|
||||
|
||||
filenames = ["this", "that", "now this", "i cant decide"]
|
||||
|
||||
def get_filename():
|
||||
f = filenames.pop()
|
||||
filenames.insert(0, f)
|
||||
return f
|
||||
|
||||
m.side_effect = lambda f: get_filename()
|
||||
|
||||
filename = self.get_test_file()
|
||||
|
||||
Tag.objects.create(name="test", is_inbox_tag=True)
|
||||
|
||||
document = self.consumer.try_consume_file(filename, override_filename="Bank - Test.pdf", override_title="new docs")
|
||||
|
||||
self.assertEqual(document.title, "new docs")
|
||||
self.assertEqual(document.correspondent.name, "Bank")
|
||||
self.assertIsNotNone(os.path.isfile(document.title))
|
||||
self.assertTrue(os.path.isfile(document.source_path))
|
||||
|
||||
@mock.patch("documents.consumer.DocumentClassifier")
|
||||
def testClassifyDocument(self, m):
|
||||
correspondent = Correspondent.objects.create(name="test")
|
||||
|
||||