From 21e663aee872193d7a036c3d31e42c87e6cbcc0e Mon Sep 17 00:00:00 2001 From: Eva Date: Thu, 18 May 2023 01:04:49 +0200 Subject: [PATCH] server/net: add support for gallery-dl gallery-dl is tried first; if it fails or returns no content URL, the download falls back to yt-dlp. --- server/Dockerfile | 1 + server/pyproject.toml | 2 +- server/requirements.txt | 1 + server/szurubooru/api/post_api.py | 4 +- server/szurubooru/api/upload_api.py | 2 +- server/szurubooru/func/net.py | 37 +++++++++++++++---- server/szurubooru/rest/context.py | 4 +- .../tests/api/test_post_creating.py | 4 +- .../tests/api/test_post_updating.py | 4 +- server/szurubooru/tests/func/test_net.py | 6 +-- server/szurubooru/tests/rest/test_context.py | 2 +- 11 files changed, 46 insertions(+), 21 deletions(-) diff --git a/server/Dockerfile b/server/Dockerfile index 487f1923..531cf71a 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -27,6 +27,7 @@ RUN apk --no-cache add \ && pip3 install --no-cache-dir --disable-pip-version-check \ "alembic>=0.8.5" \ "coloredlogs==5.0" \ + gallery_dl \ "pyheif==0.6.1" \ "heif-image-plugin>=0.3.2" \ youtube_dl \ diff --git a/server/pyproject.toml b/server/pyproject.toml index ccf47fc4..d39b9e2e 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -3,7 +3,7 @@ line-length = 79 [tool.isort] known_first_party = ["szurubooru"] -known_third_party = ["PIL", "alembic", "coloredlogs", "freezegun", "nacl", "numpy", "pyrfc3339", "pytest", "pytz", "sqlalchemy", "yaml", "youtube_dl"] +known_third_party = ["PIL", "alembic", "coloredlogs", "freezegun", "gallery_dl", "nacl", "numpy", "pyrfc3339", "pytest", "pytz", "sqlalchemy", "yaml", "youtube_dl"] multi_line_output = 3 include_trailing_comma = true force_grid_wrap = 0 diff --git a/server/requirements.txt b/server/requirements.txt index 16b29fff..a9640811 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -1,6 +1,7 @@ alembic>=0.8.5 certifi>=2017.11.5 coloredlogs==5.0 +gallery_dl heif-image-plugin==0.3.2 numpy>=1.8.2 pillow-avif-plugin>=1.1.0 diff 
--git a/server/szurubooru/api/post_api.py b/server/szurubooru/api/post_api.py index daba7f7e..2996937e 100644 --- a/server/szurubooru/api/post_api.py +++ b/server/szurubooru/api/post_api.py @@ -61,7 +61,7 @@ def create_post( auth.verify_privilege(ctx.user, "posts:create:identified") content = ctx.get_file( "content", - use_video_downloader=auth.has_privilege( + use_downloader=auth.has_privilege( ctx.user, "uploads:use_downloader" ), ) @@ -128,7 +128,7 @@ def update_post(ctx: rest.Context, params: Dict[str, str]) -> rest.Response: post, ctx.get_file( "content", - use_video_downloader=auth.has_privilege( + use_downloader=auth.has_privilege( ctx.user, "uploads:use_downloader" ), ), diff --git a/server/szurubooru/api/upload_api.py b/server/szurubooru/api/upload_api.py index 3b7bca8a..193c0018 100644 --- a/server/szurubooru/api/upload_api.py +++ b/server/szurubooru/api/upload_api.py @@ -12,7 +12,7 @@ def create_temporary_file( content = ctx.get_file( "content", allow_tokens=False, - use_video_downloader=auth.has_privilege( + use_downloader=auth.has_privilege( ctx.user, "uploads:use_downloader" ), ) diff --git a/server/szurubooru/func/net.py b/server/szurubooru/func/net.py index c53a62eb..caf75b0f 100644 --- a/server/szurubooru/func/net.py +++ b/server/szurubooru/func/net.py @@ -21,14 +21,22 @@ class DownloadTooLargeError(DownloadError): pass -def download(url: str, use_video_downloader: bool = False) -> bytes: +def download(url: str, use_downloader: bool = False) -> bytes: assert url - youtube_dl_error = None - if use_video_downloader: + dl_error = None + new_url = None + if use_downloader: try: - url = _get_youtube_dl_content_url(url) or url + new_url = _get_gallery_dl_content_url(url) except errors.ThirdPartyError as ex: - youtube_dl_error = ex + dl_error = ex + if new_url: + url = new_url + else: + try: + url = _get_youtube_dl_content_url(url) or url + except errors.ThirdPartyError as ex: + dl_error = ex request = urllib.request.Request(url) if 
config.config["user_agent"]: @@ -55,10 +63,10 @@ def download(url: str, use_video_downloader: bool = False) -> bytes: ) from ex if ( - youtube_dl_error + dl_error and mime.get_mime_type(content_buffer) == "application/octet-stream" ): - raise youtube_dl_error + raise dl_error return content_buffer @@ -81,6 +89,21 @@ def _get_youtube_dl_content_url(url: str) -> str: ) from None +def _get_gallery_dl_content_url(url: str) -> str: + cmd = ["gallery-dl", "-q", "-g", url] + try: + return ( + subprocess.run(cmd, text=True, capture_output=True, check=True) + .stdout.split("\n")[0] + .strip() + ) + except subprocess.CalledProcessError: + raise errors.ThirdPartyError( + "Could not extract content location from URL.", + extra_fields={"URL": url}, + ) from None + + def post_to_webhooks(payload: Dict[str, Any]) -> List[Thread]: threads = [ Thread(target=_post_to_webhook, args=(webhook, payload), daemon=False) diff --git a/server/szurubooru/rest/context.py b/server/szurubooru/rest/context.py index 40ba0bcb..68c9d33b 100644 --- a/server/szurubooru/rest/context.py +++ b/server/szurubooru/rest/context.py @@ -48,7 +48,7 @@ class Context: self, name: str, default: Union[object, bytes] = MISSING, - use_video_downloader: bool = False, + use_downloader: bool = False, allow_tokens: bool = True, ) -> bytes: if name in self._files and self._files[name]: @@ -57,7 +57,7 @@ class Context: if name + "Url" in self._params: return net.download( self._params[name + "Url"], - use_video_downloader=use_video_downloader, + use_downloader=use_downloader, ) if allow_tokens and name + "Token" in self._params: diff --git a/server/szurubooru/tests/api/test_post_creating.py b/server/szurubooru/tests/api/test_post_creating.py index a1ad4de7..f95f9af0 100644 --- a/server/szurubooru/tests/api/test_post_creating.py +++ b/server/szurubooru/tests/api/test_post_creating.py @@ -214,7 +214,7 @@ def test_creating_from_url_saves_source( ) ) net.download.assert_called_once_with( - "example.com", 
use_video_downloader=False + "example.com", use_downloader=False ) posts.create_post.assert_called_once_with( b"content", ["tag1", "tag2"], auth_user @@ -259,7 +259,7 @@ def test_creating_from_url_with_source_specified( ) ) net.download.assert_called_once_with( - "example.com", use_video_downloader=True + "example.com", use_downloader=True ) posts.create_post.assert_called_once_with( b"content", ["tag1", "tag2"], auth_user diff --git a/server/szurubooru/tests/api/test_post_updating.py b/server/szurubooru/tests/api/test_post_updating.py index e4a606d2..2a34f0a9 100644 --- a/server/szurubooru/tests/api/test_post_updating.py +++ b/server/szurubooru/tests/api/test_post_updating.py @@ -124,7 +124,7 @@ def test_uploading_from_url_saves_source( {"post_id": post.post_id}, ) net.download.assert_called_once_with( - "example.com", use_video_downloader=True + "example.com", use_downloader=True ) posts.update_post_content.assert_called_once_with(post, b"content") posts.update_post_source.assert_called_once_with(post, "example.com") @@ -156,7 +156,7 @@ def test_uploading_from_url_with_source_specified( {"post_id": post.post_id}, ) net.download.assert_called_once_with( - "example.com", use_video_downloader=True + "example.com", use_downloader=True ) posts.update_post_content.assert_called_once_with(post, b"content") posts.update_post_source.assert_called_once_with(post, "example2.com") diff --git a/server/szurubooru/tests/func/test_net.py b/server/szurubooru/tests/func/test_net.py index be2f3c93..77562098 100644 --- a/server/szurubooru/tests/func/test_net.py +++ b/server/szurubooru/tests/func/test_net.py @@ -79,7 +79,7 @@ def test_download(): ) def test_too_large_download(url): with pytest.raises(net.DownloadTooLargeError): - net.download(url, use_video_downloader=True) + net.download(url, use_downloader=True) @pytest.mark.skipif( @@ -103,7 +103,7 @@ def test_too_large_download(url): ], ) def test_content_download(url, expected_sha1): - actual_content = net.download(url, 
use_video_downloader=True) + actual_content = net.download(url, use_downloader=True) assert get_sha1(actual_content) == expected_sha1 @@ -113,7 +113,7 @@ def test_content_download(url, expected_sha1): def test_bad_content_downlaod(): url = "http://info.cern.ch/hypertext/WWW/TheProject.html" with pytest.raises(errors.ThirdPartyError): - net.download(url, use_video_downloader=True) + net.download(url, use_downloader=True) def test_no_webhooks(config_injector): diff --git a/server/szurubooru/tests/rest/test_context.py b/server/szurubooru/tests/rest/test_context.py index ec652b18..49fa6821 100644 --- a/server/szurubooru/tests/rest/test_context.py +++ b/server/szurubooru/tests/rest/test_context.py @@ -29,7 +29,7 @@ def test_get_file_from_url(): ) assert ctx.get_file("key") == b"content" net.download.assert_called_once_with( - "example.com", use_video_downloader=False + "example.com", use_downloader=False ) with pytest.raises(errors.ValidationError): assert ctx.get_file("non-existing")