diff --git a/bookmarks/api/routes.py b/bookmarks/api/routes.py index 8c1257b6..332dc605 100644 --- a/bookmarks/api/routes.py +++ b/bookmarks/api/routes.py @@ -56,7 +56,12 @@ def get_queryset(self): return Bookmark.objects.all().filter(owner=user) def get_serializer_context(self): - return {"request": self.request, "user": self.request.user} + disable_scraping = "disable_scraping" in self.request.GET + return { + "request": self.request, + "user": self.request.user, + "disable_scraping": disable_scraping, + } @action(methods=["get"], detail=False) def archived(self, request): @@ -101,16 +106,7 @@ def check(self, request): self.get_serializer(bookmark).data if bookmark else None ) - # Either return metadata from existing bookmark, or scrape from URL - if bookmark: - metadata = WebsiteMetadata( - url, - bookmark.website_title, - bookmark.website_description, - None, - ) - else: - metadata = website_loader.load_website_metadata(url) + metadata = website_loader.load_website_metadata(url) # Return tags that would be automatically applied to the bookmark profile = request.user.profile @@ -120,7 +116,7 @@ def check(self, request): auto_tags = auto_tagging.get_tags(profile.auto_tagging_rules, url) except Exception as e: logger.error( - f"Failed to auto-tag bookmark. url={bookmark.url}", + f"Failed to auto-tag bookmark. url={url}", exc_info=e, ) diff --git a/bookmarks/api/serializers.py b/bookmarks/api/serializers.py index 38f19b9a..88efa269 100644 --- a/bookmarks/api/serializers.py +++ b/bookmarks/api/serializers.py @@ -4,7 +4,11 @@ from rest_framework.serializers import ListSerializer from bookmarks.models import Bookmark, Tag, build_tag_string, UserProfile -from bookmarks.services.bookmarks import create_bookmark, update_bookmark +from bookmarks.services.bookmarks import ( + create_bookmark, + update_bookmark, + enhance_with_website_metadata, +) from bookmarks.services.tags import get_or_create_tag @@ -29,8 +33,6 @@ class Meta: "title", "description", "notes", - "website_title", - "website_description", "web_archive_snapshot_url", "favicon_url", "preview_image_url", @@ -40,15 +42,17 @@ class Meta: "tag_names", "date_added", "date_modified", - ] - read_only_fields = [ "website_title", "website_description", + ] + read_only_fields = [ "web_archive_snapshot_url", "favicon_url", "preview_image_url", "date_added", "date_modified", + "website_title", + "website_description", ] list_serializer_class = BookmarkListSerializer @@ -63,6 +67,9 @@ class Meta: tag_names = TagListField(required=False, default=[]) favicon_url = serializers.SerializerMethodField() preview_image_url = serializers.SerializerMethodField() + # Add dummy website title and description fields for backwards compatibility but keep them empty + website_title = serializers.SerializerMethodField() + website_description = serializers.SerializerMethodField() def get_favicon_url(self, obj: Bookmark): if not obj.favicon_file: @@ -80,6 +87,12 @@ def get_preview_image_url(self, obj: Bookmark): preview_image_url = request.build_absolute_uri(preview_image_file_path) return preview_image_url + def get_website_title(self, obj: Bookmark): + return None + + def get_website_description(self, obj: Bookmark): + return None + def create(self, validated_data): bookmark = Bookmark() bookmark.url = validated_data["url"] @@ -90,7 +103,14 @@ def create(self, validated_data): bookmark.unread = validated_data["unread"] bookmark.shared = validated_data["shared"] tag_string = build_tag_string(validated_data["tag_names"]) - return create_bookmark(bookmark, tag_string, self.context["user"]) + + saved_bookmark = create_bookmark(bookmark, tag_string, self.context["user"]) + # Unless scraping is explicitly disabled, enhance bookmark with website + # metadata to preserve backwards compatibility with clients that expect + # title and description to be populated automatically when left empty + if not self.context.get("disable_scraping", False): + enhance_with_website_metadata(saved_bookmark) + return saved_bookmark def update(self, instance: Bookmark, validated_data): # Update fields if they were provided in the payload diff --git a/bookmarks/e2e/e2e_test_bookmark_details_modal.py b/bookmarks/e2e/e2e_test_bookmark_details_modal.py index 947a48f6..93f03e43 100644 --- a/bookmarks/e2e/e2e_test_bookmark_details_modal.py +++ b/bookmarks/e2e/e2e_test_bookmark_details_modal.py @@ -135,6 +135,9 @@ def test_delete(self): details_modal = self.open_details_modal(bookmark) + # Wait for confirm button to be initialized + self.page.wait_for_timeout(1000) + # Delete bookmark, verify return url with self.page.expect_navigation(url=self.live_server_url + url): details_modal.get_by_text("Delete...").click() diff --git a/bookmarks/e2e/e2e_test_bookmark_form.py b/bookmarks/e2e/e2e_test_bookmark_form.py index 8c031d5d..e6236d65 100644 --- a/bookmarks/e2e/e2e_test_bookmark_form.py +++ b/bookmarks/e2e/e2e_test_bookmark_form.py @@ -1,26 +1,48 @@ +from unittest.mock import patch + from django.urls import reverse from playwright.sync_api import sync_playwright, expect from bookmarks.e2e.helpers import LinkdingE2ETestCase +from bookmarks.services import website_loader class BookmarkFormE2ETestCase(LinkdingE2ETestCase): + def test_create_enter_url_prefills_title_and_description(self): + with patch.object( + website_loader, "load_website_metadata" + ) as mock_load_website_metadata: + mock_load_website_metadata.return_value = website_loader.WebsiteMetadata( + url="https://example.com", + title="Example Domain", + description="This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.", + preview_image=None, + ) + + with sync_playwright() as p: + page = self.open(reverse("bookmarks:new"), p) + + page.get_by_label("URL").fill("https://example.com") + + title = page.get_by_label("Title") + description = page.get_by_label("Description") + expect(title).to_have_value("Example Domain") + expect(description).to_have_value( + "This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission." + ) + def test_create_should_check_for_existing_bookmark(self): existing_bookmark = self.setup_bookmark( title="Existing title", description="Existing description", notes="Existing notes", tags=[self.setup_tag(name="tag1"), self.setup_tag(name="tag2")], - website_title="Existing website title", - website_description="Existing website description", unread=True, ) tag_names = " ".join(existing_bookmark.tag_names) with sync_playwright() as p: - browser = self.setup_browser(p) - page = browser.new_page() - page.goto(self.live_server_url + reverse("bookmarks:new")) + page = self.open(reverse("bookmarks:new"), p) # Enter bookmarked URL page.get_by_label("URL").fill(existing_bookmark.url) @@ -37,14 +59,6 @@ def test_create_should_check_for_existing_bookmark(self): self.assertEqual( existing_bookmark.notes, page.get_by_label("Notes").input_value() ) - self.assertEqual( - existing_bookmark.website_title, - page.get_by_label("Title").get_attribute("placeholder"), - ) - self.assertEqual( - existing_bookmark.website_description, - page.get_by_label("Description").get_attribute("placeholder"), - ) self.assertEqual(tag_names, page.get_by_label("Tags").input_value()) self.assertTrue(tag_names, page.get_by_label("Mark as unread").is_checked()) @@ -55,30 +69,66 @@ def test_create_should_check_for_existing_bookmark(self): state="hidden", timeout=2000 ) - browser.close() - def test_edit_should_not_check_for_existing_bookmark(self): bookmark = self.setup_bookmark() with sync_playwright() as p: - browser = self.setup_browser(p) - page = browser.new_page() - page.goto( - self.live_server_url + reverse("bookmarks:edit", args=[bookmark.id]) - ) + page = self.open(reverse("bookmarks:edit", args=[bookmark.id]), p) page.wait_for_timeout(timeout=1000) page.get_by_text("This URL is already bookmarked.").wait_for(state="hidden") + def test_edit_should_not_prefill_title_and_description(self): + bookmark = self.setup_bookmark() + with patch.object( + website_loader, "load_website_metadata" + ) as mock_load_website_metadata: + mock_load_website_metadata.return_value = website_loader.WebsiteMetadata( + url="https://example.com", + title="Example Domain", + description="This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.", + preview_image=None, + ) + + with sync_playwright() as p: + page = self.open(reverse("bookmarks:edit", args=[bookmark.id]), p) + page.wait_for_timeout(timeout=1000) + + title = page.get_by_label("Title") + description = page.get_by_label("Description") + expect(title).to_have_value(bookmark.title) + expect(description).to_have_value(bookmark.description) + + def test_edit_enter_url_should_not_prefill_title_and_description(self): + bookmark = self.setup_bookmark() + with patch.object( + website_loader, "load_website_metadata" + ) as mock_load_website_metadata: + mock_load_website_metadata.return_value = website_loader.WebsiteMetadata( + url="https://example.com", + title="Example Domain", + description="This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.", + preview_image=None, + ) + + with sync_playwright() as p: + page = self.open(reverse("bookmarks:edit", args=[bookmark.id]), p) + + page.get_by_label("URL").fill("https://example.com") + page.wait_for_timeout(timeout=1000) + + title = page.get_by_label("Title") + description = page.get_by_label("Description") + expect(title).to_have_value(bookmark.title) + expect(description).to_have_value(bookmark.description) + def test_enter_url_of_existing_bookmark_should_show_notes(self): bookmark = self.setup_bookmark( notes="Existing notes", description="Existing description" ) with sync_playwright() as p: - browser = self.setup_browser(p) - page = browser.new_page() - page.goto(self.live_server_url + reverse("bookmarks:new")) + page = self.open(reverse("bookmarks:new"), p) details = page.locator("details.notes") expect(details).not_to_have_attribute("open", value="") @@ -93,11 +143,11 @@ def test_create_should_preview_auto_tags(self): with sync_playwright() as p: # Open page with URL that should have auto tags - browser = self.setup_browser(p) - page = browser.new_page() - url = self.live_server_url + reverse("bookmarks:new") - url += f"?url=https%3A%2F%2Fgithub.com%2Fsissbruecker%2Flinkding" - page.goto(url) + url = ( + reverse("bookmarks:new") + + "?url=https%3A%2F%2Fgithub.com%2Fsissbruecker%2Flinkding" + ) + page = self.open(url, p) auto_tags_hint = page.locator(".form-input-hint.auto-tags") expect(auto_tags_hint).to_be_visible() diff --git a/bookmarks/frontend/components/SearchAutoComplete.svelte b/bookmarks/frontend/components/SearchAutoComplete.svelte index 4a9cec94..64d84181 100644 --- a/bookmarks/frontend/components/SearchAutoComplete.svelte +++ b/bookmarks/frontend/components/SearchAutoComplete.svelte @@ -122,7 +122,7 @@ } const fetchedBookmarks = await api.listBookmarks(suggestionSearch, {limit: 5, offset: 0, path}) bookmarks = fetchedBookmarks.map(bookmark => { - const fullLabel = bookmark.title || bookmark.website_title || bookmark.url + const fullLabel = bookmark.title || bookmark.url const label = clampText(fullLabel, 60) return { type: 'bookmark', diff --git a/bookmarks/migrations/0041_merge_metadata.py b/bookmarks/migrations/0041_merge_metadata.py new file mode 100644 index 00000000..81e3ab91 --- /dev/null +++ b/bookmarks/migrations/0041_merge_metadata.py @@ -0,0 +1,36 @@ +# Generated by Django 5.1.1 on 2024-09-21 08:13 + +from django.db import migrations +from django.db.models import Q +from django.db.models.expressions import RawSQL + +from bookmarks.models import Bookmark + + +def forwards(apps, schema_editor): + Bookmark.objects.filter( + Q(title__isnull=True) | Q(title__exact=""), + ).extra( + where=["website_title IS NOT NULL"] + ).update(title=RawSQL("website_title", ())) + + Bookmark.objects.filter( + Q(description__isnull=True) | Q(description__exact=""), + ).extra(where=["website_description IS NOT NULL"]).update( + description=RawSQL("website_description", ()) + ) + + +def reverse(apps, schema_editor): + pass + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookmarks", "0040_userprofile_items_per_page_and_more"), + ] + + operations = [ + migrations.RunPython(forwards, reverse), + ] diff --git a/bookmarks/models.py b/bookmarks/models.py index 7d8bc5a2..e65c2251 100644 --- a/bookmarks/models.py +++ b/bookmarks/models.py @@ -56,7 +56,9 @@ class Bookmark(models.Model): title = models.CharField(max_length=512, blank=True) description = models.TextField(blank=True) notes = models.TextField(blank=True) + # Obsolete field, kept to not remove column when generating migrations website_title = models.CharField(max_length=512, blank=True, null=True) + # Obsolete field, kept to not remove column when generating migrations website_description = models.TextField(blank=True, null=True) web_archive_snapshot_url = models.CharField(max_length=2048, blank=True) favicon_file = models.CharField(max_length=512, blank=True) @@ -74,14 +76,12 @@ class Bookmark(models.Model): def resolved_title(self): if self.title: return self.title - elif self.website_title: - return self.website_title else: return self.url @property def resolved_description(self): - return self.website_description if not self.description else self.description + return self.description @property def tag_names(self): @@ -141,14 +141,9 @@ class BookmarkForm(forms.ModelForm): # Use URLField for URL url = forms.CharField(validators=[BookmarkURLValidator()]) tag_string = forms.CharField(required=False) - # Do not require title and description in form as we fill these automatically if they are empty + # Do not require title and description as they may be empty title = forms.CharField(max_length=512, required=False) description = forms.CharField(required=False, widget=forms.Textarea()) - # Include website title and description as hidden field as they only provide info when editing bookmarks - website_title = forms.CharField( - max_length=512, required=False, widget=forms.HiddenInput() - ) - website_description = forms.CharField(required=False, widget=forms.HiddenInput()) unread = forms.BooleanField(required=False) shared = forms.BooleanField(required=False) # Hidden field that determines whether to close window/tab after saving the bookmark @@ -162,8 +157,6 @@ class Meta: "title", "description", "notes", - "website_title", - "website_description", "unread", "shared", "auto_close", diff --git a/bookmarks/queries.py b/bookmarks/queries.py index 112f8df0..8fd6a08e 100644 --- a/bookmarks/queries.py +++ b/bookmarks/queries.py @@ -53,8 +53,6 @@ def _base_bookmarks_query( Q(title__icontains=term) | Q(description__icontains=term) | Q(notes__icontains=term) - | Q(website_title__icontains=term) - | Q(website_description__icontains=term) | Q(url__icontains=term) ) @@ -97,10 +95,6 @@ def _base_bookmarks_query( query_set = query_set.annotate( effective_title=Case( When(Q(title__isnull=False) & ~Q(title__exact=""), then=Lower("title")), - When( - Q(website_title__isnull=False) & ~Q(website_title__exact=""), - then=Lower("website_title"), - ), default=Lower("url"), output_field=CharField(), ) diff --git a/bookmarks/services/bookmarks.py b/bookmarks/services/bookmarks.py index 37d940bd..034404fc 100644 --- a/bookmarks/services/bookmarks.py +++ b/bookmarks/services/bookmarks.py @@ -26,8 +26,6 @@ def create_bookmark(bookmark: Bookmark, tag_string: str, current_user: User): _merge_bookmark_data(bookmark, existing_bookmark) return update_bookmark(existing_bookmark, tag_string, current_user) - # Update website info - _update_website_metadata(bookmark) # Set currently logged in user as owner bookmark.owner = current_user # Set dates @@ -67,13 +65,22 @@ def update_bookmark(bookmark: Bookmark, tag_string, current_user: User): if has_url_changed: # Update web archive snapshot, if URL changed tasks.create_web_archive_snapshot(current_user, bookmark, True) - # Only update website metadata if URL changed - _update_website_metadata(bookmark) bookmark.save() return bookmark +def enhance_with_website_metadata(bookmark: Bookmark): + metadata = website_loader.load_website_metadata(bookmark.url) + if not bookmark.title: + bookmark.title = metadata.title or "" + + if not bookmark.description: + bookmark.description = metadata.description or "" + + bookmark.save() + + def archive_bookmark(bookmark: Bookmark): bookmark.is_archived = True bookmark.date_modified = timezone.now() @@ -235,12 +242,6 @@ def _merge_bookmark_data(from_bookmark: Bookmark, to_bookmark: Bookmark): to_bookmark.shared = from_bookmark.shared -def _update_website_metadata(bookmark: Bookmark): - metadata = website_loader.load_website_metadata(bookmark.url) - bookmark.website_title = metadata.title - bookmark.website_description = metadata.description - - def _update_bookmark_tags(bookmark: Bookmark, tag_string: str, user: User): tag_names = parse_tag_string(tag_string) diff --git a/bookmarks/services/website_loader.py b/bookmarks/services/website_loader.py index 389e3746..2134659b 100644 --- a/bookmarks/services/website_loader.py +++ b/bookmarks/services/website_loader.py @@ -14,8 +14,8 @@ @dataclass class WebsiteMetadata: url: str - title: str - description: str + title: str | None + description: str | None preview_image: str | None def to_dict(self): @@ -43,7 +43,8 @@ def load_website_metadata(url: str): start = timezone.now() soup = BeautifulSoup(page_text, "html.parser") - title = soup.title.string.strip() if soup.title is not None else None + if soup.title and soup.title.string: + title = soup.title.string.strip() description_tag = soup.find("meta", attrs={"name": "description"}) description = ( description_tag["content"].strip() diff --git a/bookmarks/templates/bookmarks/form.html b/bookmarks/templates/bookmarks/form.html index f35b32e7..22238523 100644 --- a/bookmarks/templates/bookmarks/form.html +++ b/bookmarks/templates/bookmarks/form.html @@ -3,12 +3,13 @@