Skip to content
This repository has been archived by the owner on Sep 2, 2024. It is now read-only.

Commit

Permalink
Update last crawled timestamp
Browse files Browse the repository at this point in the history
  • Loading branch information
gdonald committed Oct 10, 2023
1 parent f8a2096 commit 3a4d9c7
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 4 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ source 'https://rubygems.org'
ruby '3.2.2'

gem 'aasm', '~> 5.5.0'
gem 'after_commit_everywhere', '~> 1.0'
gem 'bcrypt', '~> 3.1.7'
gem 'bootstrap5-kaminari-views', '~> 0.0.1'
gem 'cssbundling-rails'
Expand Down
4 changes: 4 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ GEM
tzinfo (~> 2.0)
addressable (2.8.5)
public_suffix (>= 2.0.2, < 6.0)
after_commit_everywhere (1.3.1)
activerecord (>= 4.2)
activesupport
airbrussh (1.5.0)
sshkit (>= 1.6.1, != 1.7.0)
ast (2.4.2)
Expand Down Expand Up @@ -405,6 +408,7 @@ PLATFORMS

DEPENDENCIES
aasm (~> 5.5.0)
after_commit_everywhere (~> 1.0)
bcrypt (~> 3.1.7)
bcrypt_pbkdf
bootstrap5-kaminari-views (~> 0.0.1)
Expand Down
1 change: 1 addition & 0 deletions Procfile.dev
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
web: env RUBY_DEBUG_OPEN=true bin/rails server
js: yarn build --watch
css: yarn watch:css
sidekiq: bundle exec sidekiq
8 changes: 7 additions & 1 deletion app/models/page_crawl.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class PageCrawl < ApplicationRecord
transitions from: :created, to: :running
end

event :complete do
event :complete, after_commit: :update_last_crawled do
transitions from: :running, to: :completed
end

Expand Down Expand Up @@ -45,4 +45,10 @@ def self.ransackable_associations(_auth_object = nil)
# Returns the point in time that lies HOST_THROTTLE_SECONDS seconds in the
# past. The interval is read from the environment and falls back to 900
# seconds when the variable is not set.
#
# NOTE(review): presumably used as the cutoff for deciding whether a host
# may be crawled again — confirm against the callers.
#
# @return [Time-like] the current time minus the throttle interval
def self.host_wait
  throttle_seconds = ENV.fetch('HOST_THROTTLE_SECONDS', 900).to_i
  throttle_seconds.seconds.ago
end

private

# Records the current time (in the application's time zone) as the
# `last_crawled_at` value of this crawl's host. Registered as the
# `after_commit` hook of the `complete` event.
#
# NOTE(review): `update` reports failure by returning false rather than
# raising, so a rejected write goes unnoticed here — confirm that is intended.
#
# @return [Boolean] whatever `host.update` returns
def update_last_crawled
  crawled_host = host
  crawled_host.update(last_crawled_at: Time.zone.now)
end
end
4 changes: 1 addition & 3 deletions app/services/page_crawl_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ def crawl!
return unless html?(response)

doc = Nokogiri::HTML(response.body)

page.title = doc.title
page.save!
page.update!(title: doc.title)

Html.create!(page:, content: html(doc))
end
Expand Down
4 changes: 4 additions & 0 deletions spec/factories/page_crawls.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,9 @@
# Factory definitions for page crawl records used by the specs.
FactoryBot.define do
factory :page_crawl do
# NOTE(review): presumably resolved by FactoryBot as an association to the
# :page factory — confirm that factory exists.
page

# Builds the record with its AASM state column preset to 'running', the
# state from which the `complete` event is allowed to transition.
trait :running do
aasm_state { 'running' }
end
end
end
11 changes: 11 additions & 0 deletions spec/models/page_crawl_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,15 @@
expect(described_class.host_wait).to be_a(Time)
end
end

# The method under test is private, so it is exercised through the public
# `complete!` event, which runs it as an after-commit hook.
describe '#update_last_crawled' do
  # Start from a crawl that is already running so `complete!` is a valid transition.
  let(:running_crawl) { create(:page_crawl, :running) }
  let(:host) { running_crawl.host }

  it 'updates the last_crawled_at attribute' do
    expect { running_crawl.complete! }.to change(host, :last_crawled_at)
  end
end
end
3 changes: 3 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# Start coverage tracking with the 'rails' profile and tailor the report.
SimpleCov.start 'rails' do
# Collect branch coverage and treat it as the primary metric.
enable_coverage :branch
primary_coverage :branch
# Drop the 'Channels' and 'Libraries' groups from the report.
# NOTE(review): presumably these groups come from the 'rails' profile — confirm.
SimpleCov.groups.delete('Channels')
SimpleCov.groups.delete('Libraries')
# Report files under app/services in their own 'Services' group.
SimpleCov.add_group 'Services', 'app/services'
end

require 'aasm/rspec'
Expand Down

0 comments on commit 3a4d9c7

Please sign in to comment.