Skip to content

Commit 77de05f

Browse files
authored
Add task to verify indexing against Top 100 RubyGems (#2330)
Add task to verify indexing against Top 100 RubyGems
1 parent 7885527 commit 77de05f

File tree

4 files changed

+222
-0
lines changed

4 files changed

+222
-0
lines changed

.github/workflows/indexing.yml

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
name: CI (indexing)
2+
3+
on:
4+
push:
5+
paths:
6+
- 'Gemfile.lock'
7+
- 'lib/ruby_indexer/**'
8+
pull_request:
9+
paths:
10+
- 'Gemfile.lock'
11+
- 'lib/ruby_indexer/**'
12+
13+
jobs:
14+
indexing_sanity_check:
15+
runs-on: ubuntu-latest
16+
steps:
17+
- uses: actions/checkout@v4
18+
19+
- name: Set up Ruby
20+
uses: ruby/setup-ruby@v1
21+
with:
22+
bundler-cache: true
23+
24+
- name: Index Top 100 Ruby gems
25+
run: bundle exec rake index:topgems

rakelib/index.rake

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# frozen_string_literal: true
2+
3+
# Based on https://github.com/ruby/prism/blob/main/rakelib/lex.rake
4+
5+
# Helpers for running the gem download/index tasks concurrently.
module GemIndexing
  class << self
    # Runs +block+ once for every element of +items+, spreading the work over
    # a pool of threads, and blocks until every item has been processed. This
    # is particularly useful for tasks that are IO-bound, like downloading
    # files or reading files from disk.
    #
    # The pool size is read from the WORKERS environment variable (default 16)
    # and is clamped to at least one thread so that a zero or non-numeric
    # value cannot silently skip all of the work.
    #
    # Note: this sets Thread.abort_on_exception globally, so an exception
    # raised inside any worker aborts the whole process.
    #
    # @param items [Enumerable] the work items to hand to the block
    # @yieldparam item [Object] one element of +items+
    # @return [Array<Thread>] the joined worker threads
    def parallelize(items, &block)
      Thread.abort_on_exception = true

      queue = Queue.new
      items.each { |item| queue << item }

      worker_count = [ENV.fetch("WORKERS") { 16 }.to_i, 1].max
      workers = Array.new(worker_count) { parallelize_thread(queue, &block) }

      workers.map(&:join)
    end

    private

    # Create a new thread with a minimal number of locals that it can access.
    #
    # The thread drains +queue+ with a non-blocking pop. Checking `empty?` and
    # then doing a blocking `shift` is a race: another worker can take the
    # last item between the two calls, leaving this thread waiting forever.
    def parallelize_thread(queue, &block)
      Thread.new do
        loop do
          item =
            begin
              queue.pop(true)
            rescue ThreadError
              # Raised by a non-blocking pop when the queue is empty: no work left.
              break
            end

          block.call(item)
        end
      end
    end
  end
end
33+
34+
# YAML file listing the gems (as "name-version" strings) to download and index.
TOP_100_GEM_FILENAME = "rakelib/top_100_gems.yml"
# Scratch directory that the gems' Ruby sources are extracted into.
TOP_100_GEMS_DIR = "tmp/top_100_gems"

namespace :download do
  directory TOP_100_GEMS_DIR

  desc "Download the top 100 rubygems under #{TOP_100_GEMS_DIR}/"
  task topgems: TOP_100_GEMS_DIR do
    $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
    require "net/http"
    require "rubygems/package"
    require "tmpdir"
    require "yaml"

    GemIndexing.parallelize(YAML.safe_load_file(TOP_100_GEM_FILENAME)) do |gem_name|
      directory = File.expand_path("#{TOP_100_GEMS_DIR}/#{gem_name}")
      # Skip gems that were already extracted by a previous run.
      next if File.directory?(directory)

      puts "Downloading #{gem_name}"

      uri = URI.parse("https://rubygems.org/gems/#{gem_name}.gem")
      response = Net::HTTP.get_response(uri)
      unless response.is_a?(Net::HTTPSuccess)
        raise "Failed to download #{gem_name} from #{uri}: #{response.code} #{response.message}"
      end

      Dir.mktmpdir do |tmpdir|
        filepath = File.join(tmpdir, "#{gem_name}.gem")
        # A .gem file is a binary archive, so write it in binary mode.
        File.binwrite(filepath, response.body)
        # Only the Ruby sources are needed for indexing.
        Gem::Package.new(filepath).extract_files(directory, "**/*.rb")
      end
    end
  end
end
65+
66+
# This task indexes against the top 100 gems, and will exit(1) if any fail.
#
# Every Ruby file of every downloaded gem is fed to a fresh RubyIndexer::Index
# (one index per gem). Failures are collected across all worker threads and
# reported together at the end; the download directory is always removed.
#
# NOTE(review): RubyIndexer is not required here, so it is presumably loaded by
# the Rakefile before this task runs; verify.
desc "Index against the top 100 rubygems"
task "index:topgems": ["download:topgems"] do
  $LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
  require "fileutils"
  require "net/http"
  require "rubygems/package"
  require "tmpdir"
  require "yaml"

  gem_names = YAML.safe_load_file(TOP_100_GEM_FILENAME)

  # A Queue is thread-safe, so workers can report failures concurrently
  # without overwriting each other's results.
  failures = Queue.new

  GemIndexing.parallelize(gem_names) do |gem_name|
    directory = File.expand_path("#{TOP_100_GEMS_DIR}/#{gem_name}")

    index = RubyIndexer::Index.new

    Dir[File.join(directory, "**", "*.rb")].each do |filepath|
      print(".")
      code = File.read(filepath)
      index.index_single(RubyIndexer::IndexablePath.new(nil, filepath), code)
    rescue => e
      failures << { message: e.message, file: filepath }
    end
  end

  errors = []
  errors << failures.pop until failures.empty?

  if errors.any?
    puts "errors: #{errors}"
    # Fail the task (and therefore CI) when any file could not be indexed.
    exit(1)
  end
ensure
  # Runs on success, on failure and on exit(1): never leave the downloads behind.
  FileUtils.rm_rf(TOP_100_GEMS_DIR)
end

rakelib/top_100_gems.yml

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
---
2+
- actioncable-7.0.4.3
3+
- actionmailbox-7.0.4.3
4+
- actionmailer-7.0.4.3
5+
- actionpack-7.0.4.3
6+
- actiontext-7.0.4.3
7+
- actionview-7.0.4.3
8+
- activejob-7.0.4.3
9+
- activemodel-7.0.4.3
10+
- activerecord-7.0.4.3
11+
- activestorage-7.0.4.3
12+
- activesupport-7.0.4.3
13+
- addressable-2.8.4
14+
- autoprefixer-rails-10.4.13.0
15+
- aws-partitions-1.744.0
16+
- aws-sdk-cloudformation-1.77.0
17+
- aws-sdk-cloudfront-1.76.0
18+
- aws-sdk-cloudwatch-1.72.0
19+
- aws-sdk-core-3.171.0
20+
- aws-sdk-dynamodb-1.83.0
21+
- aws-sdk-ec2-1.375.0
22+
- aws-sdk-iam-1.77.0
23+
- aws-sdk-kinesis-1.45.0
24+
- aws-sdk-kms-1.63.0
25+
- aws-sdk-lambda-1.93.0
26+
- aws-sdk-rds-1.175.0
27+
- aws-sdk-resources-3.162.0
28+
- aws-sdk-s3-1.120.1
29+
- aws-sdk-secretsmanager-1.73.0
30+
- aws-sdk-sns-1.60.0
31+
- aws-sdk-ssm-1.150.0
32+
- backports-3.24.1
33+
- brakeman-5.4.1
34+
- bundler-2.4.11
35+
- capybara-3.39.0
36+
- concurrent-ruby-1.2.2
37+
- connection_pool-2.4.0
38+
- dalli-3.2.4
39+
- database_cleaner-2.0.2
40+
- devise-4.9.2
41+
- dry-types-1.7.1
42+
- elasticsearch-8.7.0
43+
- elasticsearch-api-8.7.0
44+
- excon-0.99.0
45+
- faker-3.1.1
46+
- faraday-retry-2.1.0
47+
- fastlane-2.212.1
48+
- fog-aws-3.18.0
49+
- git-1.18.0
50+
- google-cloud-errors-1.3.1
51+
- google-protobuf-3.22.2
52+
- googleauth-1.5.1
53+
- graphql-2.0.21
54+
- grpc-1.53.0
55+
- jwt-2.7.0
56+
- loofah-2.20.0
57+
- mail-2.8.1
58+
- mime-types-data-3.2023.0218.1
59+
- minitest-5.18.0
60+
- msgpack-1.7.0
61+
- net-http-persistent-4.0.2
62+
- net-ssh-7.1.0
63+
- newrelic_rpm-9.1.0
64+
- nio4r-2.5.9
65+
- nokogiri-1.14.3
66+
- octokit-6.1.1
67+
- oj-3.14.3
68+
- parser-3.2.2.0
69+
- pg-1.4.6
70+
- plist-3.7.0
71+
- puma-6.2.1
72+
- rack-3.0.7
73+
- rack-cors-2.0.1
74+
- rack-protection-3.0.6
75+
- rack-test-2.1.0
76+
- rails-7.0.4.3
77+
- railties-7.0.4.3
78+
- raindrops-0.20.1
79+
- redis-store-1.9.2
80+
- regexp_parser-2.7.0
81+
- responders-3.1.0
82+
- rouge-4.1.0
83+
- rspec-core-3.12.1
84+
- rspec-mocks-3.12.5
85+
- rubocop-1.50.0
86+
- rubocop-ast-1.28.0
87+
- rubocop-performance-1.17.1
88+
- rubocop-rails-2.19.0
89+
- rubocop-rspec-2.19.0
90+
- ruby-progressbar-1.13.0
91+
- ruby_parser-3.20.0
92+
- rubygems-update-3.4.11
93+
- selenium-webdriver-4.8.6
94+
- sidekiq-7.0.8
95+
- sinatra-3.0.6
96+
- slop-4.10.1
97+
- sqlite3-1.6.2
98+
- thin-1.8.2
99+
- tilt-2.1.0
100+
- yard-0.9.32
101+
- zeitwerk-2.6.7

sorbet/config

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
--dir
22
.
33
--ignore=vendor/
4+
--ignore=tmp/
45
--ignore=test/fixtures/
56
--ignore=test/expectations/
67
--enable-experimental-requires-ancestor

0 commit comments

Comments
 (0)