Skip to content
This repository was archived by the owner on Sep 18, 2019. It is now read-only.

Commit f0c58e7

Browse files
author
Ben Smith
committed
Made this Jekyll plugin available as a Ruby Gem
Create `lib` folder and move plugin files. Working Ruby Gem, version `0.1.1`. Bump version of pre-built plugin. Ignore `Gemfile.lock`.
1 parent a686e15 commit f0c58e7

17 files changed

+367
-342
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
bower_components
22
*.gem
33
*.rbc
4+
Gemfile.lock
45
.bundle
56
.config
67
coverage

Gemfile

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
11
source 'https://rubygems.org'
2-
3-
gem 'rake'
4-
gem 'uglifier'
2+
gemspec

Gemfile.lock

-18
This file was deleted.

Rakefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ task :create_build_dir do
1111
end
1212

1313
task :copy_jekyll_plugin do
14-
system('ls *.rb | while read file; do cat $file; echo ""; done > build/jekyll_lunr_js_search.rb')
14+
system('ls lib/jekyll_lunr_js_search/*.rb | while read file; do cat $file; echo ""; done > build/jekyll_lunr_js_search.rb')
1515
end
1616

1717
task :concat_js do

build/jekyll_lunr_js_search.rb

+146-141
Original file line numberDiff line numberDiff line change
@@ -1,174 +1,179 @@
1-
require 'rubygems'
21
require 'json'
32

43
module Jekyll
4+
module LunrJsSearch
5+
class Indexer < Jekyll::Generator
6+
def initialize(config = {})
7+
super(config)
8+
9+
lunr_config = {
10+
'excludes' => [],
11+
'strip_index_html' => false,
12+
'min_length' => 3,
13+
'stopwords' => 'stopwords.txt'
14+
}.merge!(config['lunr_search'] || {})
15+
16+
@excludes = lunr_config['excludes']
17+
18+
# if web host supports index.html as default doc, then optionally exclude it from the url
19+
@strip_index_html = lunr_config['strip_index_html']
520

6-
class Indexer < Generator
7-
8-
def initialize(config = {})
9-
super(config)
10-
11-
lunr_config = {
12-
'excludes' => [],
13-
'strip_index_html' => false,
14-
'min_length' => 3,
15-
'stopwords' => 'stopwords.txt'
16-
}.merge!(config['lunr_search'] || {})
17-
18-
@excludes = lunr_config['excludes']
19-
20-
# if web host supports index.html as default doc, then optionally exclude it from the url
21-
@strip_index_html = lunr_config['strip_index_html']
22-
23-
# stop word exclusion configuration
24-
@min_length = lunr_config['min_length']
25-
@stopwords_file = lunr_config['stopwords']
26-
end
27-
28-
# Index all pages except those whose url matches any pattern in config['lunr_search']['excludes'] or that set data['exclude_from_search']
29-
# The main content from each page is extracted and saved to disk as json
30-
def generate(site)
31-
puts 'Running the search indexer...'
32-
33-
# gather pages and posts
34-
items = pages_to_index(site)
35-
content_renderer = PageRenderer.new(site)
36-
index = []
37-
38-
items.each do |item|
39-
entry = SearchEntry.create(item, content_renderer)
21+
# stop word exclusion configuration
22+
@min_length = lunr_config['min_length']
23+
@stopwords_file = lunr_config['stopwords']
24+
end
4025

41-
entry.strip_index_suffix_from_url! if @strip_index_html
42-
entry.strip_stopwords!(stopwords, @min_length) if File.exists?(@stopwords_file)
26+
# Index all pages except those whose url matches any pattern in config['lunr_search']['excludes'] or that set data['exclude_from_search']
27+
# The main content from each page is extracted and saved to disk as json
28+
def generate(site)
29+
puts 'Running the search indexer...'
30+
31+
# gather pages and posts
32+
items = pages_to_index(site)
33+
content_renderer = PageRenderer.new(site)
34+
index = []
35+
36+
items.each do |item|
37+
entry = SearchEntry.create(item, content_renderer)
38+
39+
entry.strip_index_suffix_from_url! if @strip_index_html
40+
entry.strip_stopwords!(stopwords, @min_length) if File.exists?(@stopwords_file)
41+
42+
index << {
43+
:title => entry.title,
44+
:url => entry.url,
45+
:date => entry.date,
46+
:categories => entry.categories,
47+
:body => entry.body
48+
}
49+
50+
puts 'Indexed ' << "#{entry.title} (#{entry.url})"
51+
end
52+
53+
json = JSON.generate({:entries => index})
4354

44-
index << {
45-
:title => entry.title,
46-
:url => entry.url,
47-
:date => entry.date,
48-
:categories => entry.categories,
49-
:body => entry.body
50-
}
55+
# Create destination directory if it doesn't exist yet. Otherwise, we cannot write our file there.
56+
Dir::mkdir(site.dest) unless File.directory?(site.dest)
5157

52-
puts 'Indexed ' << "#{entry.title} (#{entry.url})"
58+
# File I/O: create search.json file and write out the generated (compact) JSON
59+
filename = 'search.json'
60+
61+
File.open(File.join(site.dest, filename), "w") do |file|
62+
file.write(json)
63+
end
64+
65+
# Keep the search.json file from being cleaned by Jekyll
66+
site.static_files << SearchIndexFile.new(site, site.dest, "/", filename)
5367
end
68+
69+
private
5470

55-
json = JSON.generate({:entries => index})
56-
57-
# Create destination directory if it doesn't exist yet. Otherwise, we cannot write our file there.
58-
Dir::mkdir(site.dest) unless File.directory?(site.dest)
59-
60-
# File I/O: create search.json file and write out the generated (compact) JSON
61-
filename = 'search.json'
62-
63-
File.open(File.join(site.dest, filename), "w") do |file|
64-
file.write(json)
71+
# load the stopwords file
72+
def stopwords
73+
@stopwords ||= IO.readlines(@stopwords_file).map { |l| l.strip }
6574
end
66-
67-
# Keep the search.json file from being cleaned by Jekyll
68-
site.static_files << Jekyll::SearchIndexFile.new(site, site.dest, "/", filename)
69-
end
70-
71-
private
72-
73-
# load the stopwords file
74-
def stopwords
75-
@stopwords ||= IO.readlines(@stopwords_file).map { |l| l.strip }
76-
end
77-
78-
def pages_to_index(site)
79-
items = []
8075

81-
# copy pages and posts (dup makes shallow copies)
82-
site.pages.each {|page| items << page.dup }
83-
site.posts.each {|post| items << post.dup }
76+
def pages_to_index(site)
77+
items = []
78+
79+
# copy pages and posts (dup makes shallow copies)
80+
site.pages.each {|page| items << page.dup }
81+
site.posts.each {|post| items << post.dup }
8482

85-
# only process files that will be converted to .html and only non excluded files
86-
items.select! {|i| i.output_ext == '.html' && ! @excludes.any? {|s| (i.url =~ Regexp.new(s)) != nil } }
87-
items.reject! {|i| i.data['exclude_from_search'] }
88-
89-
items
83+
# only process files that will be converted to .html and only non excluded files
84+
items.select! {|i| i.output_ext == '.html' && ! @excludes.any? {|s| (i.url =~ Regexp.new(s)) != nil } }
85+
items.reject! {|i| i.data['exclude_from_search'] }
86+
87+
items
88+
end
9089
end
9190
end
9291
end
9392
require 'nokogiri'
9493

9594
module Jekyll
96-
97-
class PageRenderer
98-
def initialize(site)
99-
@site = site
100-
end
101-
102-
# render the item, parse the output and collect the content of every text node (not only <p> elements), collapsing whitespace
103-
def render(item)
104-
item.render({}, @site.site_payload)
105-
doc = Nokogiri::HTML(item.output)
106-
paragraphs = doc.search('//text()').map {|t| t.content }
107-
paragraphs = paragraphs.join(" ").gsub("\r", " ").gsub("\n", " ").gsub("\t", " ").gsub(/\s+/, " ")
95+
module LunrJsSearch
96+
class PageRenderer
97+
def initialize(site)
98+
@site = site
99+
end
100+
101+
# render the item, parse the output and collect the content of every text node (not only <p> elements), collapsing whitespace
102+
def render(item)
103+
item.render({}, @site.site_payload)
104+
doc = Nokogiri::HTML(item.output)
105+
paragraphs = doc.search('//text()').map {|t| t.content }
106+
paragraphs = paragraphs.join(" ").gsub("\r", " ").gsub("\n", " ").gsub("\t", " ").gsub(/\s+/, " ")
107+
end
108108
end
109-
end
110-
109+
end
111110
end
112111
require 'nokogiri'
113112

114113
module Jekyll
115-
116-
class SearchEntry
117-
def self.create(page_or_post, renderer)
118-
return create_from_post(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Post)
119-
return create_from_page(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Page)
120-
raise 'Not supported'
121-
end
122-
123-
def self.create_from_page(page, renderer)
124-
title, url = extract_title_and_url(page)
125-
body = renderer.render(page)
126-
date = nil
127-
categories = []
114+
module LunrJsSearch
115+
class SearchEntry
116+
def self.create(page_or_post, renderer)
117+
return create_from_post(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Post)
118+
return create_from_page(page_or_post, renderer) if page_or_post.is_a?(Jekyll::Page)
119+
raise 'Not supported'
120+
end
128121

129-
SearchEntry.new(title, url, date, categories, body)
130-
end
131-
132-
def self.create_from_post(post, renderer)
133-
title, url = extract_title_and_url(post)
134-
body = renderer.render(post)
135-
date = post.date
136-
categories = post.categories
122+
def self.create_from_page(page, renderer)
123+
title, url = extract_title_and_url(page)
124+
body = renderer.render(page)
125+
date = nil
126+
categories = []
127+
128+
SearchEntry.new(title, url, date, categories, body)
129+
end
137130

138-
SearchEntry.new(title, url, date, categories, body)
139-
end
131+
def self.create_from_post(post, renderer)
132+
title, url = extract_title_and_url(post)
133+
body = renderer.render(post)
134+
date = post.date
135+
categories = post.categories
136+
137+
SearchEntry.new(title, url, date, categories, body)
138+
end
140139

141-
def self.extract_title_and_url(item)
142-
data = item.to_liquid
143-
[ data['title'], data['url'] ]
144-
end
140+
def self.extract_title_and_url(item)
141+
data = item.to_liquid
142+
[ data['title'], data['url'] ]
143+
end
145144

146-
attr_reader :title, :url, :date, :categories, :body
147-
148-
def initialize(title, url, date, categories, body)
149-
@title, @url, @date, @categories, @body = title, url, date, categories, body
150-
end
151-
152-
def strip_index_suffix_from_url!
153-
@url.gsub!(/index\.html$/, '')
145+
attr_reader :title, :url, :date, :categories, :body
146+
147+
def initialize(title, url, date, categories, body)
148+
@title, @url, @date, @categories, @body = title, url, date, categories, body
149+
end
150+
151+
def strip_index_suffix_from_url!
152+
@url.gsub!(/index\.html$/, '')
153+
end
154+
155+
# remove anything that is in the stop words list from the text to be indexed
156+
def strip_stopwords!(stopwords, min_length)
157+
@body = @body.split.delete_if() do |x|
158+
t = x.downcase.gsub(/[^a-z]/, '')
159+
t.length < min_length || stopwords.include?(t)
160+
end.join(' ')
161+
end
154162
end
155-
156-
# remove anything that is in the stop words list from the text to be indexed
157-
def strip_stopwords!(stopwords, min_length)
158-
@body = @body.split.delete_if() do |x|
159-
t = x.downcase.gsub(/[^a-z]/, '')
160-
t.length < min_length || stopwords.include?(t)
161-
end.join(' ')
162-
end
163163
end
164164
end
165165
module Jekyll
166-
167-
class SearchIndexFile < StaticFile
168-
# Override write as the search.json index file has already been created
169-
def write(dest)
170-
true
171-
end
166+
module LunrJsSearch
167+
class SearchIndexFile < Jekyll::StaticFile
168+
# Override write as the search.json index file has already been created
169+
def write(dest)
170+
true
171+
end
172+
end
172173
end
173-
174+
end
175+
module Jekyll
176+
module LunrJsSearch
177+
VERSION = "0.1.1"
178+
end
174179
end

0 commit comments

Comments
 (0)