FIX: makes BBCode parsing much more resilient by running Nokogiri on the cooked HTML

This commit is contained in:
jjaffeux 2020-04-12 10:32:44 +02:00
parent c195a28578
commit bcff2fcb58
4 changed files with 61 additions and 30 deletions

View File

@ -175,7 +175,7 @@ module DiscoursePostEvent
end
def self.update_from_raw(post)
events = DiscoursePostEvent::EventParser.extract_events(post.raw)
events = DiscoursePostEvent::EventParser.extract_events(post)
if events.present?
event_params = events.first
event = post.event || Event.new(id: post.id)
@ -184,7 +184,7 @@ module DiscoursePostEvent
starts_at: event_params[:start] || event.starts_at,
ends_at: event_params[:end] || event.ends_at,
status: event_params[:status].present? ? Event.statuses[event_params[:status].to_sym] : event.status,
raw_invitees: event_params[:allowedGroups] ? event_params[:allowedGroups].split(',') : nil
raw_invitees: event_params[:"allowed-groups"] ? event_params[:"allowed-groups"].split(',') : nil
}
event.enforce_utc!(params)
event.update_with_params!(params)

View File

@ -1,34 +1,32 @@
# frozen_string_literal: true
# Matches a "[wrap=event ...]" tag. The capture group is the text between
# "[wrap=event" plus one whitespace character and the first following "]";
# the /m flag lets "." match newlines.
EVENT_REGEX = /\[wrap=event\s(.*?)\]/m
# Matches one key="value" pair; the key is made of word characters only.
EVENT_OPTIONS_REGEX = /(\w+\=".*?")/m
# Option names the parser accepts. Keys outside this list are not copied
# into the parsed event.
# NOTE(review): both :allowedGroups and :"allowed-groups" are listed in this
# view — confirm which spelling is meant to remain.
VALID_OPTIONS = [
:start,
:end,
:status,
:allowedGroups,
:"allowed-groups",
:name
]
# Parser that extracts event options from a post.
# NOTE(review): in this view the lines of a regex-based implementation and a
# Nokogiri-based implementation appear interleaved (a def opens inside another
# def and the `end`s do not balance), so the block is not runnable as shown.
module DiscoursePostEvent
class EventParser
# String-based variant: scans str for EVENT_REGEX matches, removes
# backslashes from each captured options string, parses it with
# extract_options and drops nil results via compact.
def self.extract_events(str)
str.scan(EVENT_REGEX).map do |scan|
extract_options(scan[0].gsub(/\\/, ''))
end.compact
end
# Post-based variant: cooks post.raw with PrettyText.cook and reads the
# event options from the attributes of every element matching
# [data-wrap="event"] in the cooked markup.
def self.extract_events(post)
cooked = PrettyText.cook(post.raw, topic_id: post.topic_id, user_id: post.user_id)
# Accepted attribute names: each VALID_OPTIONS entry prefixed with "data-".
valid_options = VALID_OPTIONS.map { |o| "data-#{o}" }
# Parses key="value" pairs out of an options string. Returns a Hash of the
# pairs whose key is in VALID_OPTIONS, or nil when no pair qualifies.
def self.extract_options(str)
options = nil
str.scan(EVENT_OPTIONS_REGEX).each do |option|
# split("=") cuts at every "=", so only the text up to a second "="
# ends up in value.
key, value = option[0].split("=")
if VALID_OPTIONS.include?(key.to_sym) && value
options ||= {}
# Remove backslashes and double quotes from the stored value.
options[key.to_sym] = value.delete('\\"')
Nokogiri::HTML(cooked).css('[data-wrap="event"]').map do |doc|
# Stays nil unless the element carries at least one accepted attribute.
event = nil
doc.attributes.values.each do |attribute|
name = attribute.name
value = attribute.value
if valid_options.include?(name) && value
event ||= {}
# Strip the "data-" prefix to get the option key; the attribute value is
# HTML-escaped before it is stored.
event[name["data-".length..-1].to_sym] = CGI.escapeHTML(value)
end
end
end
options
# Block result: the collected Hash, or nil (removed by compact below).
event
end.compact
end
end
end

View File

@ -7,7 +7,7 @@ module DiscoursePostEvent
end
def validate_event
extracted_events = DiscoursePostEvent::EventParser::extract_events(@post.raw)
extracted_events = DiscoursePostEvent::EventParser::extract_events(@post)
if extracted_events.count == 0
return false

View File

@ -2,46 +2,79 @@
require "rails_helper"
# Spec helper: instantiates a Post through Post.new with the given author
# and raw body text, and returns it. Nothing else is called on the object.
def build_post(user, raw)
  post_attributes = { user: user, raw: raw }
  Post.new(**post_attributes)
end
# Specs for DiscoursePostEvent::EventParser.extract_events.
# NOTE(review): several examples below contain two consecutive
# `events = ...` assignments, one passing a String and one passing a Post
# built with build_post; only the later assignment is visible to the
# expectations that follow it.
describe DiscoursePostEvent::EventParser do
subject { DiscoursePostEvent::EventParser }
let(:user) { Fabricate(:user) }
# Text without any [wrap=event] tag yields an empty result.
it 'works with no event' do
events = subject.extract_events('this could be a nice event')
events = subject.extract_events(build_post(user, 'this could be a nice event'))
expect(events.length).to eq(0)
end
# A single [wrap=event] tag yields exactly one event.
it 'finds one event' do
events = subject.extract_events('[wrap=event start="foo" end="bar"]\n[/wrap]')
events = subject.extract_events(build_post(user, '[wrap=event start="foo" end="bar"]\n[/wrap]'))
expect(events.length).to eq(1)
end
# Two [wrap=event] tags in the same post yield two events.
it 'finds multiple events' do
events = subject.extract_events('[wrap=event start="foo" end="bar"]\n[/wrap] baz [wrap=event start="foo" end="bar"]\n[/wrap]')
post_event = build_post user, <<-TXT
[wrap=event start="2020"][/wrap]
[wrap=event start="2021"][/wrap]
TXT
events = subject.extract_events(post_event)
expect(events.length).to eq(2)
end
# Option values are exposed under symbol keys (:start, :end).
it 'parses options' do
events = subject.extract_events('[wrap=event start="foo" end="bar"]\n[/wrap]')
events = subject.extract_events(build_post(user, '[wrap=event start="foo" end="bar"]\n[/wrap]'))
expect(events[0][:start]).to eq("foo")
expect(events[0][:end]).to eq("bar")
end
# Input written as a double-quoted Ruby string (escaped quotes, real
# newlines) is parsed as well.
# NOTE(review): the last expectation reads events[1], but the post built on
# the line just before the expectations contains a single tag — confirm
# whether that expectation still belongs here.
it 'works with escaped string' do
events = subject.extract_events("I am going to get that fixed.\n\n[wrap=event start=\"bar\"]\n[/wrap]\n\n[wrap=event start=\"foo\"]\n[/wrap]")
events = subject.extract_events(build_post(user, "I am going to get that fixed.\n\n[wrap=event start=\"bar\"]\n[/wrap]"))
expect(events[0][:start]).to eq("bar")
expect(events[1][:start]).to eq("foo")
end
# An option value may contain spaces.
it 'parses options where value has spaces' do
events = subject.extract_events('[wrap=event start="foo" name="bar baz"]\n[/wrap]')
events = subject.extract_events(build_post(user, '[wrap=event start="foo" name="bar baz"]\n[/wrap]'))
expect(events[0][:name]).to eq("bar baz")
end
# Unknown option keys are not returned, and a tag carrying only unknown
# options produces no event at all.
it 'doesnt parse invalid options' do
events = subject.extract_events("I am going to get that fixed.\n\n[wrap=event start=\"foo\" something=\"bar\"]\n[/wrap]")
events = subject.extract_events(build_post(user, "I am going to get that fixed.\n\n[wrap=event start=\"foo\" something=\"bar\"]\n[/wrap]"))
expect(events[0][:something]).to be(nil)
events = subject.extract_events("I am going to get that fixed.\n\n[wrap=event something=\"bar\"]\n[/wrap]")
events = subject.extract_events(build_post(user, "I am going to get that fixed.\n\n[wrap=event something=\"bar\"]\n[/wrap]"))
expect(events).to eq([])
end
# A tag placed inside a fenced code block must not produce an event.
it 'doesnt parse an event in codeblock' do
post_event = build_post user, <<-TXT
Example event:
```
[wrap=event start=\"bar\"]\n[/wrap]
```
TXT
events = subject.extract_events(post_event)
expect(events).to eq([])
end
# NOTE(review): the example name refers to a blockquote, but the heredoc as
# shown here contains no "> " quote marker (leading characters may have been
# lost in this view) — confirm the fixture against the original file.
it 'doesnt parse in blockquote' do
post_event = build_post user, <<-TXT
[wrap=event start="2020"][/wrap]
TXT
events = subject.extract_events(post_event)
expect(events).to eq([])
end
end