diff --git a/lib/mmmd.rb b/lib/mmmd.rb new file mode 100644 index 0000000..8eada91 --- /dev/null +++ b/lib/mmmd.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +require_relative 'mmmd/blankshell' +require_relative 'mmmd/renderers' + +# Extensible, multi-format markdown processor +module MMMD + # Parse a Markdown document into a DOM form + # @param doc [String] + # @return [::PointBlank::DOM::Document] + def self.parse(doc) + ::PointBlank::DOM::Document.parse(doc) + end +end diff --git a/lib/mmmd/blankshell.rb b/lib/mmmd/blankshell.rb index c473e35..01a57f6 100644 --- a/lib/mmmd/blankshell.rb +++ b/lib/mmmd/blankshell.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative 'util' +require 'uri' # Modular, extensible approach to parsing markdown as close as # it gets to CommonMark spec (as of version 0.31.2). @@ -75,13 +76,13 @@ module PointBlank def read_destination(text) if (result = text.match(/\A<.*?(?/m)) && !result[0][1..].match?(/(?<])/, '')[1..-2], + [process_destination(result[0].gsub(/\\(?=[><])/, '')[1..-2]), text.delete_prefix(result[0]).lstrip] elsif (result = text.match(/\A\S+/)) && !result[0].start_with?('<') && result && balanced?(result[0]) - [result[0], + [process_destination(result[0]), text.delete_prefix(result[0]).lstrip] else [nil, text] @@ -95,15 +96,15 @@ module PointBlank def read_title(text) if text.start_with?("'") && (result = text.match(/\A'.*?(??@\[\\\]\^_`{|}~])/, + '\\1') + string = string.gsub("\n", " ") + URI.encode_uri_component( + MMMD::EntityUtils.decode_entities(string) + ) + end + + # Process title string + # @param string [String] + # @return [String] + def process_title(string) + string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/, + '\\1') + string = string.gsub("\n", " ") + MMMD::EntityUtils.decode_entities(string) + end end class LineScanner diff --git a/lib/mmmd/renderers.rb b/lib/mmmd/renderers.rb index 7cbe4b5..b6a8b9d 100644 --- a/lib/mmmd/renderers.rb +++ b/lib/mmmd/renderers.rb @@ -1,9 
+1,11 @@ # frozen_string_literal: true -module RBMark +$LOAD_PATH.append(__dir__) + +module MMMD # Renderers from Markdown to expected output format module Renderers + autoload :HTML, 'renderers/html' + autoload :PlainTerm, 'renderers/plainterm' end end - -require_relative 'renderers/html' diff --git a/lib/mmmd/renderers/html.rb b/lib/mmmd/renderers/html.rb index 4f771c9..9aa5f49 100644 --- a/lib/mmmd/renderers/html.rb +++ b/lib/mmmd/renderers/html.rb @@ -7,7 +7,8 @@ module MMMD module HTMLConstants ELEMENT_MAP = { "PointBlank::DOM::InlinePre" => { - tag: "pre" + tag: "code", + style: "white-space: pre;" }, "PointBlank::DOM::InlineBreak" => { tag: "br" @@ -26,12 +27,14 @@ module MMMD tag: "s" }, "PointBlank::DOM::InlineLink" => { - tag: "link", + tag: "a", href: true }, "PointBlank::DOM::InlineImage" => { tag: "img", - src: true + src: true, + inline: true, + alt: true }, "PointBlank::DOM::ULBlock" => { tag: "ul" @@ -88,7 +91,8 @@ module MMMD tag: "blockquote" }, "PointBlank::DOM::HorizontalRule" => { - tag: "hr" + tag: "hr", + inline: true }, "PointBlank::DOM::Text" => { sanitize: true @@ -128,41 +132,80 @@ module MMMD def initialize(dom, options) @document = dom @options = options + @options["linewrap"] ||= 80 @options["init_level"] ||= 2 @options["indent"] ||= 2 mapmanager = HTMLConstants::MapManager.new(options) @mapping = mapmanager.mapping + return unless @options["nowrap"] + + @options["init_level"] = 0 + @mapping.delete("PointBlank::DOM::Document") end # Render document to HTML def render text = _render(@document, @options, level: @options["init_level"]) @options["init_level"].times { text = indent(text) } - [ - preambule, - text, - postambule - ].join("\n") + if @options["nowrap"] + text + else + [ + preambule, + remove_pre_spaces(text), + postambule + ].join("\n") + end end private + # Find and remove extra spaces inbetween preformatted text + # @param string [String] + # @return [String] + def remove_pre_spaces(string) + output = [] + buffer = [] + open 
= nil + string.lines.each do |line| + opentoken = line.match?(/<pre>/)
+ closetoken = line.match?(/<\/pre>/)
+ if closetoken
+ open = false
+ buffer = strip_leading_spaces_in_buffer(buffer)
+ output.append(*buffer)
+ buffer = []
+ end
+ (open ? buffer : output).append(line)
+ open = true if opentoken && !closetoken
+ end
+ output.append(*buffer) unless buffer.empty?
+ output.join('')
+ end
+
+ # Remove the run of leading spaces shared by every line in the buffer.
+ # The shortest run of leading spaces found on any line is deleted from
+ # the start of each line; spaces beyond that prefix are kept.
+ # @param buffer [Array<String>] lines to strip
+ # @return [Array<String>] new array of lines without the common prefix
+ def strip_leading_spaces_in_buffer(buffer)
+ # Shortest leading-space run across all lines (nil for an empty buffer,
+ # in which case the map below never runs)
+ minprefix = buffer.map { |x| x.match(/^ */)[0] }
+ .min_by(&:length)
+ buffer.map do |line|
+ line.delete_prefix(minprefix)
+ end
+ end
+
# Word wrapping algorithm
# @param text [String]
# @param width [Integer]
# @return [String]
def wordwrap(text, width)
- words = text.split(/( +)/)
+ words = text.split(/( +|<[^>]+>)/)
output = []
line = ""
length = 0
until words.empty?
word = words.shift
wordlength = word.length
- if wordlength > width
- words.prepend(word[width..])
- word = word[..width - 1]
- end
if length + wordlength + 1 > width
output.append(line.lstrip)
line = word
@@ -176,7 +219,7 @@ module MMMD
output.join("\n")
end
+ # Recursively render a DOM element and its children to markup text.
+ # Leaf blocks and paragraphs switch rendering into inline mode, which
+ # is then inherited by their descendants.
+ # @param element [Object] DOM node (must respond to #children and #content)
+ # @param options [Hash] renderer options, passed through to child calls
+ # @param inline [Boolean] whether an ancestor already switched to inline mode
+ # @param level [Integer] nesting level, forwarded to children and run_filters
+ # @param literaltext [Boolean] when true, run_filters returns the text
+ # unprocessed
+ # @return [String]
- def _render(element, options, inline: false, level: 0)
+ def _render(element, options, inline: false, level: 0, literaltext: false)
modeswitch = element.is_a?(::PointBlank::DOM::LeafBlock) ||
element.is_a?(::PointBlank::DOM::Paragraph)
inline ||= modeswitch
@@ -184,24 +227,39 @@ module MMMD
text = if element.children.empty?
element.content
else
+ # Nil-safe lookup: a class without a mapping entry (for example the
+ # Document entry removed when the "nowrap" option is set) must not
+ # raise NoMethodError here; run_filters guards the same lookup.
+ literal = @mapping.dig(element.class.name, :inline) || literaltext
element.children.map do |child|
_render(child, options, inline: inline,
- level: level)
+ level: level,
+ literaltext: literal)
end.join(inline ? '' : "\n")
end
run_filters(text, element, level: level,
inline: inline,
- modeswitch: modeswitch)
+ modeswitch: modeswitch,
+ literaltext: literaltext)
end
+ # Post-process the rendered text of an element: wrap, sanitize and tag it.
+ # The text is returned untouched when the element class has no mapping
+ # entry or when literaltext is set.
+ # @param text [String] already rendered content of the element
+ # @param element [Object] DOM node the text belongs to
+ # @param level [Integer] nesting level; the wrap width is reduced by
+ # level * @options["indent"] columns
+ # @param inline [Boolean] forwarded to openclose
+ # @param modeswitch [Boolean] when true the text is word-wrapped
+ # @param literaltext [Boolean] when true the text is returned unprocessed
+ # @return [String]
- def run_filters(text, element, level:, inline:, modeswitch:)
+ def run_filters(text, element, level:, inline:, modeswitch:,
+ literaltext:)
element_style = @mapping[element.class.name]
- hsize = 80 - (level * @options["indent"])
+ return text unless element_style
+ return text if literaltext
+
+ # Width left for text after accounting for indentation at this level
+ hsize = @options["linewrap"] - (level * @options["indent"])
text = wordwrap(text, hsize) if modeswitch
if element_style[:sanitize]
text = MMMD::EntityUtils.encode_entities(text)
end
- opentag, closetag = construct_tags(element_style)
+ # Styles flagged :inline are emitted as a single tag by innerclose;
+ # everything else gets an opening/closing pair from openclose.
+ if element_style[:inline]
+ innerclose(element, element_style, text)
+ else
+ openclose(text, element, element_style, inline)
+ end
end
+
+ def openclose(text, element, element_style, inline)
+ opentag, closetag = construct_tags(element_style, element)
if inline
opentag + text + closetag
else
@@ -211,21 +269,54 @@ module MMMD
end
end
- def construct_tags(style)
+ # Build a single self-contained tag (no closing tag) for an element
+ # whose style sets :inline, such as an image or a horizontal rule.
+ # @param element [Object] DOM node; its properties supply link and title
+ # @param style [Hash] mapping entry describing the tag and its attributes
+ # @param text [String] literal text of the element's children, used as
+ # the alt attribute when the style requests one
+ # @return [String]
+ def innerclose(element, style, text)
+ props = element.properties
+ tag = "<#{style[:tag]}"
+ tag += " style=#{style[:style].inspect}" if style[:style]
+ tag += " href=#{read_link(element)}" if style[:href]
+ if style[:alt]
+ # Children of an :inline element are rendered as literal, unsanitized
+ # text, so the value has to be entity-encoded before it goes into the
+ # attribute. String#inspect would emit \" for a double quote, which
+ # HTML does not treat as an escape.
+ tag += " alt=\"#{MMMD::EntityUtils.encode_entities(text)}\""
+ end
+ tag += " src=#{read_link(element)}" if style[:src]
+ tag += " title=#{read_title(element)}" if style[:title] && props[:title]
+ tag += ">"
+ if style[:outer]
+ # Wrap the tag in the outer element requested by the style
+ outeropen, outerclose = construct_tags(style[:outer], element)
+ tag = outeropen + tag + outerclose
+ end
+ tag
+ end
+
+ # Build the opening and closing tag for a style entry.
+ # Attributes (style, href, src, title) are added to the opening tag when
+ # the style asks for them; a nested :outer style wraps both tags.
+ # @param style [Hash, nil] mapping entry; nil or tag-less styles yield
+ # a pair of empty strings
+ # @param element [Object] DOM node whose properties supply link and title
+ # @return [Array<String>] two-element array: opening tag, closing tag
+ def construct_tags(style, element)
return ["", ""] unless style && style[:tag]
+ props = element.properties
opentag = "<#{style[:tag]}"
closetag = "</#{style[:tag]}>"
- opentag += " style=#{style[:style].dump}" if style["style"]
+ opentag += " style=#{style[:style].inspect}" if style[:style]
+ opentag += " href=#{read_link(element)}" if style[:href]
+ opentag += " src=#{read_link(element)}" if style[:src]
+ opentag += " title=#{read_title(element)}" if style[:title] &&
+ props[:title]
opentag += ">"
if style[:outer]
- outeropen, outerclose = construct_tags(style[:outer])
+ outeropen, outerclose = construct_tags(style[:outer], element)
opentag = outeropen + opentag
closetag += outerclose
end
[opentag, closetag]
end
+ # Build the quoted, entity-encoded value for a title attribute.
+ # @param element [Object] DOM node with a :title property
+ # @return [String] the encoded title wrapped in double quotes
+ def read_title(element)
+ title = element.properties[:title]
+ title = MMMD::EntityUtils.encode_entities(title)
+ # Quote by hand instead of String#dump: dump rewrites backslashes,
+ # interpolation markers and non-ASCII characters as Ruby escapes, which
+ # a browser would display verbatim. encode_entities is relied upon to
+ # have replaced any double quote inside the value.
+ "\"#{title}\""
+ end
+
+ # Build the quoted, entity-encoded value for an href/src attribute.
+ # @param element [Object] DOM node with a :uri property
+ # @return [String] the encoded link wrapped in double quotes
+ def read_link(element)
+ link = element.properties[:uri]
+ link = MMMD::EntityUtils.encode_entities(link)
+ # Quote by hand instead of String#dump so that no Ruby-style escape
+ # sequences can leak into the attribute. encode_entities is relied upon
+ # to have replaced any double quote inside the value.
+ "\"#{link}\""
+ end
+
def indent(text)
text.lines.map do |line|
"#{' ' * @options["indent"]}#{line}"
@@ -233,13 +324,17 @@ module MMMD
end
def preambule
+ head = @options['head']
+ headinfo = "#{indent(<<~HEAD.rstrip)}\n " if head
+
+ #{head.is_a?(Array) ? head.join("\n") : head}
+
+ HEAD
+ headinfo ||= " "
@options['preambule'] or <<~TEXT.rstrip
-
- #{@options['head']}
-
-
+ #{headinfo}
TEXT
end
diff --git a/lib/mmmd/renderers/plainterm.rb b/lib/mmmd/renderers/plainterm.rb
index 27dec04..8ad1f48 100644
--- a/lib/mmmd/renderers/plainterm.rb
+++ b/lib/mmmd/renderers/plainterm.rb
@@ -312,6 +312,9 @@ module MMMD
"PointBlank::DOM::QuoteBlock" => {
leftline: true,
increase_level: true
+ },
+ "PointBlank::DOM::HorizontalRule" => {
+ underline_full_block: true
}
}.freeze
diff --git a/lib/mmmd/util.rb b/lib/mmmd/util.rb
index 5b15cf0..f9efe09 100644
--- a/lib/mmmd/util.rb
+++ b/lib/mmmd/util.rb
@@ -22,6 +22,21 @@ module MMMD
end
end
+ # Encode unsafe html entities in string (ASCII-compatible).
+ # Markup-significant characters become named/numeric entities and every
+ # character outside the ASCII range becomes a decimal numeric reference,
+ # so the result contains ASCII characters only.
+ # @param string [String]
+ # @return [String]
+ # @sg-ignore
+ def self.encode_entities_ascii(string)
+ # "&" must be replaced first so the entities produced by the later
+ # substitutions are not escaped a second time.
+ string.gsub("&", "&amp;")
+ .gsub("<", "&lt;")
+ .gsub(">", "&gt;")
+ .gsub('"', "&quot;")
+ .gsub("'", "&#39;")
+ .gsub(/[^\x00-\x7F]/) do |match|
+ "&##{match.codepoints[0]};"
+ end
+ end
+
# Encode unsafe html entities in string
# @param string [String]
# @return [String]
@@ -32,9 +47,6 @@ module MMMD
.gsub(">", ">")
.gsub('"', """)
.gsub("'", "'")
- .gsub(/[^\x00-\x7F]/) do |match|
- "#{match.codepoints[0]};"
- end
end
end
end
diff --git a/lib/rubymark b/lib/rubymark
deleted file mode 120000
index 5f5df8f..0000000
--- a/lib/rubymark
+++ /dev/null
@@ -1 +0,0 @@
-mmmd
\ No newline at end of file