diff --git a/lib/mmmd.rb b/lib/mmmd.rb new file mode 100644 index 0000000..8eada91 --- /dev/null +++ b/lib/mmmd.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +require_relative 'mmmd/blankshell' +require_relative 'mmmd/renderers' + +# Extensible, multi-format markdown processor +module MMMD + # Parse a Markdown document into a DOM form + # @param doc [String] + # @return [::PointBlank::DOM::Document] + def self.parse(doc) + ::PointBlank::DOM::Document.parse(doc) + end +end diff --git a/lib/mmmd/blankshell.rb b/lib/mmmd/blankshell.rb index c473e35..01a57f6 100644 --- a/lib/mmmd/blankshell.rb +++ b/lib/mmmd/blankshell.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative 'util' +require 'uri' # Modular, extensible approach to parsing markdown as close as # it gets to CommonMark spec (as of version 0.31.2). @@ -75,13 +76,13 @@ module PointBlank def read_destination(text) if (result = text.match(/\A<.*?(?/m)) && !result[0][1..].match?(/(?<])/, '')[1..-2], + [process_destination(result[0].gsub(/\\(?=[><])/, '')[1..-2]), text.delete_prefix(result[0]).lstrip] elsif (result = text.match(/\A\S+/)) && !result[0].start_with?('<') && result && balanced?(result[0]) - [result[0], + [process_destination(result[0]), text.delete_prefix(result[0]).lstrip] else [nil, text] @@ -95,15 +96,15 @@ module PointBlank def read_title(text) if text.start_with?("'") && (result = text.match(/\A'.*?(??@\[\\\]\^_`{|}~])/, + '\\1') + string = string.gsub("\n", " ") + URI.encode_uri_component( + MMMD::EntityUtils.decode_entities(string) + ) + end + + # Process title string + # @param string [String] + # @return [String] + def process_title(string) + string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/, + '\\1') + string = string.gsub("\n", " ") + MMMD::EntityUtils.decode_entities(string) + end end class LineScanner diff --git a/lib/mmmd/renderers.rb b/lib/mmmd/renderers.rb index 7cbe4b5..b6a8b9d 100644 --- a/lib/mmmd/renderers.rb +++ b/lib/mmmd/renderers.rb @@ -1,9 +1,11 @@ # frozen_string_literal: true -module RBMark +$LOAD_PATH.append(__dir__) + +module MMMD # Renderers from Markdown to expected output format module Renderers + autoload :HTML, 'renderers/html' + autoload :PlainTerm, 'renderers/plainterm' end end - -require_relative 'renderers/html' diff --git a/lib/mmmd/renderers/html.rb b/lib/mmmd/renderers/html.rb index 4f771c9..9aa5f49 100644 --- a/lib/mmmd/renderers/html.rb +++ b/lib/mmmd/renderers/html.rb @@ -7,7 +7,8 @@ module MMMD module HTMLConstants ELEMENT_MAP = { "PointBlank::DOM::InlinePre" => { - tag: "pre" + tag: "code", + style: "white-space: pre;" }, "PointBlank::DOM::InlineBreak" => { tag: "br" @@ -26,12 +27,14 @@ module MMMD tag: "s" }, "PointBlank::DOM::InlineLink" => { - tag: "link", + tag: "a", href: true }, "PointBlank::DOM::InlineImage" => { tag: "img", - src: true + src: true, + inline: true, + alt: true }, "PointBlank::DOM::ULBlock" => { tag: "ul" @@ -88,7 +91,8 @@ module MMMD tag: "blockquote" }, "PointBlank::DOM::HorizontalRule" => { - tag: "hr" + tag: "hr", + inline: true }, "PointBlank::DOM::Text" => { sanitize: true @@ -128,41 +132,80 @@ module MMMD def initialize(dom, options) @document = dom @options = options + @options["linewrap"] ||= 80 @options["init_level"] ||= 2 @options["indent"] ||= 2 mapmanager = HTMLConstants::MapManager.new(options) @mapping = mapmanager.mapping + return unless @options["nowrap"] + + @options["init_level"] = 0 + @mapping.delete("PointBlank::DOM::Document") end # Render document to HTML def render text = _render(@document, @options, level: @options["init_level"]) @options["init_level"].times { text = indent(text) } - [ - preambule, - text, - postambule - ].join("\n") + if @options["nowrap"] + text + else + [ + preambule, + remove_pre_spaces(text), + postambule + ].join("\n") + end end private + # Find and remove extra spaces inbetween preformatted text + # @param string [String] + # @return [String] + def remove_pre_spaces(string) + output = [] + buffer = [] + open = nil + string.lines.each do |line| + opentoken = line.match?(/
/) + closetoken = line.match?(/<\/pre>/) + if closetoken + open = false + buffer = strip_leading_spaces_in_buffer(buffer) + output.append(*buffer) + buffer = [] + end + (open ? buffer : output).append(line) + open = true if opentoken && !closetoken + end + output.append(*buffer) unless buffer.empty? + output.join('') + end + + # Strip leading spaces in the buffer + # @param lines [Array] + # @return [Array ] + def strip_leading_spaces_in_buffer(buffer) + minprefix = buffer.map { |x| x.match(/^ */)[0] } + .min_by(&:length) + buffer.map do |line| + line.delete_prefix(minprefix) + end + end + # Word wrapping algorithm # @param text [String] # @param width [Integer] # @return [String] def wordwrap(text, width) - words = text.split(/( +)/) + words = text.split(/( +|<[^>]+>)/) output = [] line = "" length = 0 until words.empty? word = words.shift wordlength = word.length - if wordlength > width - words.prepend(word[width..]) - word = word[..width - 1] - end if length + wordlength + 1 > width output.append(line.lstrip) line = word @@ -176,7 +219,7 @@ module MMMD output.join("\n") end - def _render(element, options, inline: false, level: 0) + def _render(element, options, inline: false, level: 0, literaltext: false) modeswitch = element.is_a?(::PointBlank::DOM::LeafBlock) || element.is_a?(::PointBlank::DOM::Paragraph) inline ||= modeswitch @@ -184,24 +227,39 @@ module MMMD text = if element.children.empty? element.content else + literal = @mapping[element.class.name][:inline] || literaltext element.children.map do |child| _render(child, options, inline: inline, - level: level) + level: level, + literaltext: literal) end.join(inline ? '' : "\n") end run_filters(text, element, level: level, inline: inline, - modeswitch: modeswitch) + modeswitch: modeswitch, + literaltext: literaltext) end - def run_filters(text, element, level:, inline:, modeswitch:) + def run_filters(text, element, level:, inline:, modeswitch:, + literaltext:) element_style = @mapping[element.class.name] - hsize = 80 - (level * @options["indent"]) + return text unless element_style + return text if literaltext + + hsize = @options["linewrap"] - (level * @options["indent"]) text = wordwrap(text, hsize) if modeswitch if element_style[:sanitize] text = MMMD::EntityUtils.encode_entities(text) end - opentag, closetag = construct_tags(element_style) + if element_style[:inline] + innerclose(element, element_style, text) + else + openclose(text, element, element_style, inline) + end + end + + def openclose(text, element, element_style, inline) + opentag, closetag = construct_tags(element_style, element) if inline opentag + text + closetag else @@ -211,21 +269,54 @@ module MMMD end end - def construct_tags(style) + def innerclose(element, style, text) + props = element.properties + tag = "<#{style[:tag]}" + tag += " style=#{style[:style].inspect}" if style[:style] + tag += " href=#{read_link(element)}" if style[:href] + tag += " alt=#{text.inspect}" if style[:alt] + tag += " src=#{read_link(element)}" if style[:src] + tag += " title=#{read_title(element)}" if style[:title] && props[:title] + tag += ">" + if style[:outer] + outeropen, outerclose = construct_tags(style[:outer], element) + tag = outeropen + tag + outerclose + end + tag + end + + def construct_tags(style, element) return ["", ""] unless style && style[:tag] + props = element.properties opentag = "<#{style[:tag]}" closetag = "#{style[:tag]}>" - opentag += " style=#{style[:style].dump}" if style["style"] + opentag += " style=#{style[:style].inspect}" if style[:style] + opentag += " href=#{read_link(element)}" if style[:href] + opentag += " src=#{read_link(element)}" if style[:src] + opentag += " title=#{read_title(element)}" if style[:title] && + props[:title] opentag += ">" if style[:outer] - outeropen, outerclose = construct_tags(style[:outer]) + outeropen, outerclose = construct_tags(style[:outer], element) opentag = outeropen + opentag closetag += outerclose end [opentag, closetag] end + def read_title(element) + title = element.properties[:title] + title = MMMD::EntityUtils.encode_entities(title) + title.dump + end + + def read_link(element) + link = element.properties[:uri] + link = MMMD::EntityUtils.encode_entities(link) + link.dump + end + def indent(text) text.lines.map do |line| "#{' ' * @options["indent"]}#{line}" @@ -233,13 +324,17 @@ module MMMD end def preambule + head = @options['head'] + headinfo = "#{indent(<<~HEAD.rstrip)}\n " if head + + #{head.is_a?(Array) ? head.join("\n") : head} + + HEAD + headinfo ||= " " @options['preambule'] or <<~TEXT.rstrip - - #{@options['head']} - - + #{headinfo} TEXT end diff --git a/lib/mmmd/renderers/plainterm.rb b/lib/mmmd/renderers/plainterm.rb index 27dec04..8ad1f48 100644 --- a/lib/mmmd/renderers/plainterm.rb +++ b/lib/mmmd/renderers/plainterm.rb @@ -312,6 +312,9 @@ module MMMD "PointBlank::DOM::QuoteBlock" => { leftline: true, increase_level: true + }, + "PointBlank::DOM::HorizontalRule" => { + underline_full_block: true } }.freeze diff --git a/lib/mmmd/util.rb b/lib/mmmd/util.rb index 5b15cf0..f9efe09 100644 --- a/lib/mmmd/util.rb +++ b/lib/mmmd/util.rb @@ -22,6 +22,21 @@ module MMMD end end + # Encode unsafe html entities in string (ASCII-compatible) + # @param string [String] + # @return [String] + # @sg-ignore + def self.encode_entities_ascii(string) + string.gsub("&", "&") + .gsub("<", "<") + .gsub(">", ">") + .gsub('"', """) + .gsub("'", "'") + .gsub(/[^\x00-\x7F]/) do |match| + "#{match.codepoints[0]};" + end + end + # Encode unsafe html entities in string # @param string [String] # @return [String] @@ -32,9 +47,6 @@ module MMMD .gsub(">", ">") .gsub('"', """) .gsub("'", "'") - .gsub(/[^\x00-\x7F]/) do |match| - "#{match.codepoints[0]};" - end end end end diff --git a/lib/rubymark b/lib/rubymark deleted file mode 120000 index 5f5df8f..0000000 --- a/lib/rubymark +++ /dev/null @@ -1 +0,0 @@ -mmmd \ No newline at end of file