diff --git a/lib/mmmd.rb b/lib/mmmd.rb new file mode 100644 index 0000000..8eada91 --- /dev/null +++ b/lib/mmmd.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +require_relative 'mmmd/blankshell' +require_relative 'mmmd/renderers' + +# Extensible, multi-format markdown processor +module MMMD + # Parse a Markdown document into a DOM form + # @param doc [String] + # @return [::PointBlank::DOM::Document] + def self.parse(doc) + ::PointBlank::DOM::Document.parse(doc) + end +end diff --git a/lib/mmmd/blankshell.rb b/lib/mmmd/blankshell.rb index c473e35..01a57f6 100644 --- a/lib/mmmd/blankshell.rb +++ b/lib/mmmd/blankshell.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative 'util' +require 'uri' # Modular, extensible approach to parsing markdown as close as # it gets to CommonMark spec (as of version 0.31.2). @@ -75,13 +76,13 @@ module PointBlank def read_destination(text) if (result = text.match(/\A<.*?(?/m)) && !result[0][1..].match?(/(?<])/, '')[1..-2], + [process_destination(result[0].gsub(/\\(?=[><])/, '')[1..-2]), text.delete_prefix(result[0]).lstrip] elsif (result = text.match(/\A\S+/)) && !result[0].start_with?('<') && result && balanced?(result[0]) - [result[0], + [process_destination(result[0]), text.delete_prefix(result[0]).lstrip] else [nil, text] @@ -95,15 +96,15 @@ module PointBlank def read_title(text) if text.start_with?("'") && (result = text.match(/\A'.*?(??@\[\\\]\^_`{|}~])/, + '\\1') + string = string.gsub("\n", " ") + URI.encode_uri_component( + MMMD::EntityUtils.decode_entities(string) + ) + end + + # Process title string + # @param string [String] + # @return [String] + def process_title(string) + string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/, + '\\1') + string = string.gsub("\n", " ") + MMMD::EntityUtils.decode_entities(string) + end end class LineScanner diff --git a/lib/mmmd/renderers.rb b/lib/mmmd/renderers.rb index 7cbe4b5..b6a8b9d 100644 --- a/lib/mmmd/renderers.rb +++ b/lib/mmmd/renderers.rb @@ -1,9 
+1,11 @@ # frozen_string_literal: true -module RBMark +$LOAD_PATH.append(__dir__) + +module MMMD # Renderers from Markdown to expected output format module Renderers + autoload :HTML, 'renderers/html' + autoload :PlainTerm, 'renderers/plainterm' end end - -require_relative 'renderers/html' diff --git a/lib/mmmd/renderers/html.rb b/lib/mmmd/renderers/html.rb index 4f771c9..9aa5f49 100644 --- a/lib/mmmd/renderers/html.rb +++ b/lib/mmmd/renderers/html.rb @@ -7,7 +7,8 @@ module MMMD module HTMLConstants ELEMENT_MAP = { "PointBlank::DOM::InlinePre" => { - tag: "pre" + tag: "code", + style: "white-space: pre;" }, "PointBlank::DOM::InlineBreak" => { tag: "br" @@ -26,12 +27,14 @@ module MMMD tag: "s" }, "PointBlank::DOM::InlineLink" => { - tag: "link", + tag: "a", href: true }, "PointBlank::DOM::InlineImage" => { tag: "img", - src: true + src: true, + inline: true, + alt: true }, "PointBlank::DOM::ULBlock" => { tag: "ul" @@ -88,7 +91,8 @@ module MMMD tag: "blockquote" }, "PointBlank::DOM::HorizontalRule" => { - tag: "hr" + tag: "hr", + inline: true }, "PointBlank::DOM::Text" => { sanitize: true @@ -128,41 +132,80 @@ module MMMD def initialize(dom, options) @document = dom @options = options + @options["linewrap"] ||= 80 @options["init_level"] ||= 2 @options["indent"] ||= 2 mapmanager = HTMLConstants::MapManager.new(options) @mapping = mapmanager.mapping + return unless @options["nowrap"] + + @options["init_level"] = 0 + @mapping.delete("PointBlank::DOM::Document") end # Render document to HTML def render text = _render(@document, @options, level: @options["init_level"]) @options["init_level"].times { text = indent(text) } - [ - preambule, - text, - postambule - ].join("\n") + if @options["nowrap"] + text + else + [ + preambule, + remove_pre_spaces(text), + postambule + ].join("\n") + end end private + # Find and remove extra spaces inbetween preformatted text + # @param string [String] + # @return [String] + def remove_pre_spaces(string) + output = [] + buffer = [] + open 
= nil + string.lines.each do |line| + opentoken = line.match?(/
/)
+          closetoken = line.match?(/<\/pre>/)
+          if closetoken
+            open = false
+            buffer = strip_leading_spaces_in_buffer(buffer)
+            output.append(*buffer)
+            buffer = []
+          end
+          (open ? buffer : output).append(line)
+          open = true if opentoken && !closetoken
+        end
+        output.append(*buffer) unless buffer.empty?
+        output.join('')
+      end
+
+      # Remove the indentation shared by every line of the buffer
+      # @param buffer [Array<String>] lines to dedent
+      # @return [Array<String>] lines without the common leading spaces
+      def strip_leading_spaces_in_buffer(buffer)
+        indents = buffer.map { |line| line[/^ */] }
+        shortest = indents.min_by(&:length)
+        buffer.map { |line| line.delete_prefix(shortest) }
+      end
+
       # Word wrapping algorithm
       # @param text [String]
       # @param width [Integer]
       # @return [String]
       def wordwrap(text, width)
-        words = text.split(/( +)/)
+        words = text.split(/( +|<[^>]+>)/)
         output = []
         line = ""
         length = 0
         until words.empty?
           word = words.shift
           wordlength = word.length
-          if wordlength > width
-            words.prepend(word[width..])
-            word = word[..width - 1]
-          end
           if length + wordlength + 1 > width
             output.append(line.lstrip)
             line = word
@@ -176,7 +219,7 @@ module MMMD
         output.join("\n")
       end
 
-      def _render(element, options, inline: false, level: 0)
+      def _render(element, options, inline: false, level: 0, literaltext: false)
         modeswitch = element.is_a?(::PointBlank::DOM::LeafBlock) ||
                      element.is_a?(::PointBlank::DOM::Paragraph)
         inline ||= modeswitch
@@ -184,24 +227,39 @@ module MMMD
         text = if element.children.empty?
                  element.content
                else
+                 literal = @mapping[element.class.name][:inline] || literaltext
                  element.children.map do |child|
                    _render(child, options, inline: inline,
-                                           level: level)
+                                           level: level,
+                                           literaltext: literal)
                  end.join(inline ? '' : "\n")
                end
         run_filters(text, element, level: level,
                                    inline: inline,
-                                   modeswitch: modeswitch)
+                                   modeswitch: modeswitch,
+                                   literaltext: literaltext)
       end
 
-      def run_filters(text, element, level:, inline:, modeswitch:)
+      def run_filters(text, element, level:, inline:, modeswitch:,
+                                     literaltext:)
         element_style = @mapping[element.class.name]
-        hsize = 80 - (level * @options["indent"])
+        return text unless element_style
+        return text if literaltext
+
+        hsize = @options["linewrap"] - (level * @options["indent"])
         text = wordwrap(text, hsize) if modeswitch
         if element_style[:sanitize]
           text = MMMD::EntityUtils.encode_entities(text)
         end
-        opentag, closetag = construct_tags(element_style)
+        if element_style[:inline]
+          innerclose(element, element_style, text)
+        else
+          openclose(text, element, element_style, inline)
+        end
+      end
+
+      def openclose(text, element, element_style, inline)
+        opentag, closetag = construct_tags(element_style, element)
         if inline
           opentag + text + closetag
         else
@@ -211,21 +269,54 @@ module MMMD
         end
       end
 
-      def construct_tags(style)
+      # Build the tag for an element whose mapping sets :inline. Only an
+      # opening tag is produced; the rendered child text is used solely as
+      # the alt attribute value.
+      # @param element [Object] DOM element responding to #properties
+      # @param style [Hash] mapping entry for the element's class
+      # @param text [String] rendered content of the element's children
+      # @return [String]
+      def innerclose(element, style, text)
+        props = element.properties
+        tag = "<#{style[:tag]}"
+        tag += " style=#{style[:style].inspect}" if style[:style]
+        tag += " href=#{read_link(element)}" if style[:href]
+        if style[:alt]
+          # String#inspect yields Ruby escapes (e.g. \"), which do not
+          # quote an HTML attribute safely, so entity-encode instead
+          alt = MMMD::EntityUtils.encode_entities(text)
+          tag += " alt=\"#{alt}\""
+        end
+        tag += " src=#{read_link(element)}" if style[:src]
+        tag += " title=#{read_title(element)}" if style[:title] && props[:title]
+        tag += ">"
+        if style[:outer]
+          # Wrap the finished tag in the configured outer element
+          outeropen, outerclose = construct_tags(style[:outer], element)
+          tag = outeropen + tag + outerclose
+        end
+        tag
+      end
+
+      def construct_tags(style, element)
         return ["", ""] unless style && style[:tag]
 
+        props = element.properties
         opentag = "<#{style[:tag]}"
         closetag = ""
-        opentag += " style=#{style[:style].dump}" if style["style"]
+        opentag += " style=#{style[:style].inspect}" if style[:style]
+        opentag += " href=#{read_link(element)}" if style[:href]
+        opentag += " src=#{read_link(element)}" if style[:src]
+        opentag += " title=#{read_title(element)}" if style[:title] &&
+                                                      props[:title]
         opentag += ">"
         if style[:outer]
-          outeropen, outerclose = construct_tags(style[:outer])
+          outeropen, outerclose = construct_tags(style[:outer], element)
           opentag = outeropen + opentag
           closetag += outerclose
         end
         [opentag, closetag]
       end
 
+      # Title of the element, entity-encoded and double-quoted for use as
+      # an HTML attribute value
+      # @param element [Object] DOM element whose properties hold :title
+      # @return [String]
+      def read_title(element)
+        title = MMMD::EntityUtils.encode_entities(element.properties[:title])
+        # Quote by hand: String#dump would rewrite non-ASCII characters
+        # and backslashes as Ruby escape sequences
+        "\"#{title}\""
+      end
+
+      # URI of the element, entity-encoded and double-quoted for use as
+      # an HTML attribute value
+      # @param element [Object] DOM element whose properties hold :uri
+      # @return [String]
+      def read_link(element)
+        link = MMMD::EntityUtils.encode_entities(element.properties[:uri])
+        # Quote by hand: String#dump would rewrite non-ASCII characters
+        # and backslashes as Ruby escape sequences
+        "\"#{link}\""
+      end
+
       def indent(text)
         text.lines.map do |line|
           "#{' ' * @options["indent"]}#{line}"
@@ -233,13 +324,17 @@ module MMMD
       end
 
       def preambule
+        head = @options['head']
+        headinfo = "#{indent(<<~HEAD.rstrip)}\n  " if head
+          
+            #{head.is_a?(Array) ? head.join("\n") : head}
+          
+        HEAD
+        headinfo ||= "  "
         @options['preambule'] or <<~TEXT.rstrip
           
           
-            
-              #{@options['head']}
-            
-            
+          #{headinfo}
         TEXT
       end
 
diff --git a/lib/mmmd/renderers/plainterm.rb b/lib/mmmd/renderers/plainterm.rb
index 27dec04..8ad1f48 100644
--- a/lib/mmmd/renderers/plainterm.rb
+++ b/lib/mmmd/renderers/plainterm.rb
@@ -312,6 +312,9 @@ module MMMD
         "PointBlank::DOM::QuoteBlock" => {
           leftline: true,
           increase_level: true
+        },
+        "PointBlank::DOM::HorizontalRule" => {
+          underline_full_block: true
         }
       }.freeze
 
diff --git a/lib/mmmd/util.rb b/lib/mmmd/util.rb
index 5b15cf0..f9efe09 100644
--- a/lib/mmmd/util.rb
+++ b/lib/mmmd/util.rb
@@ -22,6 +22,21 @@ module MMMD
       end
     end
 
+    # Encode unsafe html entities in string (ASCII-compatible):
+    # non-ASCII characters become hexadecimal character references
+    # @param string [String]
+    # @return [String]
+    # @sg-ignore
+    def self.encode_entities_ascii(string)
+      string.gsub("&", "&amp;")
+            .gsub("<", "&lt;")
+            .gsub(">", "&gt;")
+            .gsub('"', "&quot;")
+            .gsub("'", "&#39;")
+            .gsub(/[^\x00-\x7F]/) do |match|
+              # "&#x" introduces a hexadecimal reference, so the
+              # codepoint has to be written in base 16
+              "&#x#{match.ord.to_s(16)};"
+            end
+    end
+
     # Encode unsafe html entities in string
     # @param string [String]
     # @return [String]
@@ -32,9 +47,6 @@ module MMMD
             .gsub(">", ">")
             .gsub('"', """)
             .gsub("'", "'")
-            .gsub(/[^\x00-\x7F]/) do |match|
-              "&#x#{match.codepoints[0]};"
-            end
     end
   end
 end
diff --git a/lib/rubymark b/lib/rubymark
deleted file mode 120000
index 5f5df8f..0000000
--- a/lib/rubymark
+++ /dev/null
@@ -1 +0,0 @@
-mmmd
\ No newline at end of file