From 1fb5f15eadce697ead8f131864ae56edf22cb7fb Mon Sep 17 00:00:00 2001
From: Yessiest <yessiest@memeware.net>
Date: Fri, 7 Mar 2025 21:29:24 +0000
Subject: [PATCH] HTML renderer fixes, additional compliance

---
 lib/mmmd.rb                     |  14 +++
 lib/mmmd/blankshell.rb          |  35 ++++++--
 lib/mmmd/renderers.rb           |   8 +-
 lib/mmmd/renderers/html.rb      | 149 ++++++++++++++++++++++++++------
 lib/mmmd/renderers/plainterm.rb |   3 +
 lib/mmmd/util.rb                |  18 +++-
 lib/rubymark                    |   1 -
 7 files changed, 188 insertions(+), 40 deletions(-)
 create mode 100644 lib/mmmd.rb
 delete mode 120000 lib/rubymark

diff --git a/lib/mmmd.rb b/lib/mmmd.rb
new file mode 100644
index 0000000..8eada91
--- /dev/null
+++ b/lib/mmmd.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+require_relative 'mmmd/blankshell'
+require_relative 'mmmd/renderers'
+
+# Extensible, multi-format markdown processor
+module MMMD
+  # Parse a Markdown document into its DOM representation
+  # @param doc [String] Markdown source text
+  # @return [::PointBlank::DOM::Document]
+  def self.parse(doc)
+    ::PointBlank::DOM::Document.parse(doc)
+  end
+end
diff --git a/lib/mmmd/blankshell.rb b/lib/mmmd/blankshell.rb
index c473e35..01a57f6 100644
--- a/lib/mmmd/blankshell.rb
+++ b/lib/mmmd/blankshell.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require_relative 'util'
+require 'uri'
 
 # Modular, extensible approach to parsing markdown as close as
 # it gets to CommonMark spec (as of version 0.31.2).
@@ -75,13 +76,13 @@ module PointBlank
       def read_destination(text)
         if (result = text.match(/\A<.*?(?<![^\\]\\)>/m)) &&
            !result[0][1..].match?(/(?<![^\\]\\)</)
-          [result[0].gsub(/\\(?=[><])/, '')[1..-2],
+          [process_destination(result[0].gsub(/\\(?=[><])/, '')[1..-2]),
            text.delete_prefix(result[0]).lstrip]
         elsif (result = text.match(/\A\S+/)) &&
               !result[0].start_with?('<') &&
               result &&
               balanced?(result[0])
-          [result[0],
+          [process_destination(result[0]),
            text.delete_prefix(result[0]).lstrip]
         else
           [nil, text]
@@ -95,15 +96,15 @@ module PointBlank
       def read_title(text)
         if text.start_with?("'") &&
            (result = text.match(/\A'.*?(?<!\\)'/m))
-          [result[0][1..-2],
+          [process_title(result[0][1..-2]),
            text.delete_prefix(result[0]).lstrip]
         elsif text.start_with?('"') &&
               (result = text.match(/\A".*?(?<!\\)"/m))
-          [result[0][1..-2],
+          [process_title(result[0][1..-2]),
            text.delete_prefix(result[0]).lstrip]
         elsif text.start_with?('(') &&
               (result = find_balanced_end(text))
-          [text[1..(result - 1)],
+          [process_title(text[1..(result - 1)]),
            text.delete_prefix(text[..result]).lstrip]
         else
           [nil, text]
@@ -125,7 +126,7 @@ module PointBlank
           return [nil, text] unless destination
 
           title, remaining = read_title(remaining)
-          properties[:destination] = destination
+          properties[:uri] = destination
           properties[:title] = title
           close_bracket = true
         end
@@ -169,6 +170,28 @@ module PointBlank
         end
         nil
       end
+
+      # Process destination: unescape, then percent-encode unsafe characters
+      # @param string [String] raw link destination
+      # @return [String] destination with URI delimiters left intact
+      def process_destination(string)
+        string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
+                             '\\1')
+        string = MMMD::EntityUtils.decode_entities(string.gsub("\n", " "))
+        string.gsub(%r{[^A-Za-z0-9_\-.~:/?#\[\]@!$&'()*+,;=%]}) do |char|
+          char.bytes.map { |byte| format('%%%02X', byte) }.join
+        end
+      end
+
+      # Process title string: drop escaping backslashes, decode entities
+      # @param string [String] title text (callers strip the delimiters)
+      # @return [String] title with newlines replaced by spaces
+      def process_title(string)
+        string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
+                             '\\1')
+        string = string.gsub("\n", " ")
+        MMMD::EntityUtils.decode_entities(string)
+      end
     end
 
     class LineScanner
diff --git a/lib/mmmd/renderers.rb b/lib/mmmd/renderers.rb
index 7cbe4b5..b6a8b9d 100644
--- a/lib/mmmd/renderers.rb
+++ b/lib/mmmd/renderers.rb
@@ -1,9 +1,11 @@
 # frozen_string_literal: true
 
-module RBMark
+$LOAD_PATH.append(__dir__)
+
+module MMMD
   # Renderers from Markdown to expected output format
   module Renderers
+    autoload :HTML, 'renderers/html'
+    autoload :PlainTerm, 'renderers/plainterm'
   end
 end
-
-require_relative 'renderers/html'
diff --git a/lib/mmmd/renderers/html.rb b/lib/mmmd/renderers/html.rb
index 4f771c9..9aa5f49 100644
--- a/lib/mmmd/renderers/html.rb
+++ b/lib/mmmd/renderers/html.rb
@@ -7,7 +7,8 @@ module MMMD
     module HTMLConstants
       ELEMENT_MAP = {
         "PointBlank::DOM::InlinePre" => {
-          tag: "pre"
+          tag: "code",
+          style: "white-space: pre;"
         },
         "PointBlank::DOM::InlineBreak" => {
           tag: "br"
@@ -26,12 +27,14 @@ module MMMD
           tag: "s"
         },
         "PointBlank::DOM::InlineLink" => {
-          tag: "link",
+          tag: "a",
           href: true
         },
         "PointBlank::DOM::InlineImage" => {
           tag: "img",
-          src: true
+          src: true,
+          inline: true,
+          alt: true
         },
         "PointBlank::DOM::ULBlock" => {
           tag: "ul"
@@ -88,7 +91,8 @@ module MMMD
           tag: "blockquote"
         },
         "PointBlank::DOM::HorizontalRule" => {
-          tag: "hr"
+          tag: "hr",
+          inline: true
         },
         "PointBlank::DOM::Text" => {
           sanitize: true
@@ -128,41 +132,80 @@ module MMMD
       def initialize(dom, options)
         @document = dom
         @options = options
+        @options["linewrap"] ||= 80
         @options["init_level"] ||= 2
         @options["indent"] ||= 2
         mapmanager = HTMLConstants::MapManager.new(options)
         @mapping = mapmanager.mapping
+        return unless @options["nowrap"]
+
+        @options["init_level"] = 0
+        @mapping.delete("PointBlank::DOM::Document")
       end
 
       # Render document to HTML
       def render
         text = _render(@document, @options, level: @options["init_level"])
         @options["init_level"].times { text = indent(text) }
-        [
-          preambule,
-          text,
-          postambule
-        ].join("\n")
+        if @options["nowrap"]
+          text
+        else
+          [
+            preambule,
+            remove_pre_spaces(text),
+            postambule
+          ].join("\n")
+        end
       end
 
       private
 
+      # Remove the common leading indent from lines between <pre> and </pre>
+      # @param string [String] text to scan line by line
+      # @return [String] text with the buffered lines de-indented
+      def remove_pre_spaces(string)
+        output = []
+        buffer = []
+        open = nil
+        string.lines.each do |line|
+          opentoken = line.match?(/<pre>/)
+          closetoken = line.match?(/<\/pre>/)
+          if closetoken
+            open = false
+            buffer = strip_leading_spaces_in_buffer(buffer)
+            output.append(*buffer)
+            buffer = []
+          end
+          (open ? buffer : output).append(line)
+          open = true if opentoken && !closetoken
+        end
+        output.append(*buffer) unless buffer.empty?
+        output.join('')
+      end
+
+      # Strip the shortest leading run of spaces found from every line
+      # @param buffer [Array<String>]
+      # @return [Array<String>]
+      def strip_leading_spaces_in_buffer(buffer)
+        minprefix = buffer.map { |x| x.match(/^ */)[0] }
+                          .min_by(&:length)
+        buffer.map do |line|
+          line.delete_prefix(minprefix)
+        end
+      end
+
       # Word wrapping algorithm
       # @param text [String]
       # @param width [Integer]
       # @return [String]
       def wordwrap(text, width)
-        words = text.split(/( +)/)
+        words = text.split(/( +|<[^>]+>)/)
         output = []
         line = ""
         length = 0
         until words.empty?
           word = words.shift
           wordlength = word.length
-          if wordlength > width
-            words.prepend(word[width..])
-            word = word[..width - 1]
-          end
           if length + wordlength + 1 > width
             output.append(line.lstrip)
             line = word
@@ -176,7 +219,7 @@ module MMMD
         output.join("\n")
       end
 
-      def _render(element, options, inline: false, level: 0)
+      def _render(element, options, inline: false, level: 0, literaltext: false)
         modeswitch = element.is_a?(::PointBlank::DOM::LeafBlock) ||
                      element.is_a?(::PointBlank::DOM::Paragraph)
         inline ||= modeswitch
@@ -184,24 +227,39 @@ module MMMD
         text = if element.children.empty?
                  element.content
                else
+                 literal = @mapping.dig(element.class.name, :inline) || literaltext
                  element.children.map do |child|
                    _render(child, options, inline: inline,
-                                           level: level)
+                                           level: level,
+                                           literaltext: literal)
                  end.join(inline ? '' : "\n")
                end
         run_filters(text, element, level: level,
                                    inline: inline,
-                                   modeswitch: modeswitch)
+                                   modeswitch: modeswitch,
+                                   literaltext: literaltext)
       end
 
-      def run_filters(text, element, level:, inline:, modeswitch:)
+      def run_filters(text, element, level:, inline:, modeswitch:,
+                                     literaltext:)
         element_style = @mapping[element.class.name]
-        hsize = 80 - (level * @options["indent"])
+        return text unless element_style
+        return text if literaltext
+
+        hsize = @options["linewrap"] - (level * @options["indent"])
         text = wordwrap(text, hsize) if modeswitch
         if element_style[:sanitize]
           text = MMMD::EntityUtils.encode_entities(text)
         end
-        opentag, closetag = construct_tags(element_style)
+        if element_style[:inline]
+          innerclose(element, element_style, text)
+        else
+          openclose(text, element, element_style, inline)
+        end
+      end
+
+      def openclose(text, element, element_style, inline)
+        opentag, closetag = construct_tags(element_style, element)
         if inline
           opentag + text + closetag
         else
@@ -211,21 +269,54 @@ module MMMD
         end
       end
 
-      def construct_tags(style)
+      def innerclose(element, style, text)
+        props = element.properties
+        tag = "<#{style[:tag]}"
+        tag += " style=#{style[:style].inspect}" if style[:style]
+        tag += " href=#{read_link(element)}" if style[:href]
+        # child text arrives unsanitized, so entity-encode it for the attribute
+        alt = MMMD::EntityUtils.encode_entities(text.to_s) if style[:alt]
+        tag += " alt=\"#{alt}\"" if style[:alt]
+        tag += " src=#{read_link(element)}" if style[:src]
+        tag += " title=#{read_title(element)}" if style[:title] && props[:title]
+        tag += ">"
+        return tag unless style[:outer]
+        outeropen, outerclose = construct_tags(style[:outer], element)
+        outeropen + tag + outerclose
+      end
+
+      def construct_tags(style, element)
         return ["", ""] unless style && style[:tag]
 
+        props = element.properties
         opentag = "<#{style[:tag]}"
         closetag = "</#{style[:tag]}>"
-        opentag += " style=#{style[:style].dump}" if style["style"]
+        opentag += " style=#{style[:style].inspect}" if style[:style]
+        opentag += " href=#{read_link(element)}" if style[:href]
+        opentag += " src=#{read_link(element)}" if style[:src]
+        opentag += " title=#{read_title(element)}" if style[:title] &&
+                                                      props[:title]
         opentag += ">"
         if style[:outer]
-          outeropen, outerclose = construct_tags(style[:outer])
+          outeropen, outerclose = construct_tags(style[:outer], element)
           opentag = outeropen + opentag
           closetag += outerclose
         end
         [opentag, closetag]
       end
 
+      def read_title(element)
+        title = element.properties[:title]
+        title = MMMD::EntityUtils.encode_entities(title)
+        "\"#{title}\"" # not String#dump: HTML ignores backslash escapes
+      end
+
+      def read_link(element)
+        link = element.properties[:uri]
+        link = MMMD::EntityUtils.encode_entities(link)
+        "\"#{link}\"" # not String#dump: HTML ignores backslash escapes
+      end
+
       def indent(text)
         text.lines.map do |line|
           "#{' ' * @options["indent"]}#{line}"
@@ -233,13 +324,17 @@ module MMMD
       end
 
       def preambule
+        head = @options['head']
+        headinfo = "#{indent(<<~HEAD.rstrip)}\n  " if head
+          <head>
+            #{head.is_a?(Array) ? head.join("\n") : head}
+          </head>
+        HEAD
+        headinfo ||= "  "
         @options['preambule'] or <<~TEXT.rstrip
           <!DOCTYPE HTML>
           <html>
-            <head>
-              #{@options['head']}
-            </head>
-            <body>
+          #{headinfo}<body>
         TEXT
       end
 
diff --git a/lib/mmmd/renderers/plainterm.rb b/lib/mmmd/renderers/plainterm.rb
index 27dec04..8ad1f48 100644
--- a/lib/mmmd/renderers/plainterm.rb
+++ b/lib/mmmd/renderers/plainterm.rb
@@ -312,6 +312,9 @@ module MMMD
         "PointBlank::DOM::QuoteBlock" => {
           leftline: true,
           increase_level: true
+        },
+        "PointBlank::DOM::HorizontalRule" => {
+          underline_full_block: true
         }
       }.freeze
 
diff --git a/lib/mmmd/util.rb b/lib/mmmd/util.rb
index 5b15cf0..f9efe09 100644
--- a/lib/mmmd/util.rb
+++ b/lib/mmmd/util.rb
@@ -22,6 +22,21 @@ module MMMD
       end
     end
 
+    # Encode unsafe html entities in string (ASCII-compatible)
+    # @param string [String]
+    # @return [String] ASCII-only string with hex character references
+    # @sg-ignore
+    def self.encode_entities_ascii(string)
+      string.gsub("&", "&amp;")
+            .gsub("<", "&lt;")
+            .gsub(">", "&gt;")
+            .gsub('"', "&quot;")
+            .gsub("'", "&#39;")
+            .gsub(/[^\x00-\x7F]/) do |match|
+              "&#x#{match.codepoints[0].to_s(16)};"
+            end
+    end
+
     # Encode unsafe html entities in string
     # @param string [String]
     # @return [String]
@@ -32,9 +47,6 @@ module MMMD
             .gsub(">", "&gt;")
             .gsub('"', "&quot;")
             .gsub("'", "&#39;")
-            .gsub(/[^\x00-\x7F]/) do |match|
-              "&#x#{match.codepoints[0]};"
-            end
     end
   end
 end
diff --git a/lib/rubymark b/lib/rubymark
deleted file mode 120000
index 5f5df8f..0000000
--- a/lib/rubymark
+++ /dev/null
@@ -1 +0,0 @@
-mmmd
\ No newline at end of file