compatibility fixes for emphasis

This commit is contained in:
Yessiest 2025-03-13 10:19:44 +04:00
parent f03f8dfa29
commit 0863d4cf4a
2 changed files with 59 additions and 14 deletions

2
.gitignore vendored
View File

@@ -1,2 +1,4 @@
/lib/example.md
/*.gem
/.yardoc
test*

View File

@@ -367,6 +367,7 @@ module PointBlank
# (see ::PointBlank::Parsing::NullParser#consume)
def consume(line, parent = nil, lazy: false)
puts line.inspect if lazy
@lazy_triggered = lazy || @lazy_triggered
return [nil, nil] if line.match?(/\A {0,3}\Z/)
return [nil, nil] if @closed
@@ -957,9 +958,9 @@ module PointBlank
parts = tokens
@valid_parsers.each do |parser|
newparts = []
parts.each do |x|
parts.each_with_index do |x, i|
if x.is_a? String
newparts.append(*parser.tokenize(x))
newparts.append(*parser.tokenize(x, newparts.last, parts[i + 1]))
else
newparts.append(x)
end
@@ -1013,8 +1014,10 @@ module PointBlank
# Tokenize a string
# @param string [String]
# @param before [String, ::PointBlank::DOM::DOMObject]
# @param after [String, ::PointBlank::DOM::DOMObject]
# @return [Array<Array(String, Class, Symbol), String>]
def self.tokenize(string)
def self.tokenize(string, before, after)
  # Default implementation performs no tokenization: the whole string is
  # returned as a single token. The +before+/+after+ lookaround arguments
  # are accepted (and ignored) so that every subclass shares the same
  # tokenize signature.
  [string]
end
@@ -1142,7 +1145,7 @@ module PointBlank
# Code inline parser
class CodeInline < NullInline
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
open = {}
iterate_tokens(string, "`") do |_before, current_text, matched|
if matched
@@ -1182,7 +1185,7 @@ module PointBlank
# Autolink inline parser
class AutolinkInline < NullInline
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
iterate_tokens(string, /[<>]/) do |_before, current_text, matched|
if matched
if current_text.start_with?("<")
@@ -1238,11 +1241,10 @@ module PointBlank
linkinfo = capture[-1][2]
obj = build(capture[1..-2])
if linkinfo[:label]
if (props = doc.root.properties[:linkdefs][linkinfo[:label]])
linkinfo = props
else
unless (props = doc.root.properties[:linkdefs][linkinfo[:label]])
return nil
end
linkinfo = props
end
obj.properties = linkinfo
obj
@@ -1277,7 +1279,7 @@ module PointBlank
end
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
iterate_tokens(string, /(?:!\[|\]\()/) do |_before, text, matched|
next text[0] unless matched
next ["![", self, :open] if text.start_with? "!["
@@ -1296,7 +1298,7 @@ module PointBlank
end
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
iterate_tokens(string, /(?:\[|\][(\[])/) do |_before, text, matched|
next text[0] unless matched
next ["[", self, :open] if text.start_with? "["
@@ -1308,20 +1310,61 @@ module PointBlank
end
end
# TODO: this seems way too complicated for something that's supposed
# to be a goddamn emphasis markup parser. i'd blame it on commonmark's
# convoluted specs.
# (P.S: it could be possible to make this easier for implementers by
# making a claims system with pointers that do not modify the string
# while it's being parsed. however that would just move complexity from
# the parser into the scanner instead. and it does not resolve the
# problem of overlapping claims as efficiently as simply splitting text
# into tokens and remaining string bits.)
# Emphasis and strong emphasis inline parser
class EmphInline < NullInline
INFIX_TOKENS = /^[^\p{S}\p{P}\p{Zs}_]_++[^\p{S}\p{P}\p{Zs}_]$/
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, before, after)
bfrb = extract_left(before)
afra = extract_right(after)
iterate_tokens(string, /(?:_++|\*++)/) do |bfr, text, matched|
token, afr = text.match(/^(_++|\*++)(.?)/)[1..2]
left = left_token?(bfr[-1] || "", token, afr)
right = right_token?(bfr[-1] || "", token, afr)
bfr = bfr[-1] || bfrb || ""
afr = afr.empty? ? afra || "" : afr
left = left_token?(bfr, token, afr)
right = right_token?(bfr, token, afr)
break_into_elements(token, [bfr[-1] || "", token, afr].join(''),
left, right, matched)
end
end
# Determine the character immediately preceding the tokenized string,
# used for left-flanking delimiter checks.
# @param bfr [String, ::PointBlank::DOM::DOMObject, Array(String, Class, Symbol)]
#   previous sibling token, or nil when there is none
# @return [String, nil] last character of the preceding text, a
#   punctuation placeholder for inline objects, or nil
def self.extract_left(bfr)
  if bfr.is_a?(String)
    bfr[-1]
  elsif bfr.is_a?(::PointBlank::DOM::DOMObject)
    # An adjacent inline element counts as punctuation for flanking rules
    "."
  elsif bfr.is_a?(Array)
    bfr.first[-1]
  end
end
# Determine the character immediately following the tokenized string,
# used for right-flanking delimiter checks.
# @param afr [String, ::PointBlank::DOM::DOMObject, Array(String, Class, Symbol)]
#   next sibling token, or nil when there is none
# @return [String, nil] first character of the following text, a
#   punctuation placeholder for inline objects, or nil
def self.extract_right(afr)
  if afr.is_a?(String)
    afr[0]
  elsif afr.is_a?(::PointBlank::DOM::DOMObject)
    # An adjacent inline element counts as punctuation for flanking rules
    "."
  elsif afr.is_a?(Array)
    afr.first[0]
  end
end
# Is this token, given these surrounding characters, left-flanking?
# @param bfr [String]
# @param token [String]
@@ -1431,7 +1474,7 @@ module PointBlank
# Hard break
class HardBreakInline < NullInline
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
iterate_tokens(string, /(?: \n|\\\n)/) do |_before, token, matched|
next ["\n", self, :close] if token.start_with?(" \n")
next ["\n", self, :close] if matched