diff --git a/.gitignore b/.gitignore
index eef30ba..b931057 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 /lib/example.md
 /*.gem
+/.yardoc
+test*
diff --git a/lib/mmmd/blankshell.rb b/lib/mmmd/blankshell.rb
index bd34f02..65b9b3e 100644
--- a/lib/mmmd/blankshell.rb
+++ b/lib/mmmd/blankshell.rb
@@ -367,6 +367,7 @@ module PointBlank
 
       # (see ::PointBlank::Parsing::NullParser#consume)
       def consume(line, parent = nil, lazy: false)
+        puts line.inspect if lazy
         @lazy_triggered = lazy || @lazy_triggered
         return [nil, nil] if line.match?(/\A {0,3}\Z/)
         return [nil, nil] if @closed
@@ -957,9 +958,9 @@ module PointBlank
         parts = tokens
         @valid_parsers.each do |parser|
           newparts = []
-          parts.each do |x|
+          parts.each_with_index do |x, i|
             if x.is_a? String
-              newparts.append(*parser.tokenize(x))
+              newparts.append(*parser.tokenize(x, newparts.last, parts[i + 1]))
             else
               newparts.append(x)
             end
@@ -1013,8 +1014,10 @@ module PointBlank
 
       # Tokenize a string
       # @param string [String]
+      # @param before [String, ::PointBlank::DOM::DOMObject]
+      # @param after [String, ::PointBlank::DOM::DOMObject]
       # @return [Array]
-      def self.tokenize(string)
+      def self.tokenize(string, before, after)
         [string]
       end
 
@@ -1142,7 +1145,7 @@ module PointBlank
     # Code inline parser
     class CodeInline < NullInline
       # (see ::PointBlank::Parsing::NullInline#tokenize)
-      def self.tokenize(string)
+      def self.tokenize(string, *_lookaround)
         open = {}
         iterate_tokens(string, "`") do |_before, current_text, matched|
           if matched
@@ -1182,7 +1185,7 @@ module PointBlank
     # Autolink inline parser
     class AutolinkInline < NullInline
       # (see ::PointBlank::Parsing::NullInline#tokenize)
-      def self.tokenize(string)
+      def self.tokenize(string, *_lookaround)
         iterate_tokens(string, /[<>]/) do |_before, current_text, matched|
           if matched
             if current_text.start_with?("<")
@@ -1238,11 +1241,10 @@ module PointBlank
         linkinfo = capture[-1][2]
         obj = build(capture[1..-2])
         if linkinfo[:label]
-          if (props = doc.root.properties[:linkdefs][linkinfo[:label]])
-            linkinfo = props
-          else
+          unless (props = doc.root.properties[:linkdefs][linkinfo[:label]])
             return nil
           end
+          linkinfo = props
         end
         obj.properties = linkinfo
         obj
@@ -1277,7 +1279,7 @@ module PointBlank
       end
 
       # (see ::PointBlank::Parsing::NullInline#tokenize)
-      def self.tokenize(string)
+      def self.tokenize(string, *_lookaround)
         iterate_tokens(string, /(?:!\[|\]\()/) do |_before, text, matched|
           next text[0] unless matched
           next ["![", self, :open] if text.start_with? "!["
@@ -1296,7 +1298,7 @@ module PointBlank
       end
 
       # (see ::PointBlank::Parsing::NullInline#tokenize)
-      def self.tokenize(string)
+      def self.tokenize(string, *_lookaround)
         iterate_tokens(string, /(?:\[|\][(\[])/) do |_before, text, matched|
           next text[0] unless matched
           next ["[", self, :open] if text.start_with? "["
@@ -1308,20 +1310,61 @@ module PointBlank
       end
     end
 
+    # TODO: this seems way too complicated for something that's supposed
+    # to be a goddamn emphasis markup parser. i'd blame it on commonmark's
+    # convoluted specs.
+    # (P.S: it could be possible to make this easier for implementers by
+    # making a claims system with pointers that do not modify the string
+    # while it's being parsed. however that would just move complexity from
+    # the parser into the scanner instead. and it does not resolve the
+    # problem of overlapping claims as efficiently as simply splitting text
+    # into tokens and remaining string bits.)
+
     # Emphasis and strong emphasis inline parser
     class EmphInline < NullInline
       INFIX_TOKENS = /^[^\p{S}\p{P}\p{Zs}_]_++[^\p{S}\p{P}\p{Zs}_]$/
 
       # (see ::PointBlank::Parsing::NullInline#tokenize)
-      def self.tokenize(string)
+      def self.tokenize(string, before, after)
+        bfrb = extract_left(before)
+        afra = extract_right(after)
         iterate_tokens(string, /(?:_++|\*++)/) do |bfr, text, matched|
           token, afr = text.match(/^(_++|\*++)(.?)/)[1..2]
-          left = left_token?(bfr[-1] || "", token, afr)
-          right = right_token?(bfr[-1] || "", token, afr)
+          bfr = bfr[-1] || bfrb || ""
+          afr = afr.empty? ? afra || "" : afr
+          left = left_token?(bfr, token, afr)
+          right = right_token?(bfr, token, afr)
           break_into_elements(token, [bfr[-1] || "", token, afr].join(''), left, right, matched)
         end
       end
 
+      # Extract left-flanking token from before the tokenized string
+      # @param bfr [String, ::PointBlank::DOM::DOMObject, Array(String, Class, Symbol)]
+      # @return [String]
+      def self.extract_left(bfr)
+        case bfr
+        when String
+          bfr[-1]
+        when ::PointBlank::DOM::DOMObject
+          "."
+        when Array
+          bfr.first[-1]
+        end
+      end
+
+      # Extract right-flanking token from after the tokenized string
+      # @param afr [String, ::PointBlank::DOM::DOMObject, Array(String, Class, Symbol)]
+      # @return [String]
+      def self.extract_right(afr)
+        case afr
+        when String
+          afr[0]
+        when ::PointBlank::DOM::DOMObject
+          "."
+        when Array
+          afr.first[0]
+        end
+      end
       # Is this token, given these surrounding characters, left-flanking?
       # @param bfr [String]
       # @param token [String]
@@ -1431,7 +1474,7 @@ module PointBlank
     # Hard break
     class HardBreakInline < NullInline
       # (see ::PointBlank::Parsing::NullInline#tokenize)
-      def self.tokenize(string)
+      def self.tokenize(string, *_lookaround)
         iterate_tokens(string, /(?: \n|\\\n)/) do |_before, token, matched|
           next ["\n", self, :close] if token.start_with?(" \n")
           next ["\n", self, :close] if matched
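
Note on the new tokenize contract (a sketch for reviewers, not part of the patch): the dispatch loop changed in the hunk at line 957 now hands every inline parser two lookaround arguments alongside the string, the part most recently emitted (newparts.last) and the next pending part (parts[i + 1]). Parsers that do not care about context absorb them with a *_lookaround splat; EmphInline feeds them through extract_left/extract_right so the CommonMark flanking rules can see across token boundaries. A minimal, self-contained Ruby illustration of that calling convention, with the hypothetical names DummyInline and retokenize standing in for the real classes:

    # Hypothetical context-insensitive parser: the splat keeps the old
    # one-argument behaviour intact under the new three-argument call.
    class DummyInline
      def self.tokenize(string, *_lookaround)
        [string]
      end
    end

    # Simplified stand-in for the dispatch loop changed above: every
    # String part is re-tokenized with its neighbours as lookaround.
    def retokenize(parts, parser)
      newparts = []
      parts.each_with_index do |x, i|
        if x.is_a?(String)
          # newparts.last -> last part already emitted (string or token)
          # parts[i + 1]  -> next pending part, nil at the very end
          newparts.append(*parser.tokenize(x, newparts.last, parts[i + 1]))
        else
          newparts.append(x)
        end
      end
      newparts
    end

    p retokenize(["plain *text*", ["`", DummyInline, :open]], DummyInline)
    # => ["plain *text*", ["`", DummyInline, :open]]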