compatibility fixes for emphasis

This commit is contained in:
Yessiest 2025-03-13 10:19:44 +04:00
parent f03f8dfa29
commit 0863d4cf4a
2 changed files with 59 additions and 14 deletions

2
.gitignore vendored
View File

@@ -1,2 +1,4 @@
/lib/example.md
/*.gem
/.yardoc
test*

View File

@@ -367,6 +367,7 @@ module PointBlank
# (see ::PointBlank::Parsing::NullParser#consume)
def consume(line, parent = nil, lazy: false)
puts line.inspect if lazy
@lazy_triggered = lazy || @lazy_triggered
return [nil, nil] if line.match?(/\A {0,3}\Z/)
return [nil, nil] if @closed
@@ -957,9 +958,9 @@ module PointBlank
parts = tokens
@valid_parsers.each do |parser|
newparts = []
parts.each do |x|
parts.each_with_index do |x, i|
if x.is_a? String
newparts.append(*parser.tokenize(x))
newparts.append(*parser.tokenize(x, newparts.last, parts[i + 1]))
else
newparts.append(x)
end
@@ -1013,8 +1014,10 @@ module PointBlank
# Tokenize a string
# @param string [String]
# @param before [String, ::PointBlank::DOM::DOMObject]
# @param after [String, ::PointBlank::DOM::DOMObject]
# @return [Array<Array(String, Class, Symbol), String>]
def self.tokenize(string)
def self.tokenize(string, before, after)
  # Default implementation performs no tokenization: the whole string is
  # returned as a single token. The +before+/+after+ lookaround arguments
  # are accepted (and ignored) so that every subclass shares the same
  # tokenize signature.
  [string]
end
@@ -1142,7 +1145,7 @@ module PointBlank
# Code inline parser
class CodeInline < NullInline
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
open = {}
iterate_tokens(string, "`") do |_before, current_text, matched|
if matched
@@ -1182,7 +1185,7 @@ module PointBlank
# Autolink inline parser
class AutolinkInline < NullInline
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
iterate_tokens(string, /[<>]/) do |_before, current_text, matched|
if matched
if current_text.start_with?("<")
@@ -1238,11 +1241,10 @@ module PointBlank
linkinfo = capture[-1][2]
obj = build(capture[1..-2])
if linkinfo[:label]
if (props = doc.root.properties[:linkdefs][linkinfo[:label]])
linkinfo = props
else
unless (props = doc.root.properties[:linkdefs][linkinfo[:label]])
return nil
end
linkinfo = props
end
obj.properties = linkinfo
obj
@@ -1277,7 +1279,7 @@ module PointBlank
end
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
iterate_tokens(string, /(?:!\[|\]\()/) do |_before, text, matched|
next text[0] unless matched
next ["![", self, :open] if text.start_with? "!["
@@ -1296,7 +1298,7 @@ module PointBlank
end
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
iterate_tokens(string, /(?:\[|\][(\[])/) do |_before, text, matched|
next text[0] unless matched
next ["[", self, :open] if text.start_with? "["
@@ -1308,20 +1310,61 @@ module PointBlank
end
end
# TODO: this seems way too complicated for something that's supposed
# to be a goddamn emphasis markup parser. i'd blame it on commonmark's
# convoluted specs.
# (P.S: it could be possible to make this easier for implementers by
# making a claims system with pointers that do not modify the string
# while it's being parsed. however that would just move complexity from
# the parser into the scanner instead. and it does not resolve the
# problem of overlapping claims as efficiently as simply splitting text
# into tokens and remaining string bits.)
# Emphasis and strong emphasis inline parser
class EmphInline < NullInline
INFIX_TOKENS = /^[^\p{S}\p{P}\p{Zs}_]_++[^\p{S}\p{P}\p{Zs}_]$/
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, before, after)
bfrb = extract_left(before)
afra = extract_right(after)
iterate_tokens(string, /(?:_++|\*++)/) do |bfr, text, matched|
token, afr = text.match(/^(_++|\*++)(.?)/)[1..2]
left = left_token?(bfr[-1] || "", token, afr)
right = right_token?(bfr[-1] || "", token, afr)
bfr = bfr[-1] || bfrb || ""
afr = afr.empty? ? afra || "" : afr
left = left_token?(bfr, token, afr)
right = right_token?(bfr, token, afr)
break_into_elements(token, [bfr[-1] || "", token, afr].join(''),
left, right, matched)
end
end
# Determine the character immediately preceding the tokenized string,
# used for left-flanking delimiter checks.
# @param bfr [String, ::PointBlank::DOM::DOMObject, Array(String, Class, Symbol)]
#   previous sibling token, or nil when there is none
# @return [String, nil] last character of the preceding text, a
#   punctuation placeholder for inline objects, or nil
def self.extract_left(bfr)
  if bfr.is_a?(String)
    bfr[-1]
  elsif bfr.is_a?(::PointBlank::DOM::DOMObject)
    # An adjacent inline element counts as punctuation for flanking rules
    "."
  elsif bfr.is_a?(Array)
    bfr.first[-1]
  end
end
# Determine the character immediately following the tokenized string,
# used for right-flanking delimiter checks.
# @param afr [String, ::PointBlank::DOM::DOMObject, Array(String, Class, Symbol)]
#   next sibling token, or nil when there is none
# @return [String, nil] first character of the following text, a
#   punctuation placeholder for inline objects, or nil
def self.extract_right(afr)
  if afr.is_a?(String)
    afr[0]
  elsif afr.is_a?(::PointBlank::DOM::DOMObject)
    # An adjacent inline element counts as punctuation for flanking rules
    "."
  elsif afr.is_a?(Array)
    afr.first[0]
  end
end
# Is this token, given these surrounding characters, left-flanking?
# @param bfr [String]
# @param token [String]
@@ -1431,7 +1474,7 @@ module PointBlank
# Hard break
class HardBreakInline < NullInline
# (see ::PointBlank::Parsing::NullInline#tokenize)
def self.tokenize(string)
def self.tokenize(string, *_lookaround)
iterate_tokens(string, /(?: \n|\\\n)/) do |_before, token, matched|
next ["\n", self, :close] if token.start_with?(" \n")
next ["\n", self, :close] if matched