Cleaner conversion system instead of parse_inner hooks, HTML renderer Inline element fallback and fix for multiple wordwrap triggers, experimental Tables extension (not compatible with markdown tables)

This commit is contained in:
Yessiest 2025-05-12 03:43:13 +04:00
parent 940f5dd1ef
commit 8f0372d914
4 changed files with 439 additions and 55 deletions

View File

@ -134,6 +134,11 @@ parser = OptionParser.new do |opts|
options[:include].append(inc)
end
opts.on("-e", "--extension [STRING]", String,
"Enable extension") do |inc|
options[:include].append("#{__dir__}/../lib/mmmd/extensions/#{inc}.rb")
end
opts.on("-o", "--option [STRING]", String,
"Add option string. Can be repeated. Format: <key>: <JSON value>\n"\
"<key>: (<\"string\">|<symbol>|<[integer]>)"\

View File

@ -215,7 +215,7 @@ module PointBlank
while line && (status, line = try_open(line)) && status; end
end
close_up(0)
@stack.first
finalize_root(@stack.first)
end
private
@ -273,12 +273,39 @@ module PointBlank
switch = x.parser.close(x)
x.parser = nil
x = transfer(x, switch) if switch
x.parse_inner if x.respond_to? :parse_inner
prepare_conversion(x)
end
@topdepth = @depth = level
@stack = @stack[..level]
end
# Register a closed block for deferred conversion.
# Blocks whose class declares no conversion are ignored; every other block
# is queued on its root through `append_temp_child`.
# @param block [Object] block that has just been closed
# @return [Object, nil] result of `append_temp_child`, nil when skipped
def prepare_conversion(block)
  return unless block.class.conversion

  block.root.append_temp_child(block)
end
# Finalize conversion
# This has to be done after the document gets processed due to
# the way link definitions have to be handled.
#
# Every block queued in `root.temp_children` gets an instance of its class'
# conversion. Literal conversions receive the content verbatim and are
# attached as a single child; other conversions receive the stripped content,
# are scanned, and their resulting children are moved onto the block.
# @param root [Object] document root holding the queued blocks
# @return [Object] the same root
def finalize_root(root)
  root.temp_children.each do |block|
    literal = block.class.conversion_literal
    # Content may be nil for blocks that never received any text
    text = block.content.to_s
    child = block.class.conversion.new
    child.parent = block.parent
    # Literal blocks keep their whitespace; only inline-scanned
    # content is trimmed
    child.content = literal ? text : text.strip
    if literal
      block.append_child(child)
    else
      ::PointBlank::Parsing::StackScanner.new(child).scan
      child.each { |c| block.append_child(c) }
    end
    block.content = ""
  end
  root.temp_children.clear
  root
end
# Transfer data from class to another class (morph class)
def transfer(block, switchclass)
newblock = switchclass.new
@ -1496,6 +1523,8 @@ module PointBlank
subclass.parser ||= @parser
subclass.scanner ||= @scanner
subclass.unsorted_children ||= @unsorted_children.dup || []
subclass.conversion ||= @conversion
subclass.conversion_literal ||= @conversion_literal
super(subclass)
end
@ -1539,6 +1568,14 @@ module PointBlank
@unsorted_overlays.append([overlay, priority])
end
# Define a conversion class that takes place in transition
# from block mode to inline mode
# @param cls [Class] class stored as this class' conversion
# @param literal [Boolean] stored as the conversion_literal flag
#   (finalize_root attaches a literal conversion as a single child
#   instead of scanning it)
# @return [Boolean] the literal flag that was stored
def define_conversion(cls, literal: false)
@conversion = cls
@conversion_literal = literal
end
# Sort overlays by priority
# @return [void]
def sort_overlays
@ -1556,12 +1593,29 @@ module PointBlank
end
# Source parameters from parent (fixes recursive dependency)
def upsource
def upsource(overwrite: false)
superclass&.tap do |sc|
@scanner = sc.scanner
@parser = sc.parser
@unsorted_children = sc.unsorted_children.dup
@unsorted_overlays = sc.unsorted_overlays.dup
if overwrite then
@scanner = sc.scanner
@parser = sc.parser
@unsorted_children = sc.unsorted_children.dup
@unsorted_overlays = sc.unsorted_overlays.dup
@conversion = sc.conversion
@conversion_literal = sc.conversion_literal
else
@scanner ||= sc.scanner
@parser ||= sc.parser
if sc.unsorted_overlays
@unsorted_overlays = (@unsorted_overlays || []) + sc.unsorted_overlays
end
if sc.unsorted_children
@unsorted_children = (@unsorted_children || []) + sc.unsorted_children
end
@conversion ||= sc.conversion
if @conversion_literal.nil?
@conversion_literal = sc.conversion_literal
end
end
end
sort_children
end
@ -1582,7 +1636,8 @@ module PointBlank
attr_accessor :scanner, :parser,
:unsorted_children,
:unsorted_overlays
:unsorted_overlays,
:conversion, :conversion_literal
end
include ::Enumerable
@ -1659,31 +1714,31 @@ module PointBlank
class TempText < DOMObject
end
# Inline element (common base class of the inline DOM classes)
class InlineElement < DOMObject
end
# Inline text
class Text < DOMObject
class Text < InlineElement
end
# Inline preformatted text
class InlinePre < DOMObject
class InlinePre < InlineElement
define_parser ::PointBlank::Parsing::CodeInline
end
# Hard Linebreak
class InlineBreak < DOMObject
class InlineBreak < InlineElement
define_parser ::PointBlank::Parsing::HardBreakInline
end
# Autolink
class InlineAutolink < DOMObject
class InlineAutolink < InlineElement
define_parser ::PointBlank::Parsing::AutolinkInline
end
# Inline formattable text
class InlineFormattable < DOMObject
end
# Image
class InlineImage < InlineFormattable
class InlineImage < InlineElement
define_parser ::PointBlank::Parsing::ImageInline
define_child ::PointBlank::DOM::InlinePre, 4000
define_child ::PointBlank::DOM::InlineBreak, 9999
@ -1692,7 +1747,7 @@ module PointBlank
end
# Hyperreferenced text
class InlineLink < InlineFormattable
class InlineLink < InlineElement
define_parser ::PointBlank::Parsing::LinkInline
define_child ::PointBlank::DOM::InlinePre, 4000
define_child ::PointBlank::DOM::InlineImage, 5000
@ -1749,43 +1804,16 @@ module PointBlank
# Leaf block (virtual)
class LeafBlock < DOMObject
# Virtual hook to delay inline processing
def parse_inner
self.content = content.strip if content
root.append_temp_child(self)
end
define_conversion ::PointBlank::DOM::InlineRoot
end
# Leaf literal block (virtual)
class LeafLiteralBlock < LeafBlock
# Virtual hook to push inlines in place of leaf blocks
def parse_inner
child = ::PointBlank::DOM::Text.new
child.content = content
append_child(child)
end
define_conversion ::PointBlank::DOM::Text, literal: true
end
# Document root
class Document < Block
# (see ::PointBlank::DOM::DOMObject#parse)
def self.parse(doc)
output = super(doc)
# This has to be done after the document gets processed due to the way link
# definitions have to be handled.
parse_inner = lambda do |block|
child = ::PointBlank::DOM::InlineRoot.new
child.parent = block.parent
child.content = block.content
scanner = ::PointBlank::Parsing::StackScanner.new(child)
scanner.scan
block.content = ""
child.each { |c| block.append_child(c) }
end
output.temp_children.each { |block| parse_inner.call(block) }
output.temp_children.clear
output
end
end
# Paragraph in a document (separated by 2 newlines)
@ -1793,12 +1821,7 @@ module PointBlank
define_parser ::PointBlank::Parsing::ParagraphParser
define_overlay ::PointBlank::Parsing::ParagraphUnderlineOverlay, 0
define_overlay ::PointBlank::Parsing::LinkReferenceOverlay
# Virtual hook to delay inline processing
def parse_inner
self.content = content.strip if content
root.append_temp_child(self)
end
define_conversion ::PointBlank::DOM::InlineRoot
end
# Heading level 1

View File

@ -0,0 +1,355 @@
# frozen_string_literal: true
require_relative '../blankshell'
module PointBlank
module Parsing
# Table parser
class TableParser < ::PointBlank::Parsing::NullParser
# (see ::PointBlank::Parsing::NullParser#begin?)
# A table may start on a line that passes check_line and is not
# a separator line.
def self.begin?(line)
  table_line = check_line(line)
  return table_line unless table_line

  !check_separator(line)
end
# Check that a line is a separator
# Every non-empty cell has to be a run of three or more `-` or `=`,
# optionally wrapped in `:`. A line without any non-empty cell is not
# a separator (`all?` alone would accept it vacuously).
# @param line [String]
# @return [Boolean]
def self.check_separator(line)
  cells = line.split("|").map(&:strip).reject(&:empty?)
  return false if cells.empty?

  cells.all? { |cell| cell.match?(/\A:?(?:---+|===+):?\z/) }
end
# Check that a line is an actual table line
# The line must start with at most three spaces, contain an unescaped
# pipe, and have at least one non-pipe character in front of a pipe.
# @param line [String]
# @return [Boolean, nil] nil when no unescaped pipe is found
def self.check_line(line)
  return false unless line.match?(/^\A {0,3}\S/)

  pipe_at = find_unescaped(line, "|")
  return pipe_at unless pipe_at

  line.match?(/[^|]+\|/)
end
# Find the first occurrence of an unescaped pattern
# @param string [String]
# @param pattern [Regexp, String]
# @return [Integer, nil] index of the match, nil when every match is escaped
#   or there is none
def self.find_unescaped(string, pattern)
  offset = 0
  loop do
    hit = string.index(pattern, offset)
    return nil unless hit
    return hit if check_unescaped(hit, string)

    # Escaped match: resume the search right after it
    offset = hit + 1
  end
end
# Check that the symbol at this index is not escaped
# A symbol counts as escaped when an odd number of backslashes
# directly precedes it.
# @param index [Integer]
# @param string [String]
# @return [Boolean, Integer] the index itself (0, truthy) for the first
#   character, otherwise true when unescaped and false when escaped
def self.check_unescaped(index, string)
  return index if index.zero?

  backslashes = (index - 1).downto(0).take_while { |pos| string[pos] == "\\" }
  backslashes.size.even?
end
# (see ::PointBlank::Parsing::NullParser#close)
# Returns the Paragraph class unless a separator line has been consumed
# (tracked in @correct); returns nil otherwise.
def close(block, lazy: false)
  @correct ? nil : ::PointBlank::DOM::Paragraph
end
# (see ::PointBlank::Parsing::NullParser#consume)
# Rejects lazy continuation and lines that fail check_line; accepted
# lines are returned unchanged.
def consume(line, _parent = nil, lazy: false)
  return [nil, nil] if lazy || !check_line(line)

  # Only the first accepted line decides whether rows are wrapped
  # in outer pipes
  unless @attempted
    @attempted = true
    @enclosed = true if line.match?(/^\s*\|.+?\|\s*$/)
  end
  # Once set, the flag stays set for the rest of the table
  @correct = check_separator(line) unless @correct
  [line, nil]
end
attr_reader :enclosed
private
# Check that a line is a header rule: every non-empty cell is a run of
# three or more `=`, optionally wrapped in `:`.
# A line without any non-empty cell is rejected, so a row of empty cells
# cannot mark the table as correct.
# @param line [String]
# @return [Boolean]
def check_separator(line)
  cells = line.split("|").map(&:strip).reject(&:empty?)
  return false if cells.empty?

  cells.all? { |cell| cell.match?(/\A:?===+:?\z/) }
end
# Check that a line contains an unescaped pipe and at least one
# non-pipe character in front of a pipe.
# @param line [String]
# @return [Boolean]
def check_line(line)
  return false if self.class.find_unescaped(line, "|").nil?

  line.match?(/[^|]+\|/)
end
end
# Table row
class TableRowParser < ::PointBlank::Parsing::NullParser
# (see ::PointBlank::Parsing::NullParser#begin?)
# A row may start on a line that passes check_line and is not
# a separator line.
def self.begin?(line)
  row_line = check_line(line)
  return row_line unless row_line

  !check_separator(line)
end
# Check that a line is a separator
# Every non-empty cell has to be a run of three or more `-` or `=`,
# optionally wrapped in `:`. A line without any non-empty cell is not
# a separator (`all?` alone would accept it vacuously).
# @param line [String]
# @return [Boolean]
def self.check_separator(line)
  cells = line.split("|").map(&:strip).reject(&:empty?)
  return false if cells.empty?

  cells.all? { |cell| cell.match?(/\A:?(?:---+|===+):?\z/) }
end
# Check that a line is an actual table line
# The line must start with at most three spaces, contain an unescaped
# pipe, and have at least one non-pipe character in front of a pipe.
# @param line [String]
# @return [Boolean, nil] nil when no unescaped pipe is found
def self.check_line(line)
  return false unless line.match?(/^\A {0,3}\S/)

  pipe_at = find_unescaped(line, "|")
  return pipe_at unless pipe_at

  line.match?(/[^|]+\|/)
end
# Find the first occurrence of an unescaped pattern
# @param string [String]
# @param pattern [Regexp, String]
# @return [Integer, nil] index of the match, nil when every match is escaped
#   or there is none
def self.find_unescaped(string, pattern)
  offset = 0
  loop do
    hit = string.index(pattern, offset)
    return nil unless hit
    return hit if check_unescaped(hit, string)

    # Escaped match: resume the search right after it
    offset = hit + 1
  end
end
# Check that the symbol at this index is not escaped
# A symbol counts as escaped when an odd number of backslashes
# directly precedes it.
# @param index [Integer]
# @param string [String]
# @return [Boolean, Integer] the index itself (0, truthy) for the first
#   character, otherwise true when unescaped and false when escaped
def self.check_unescaped(index, string)
  return index if index.zero?

  backslashes = (index - 1).downto(0).take_while { |pos| string[pos] == "\\" }
  backslashes.size.even?
end
# Consume one line of a table row.
# Trailing backslashes and whitespace are dropped. When the parent's parser
# reports `enclosed`, the outer pipes are removed as well. The normalized
# line is pushed wrapped in pipes. A row stops accepting lines once a
# header or separator line has been consumed and the next line is not
# a separator.
# @param line [String]
# @param parent [Object, nil] parent block; its parser is asked for `enclosed`
# @param lazy [Boolean] not used by this parser
# @return [Array] `[line, nil]` when consumed, `[nil, nil]` when rejected
def consume(line, parent = nil, lazy: false)
  line = line.gsub(/[\\\s]+$/, '')
  # parent defaults to nil (and its parser may already be detached),
  # so the lookup must not assume either is present
  if parent&.parser&.enclosed
    line = line.strip.delete_prefix("|").delete_suffix("|")
  end
  return [nil, nil] if @consumed && !check_separator(line)

  @consumed = check_header(line) || check_separator(line)
  push("|#{line}|\n")
  [line, nil]
end
private
# Check that a line is a row separator: every non-empty cell is a run of
# three or more `-`, optionally wrapped in `:`.
# A line without any non-empty cell is rejected, so a row of empty cells
# does not terminate the current row.
# @param line [String]
# @return [Boolean]
def check_separator(line)
  cells = line.split("|").map(&:strip).reject(&:empty?)
  return false if cells.empty?

  cells.all? { |cell| cell.match?(/\A:?---+:?\z/) }
end
# Check that a line is a header rule: every non-empty cell is a run of
# three or more `=`, optionally wrapped in `:`.
# A line without any non-empty cell is rejected, so a row of empty cells
# is not mistaken for a header rule.
# @param line [String]
# @return [Boolean]
def check_header(line)
  cells = line.split("|").map(&:strip).reject(&:empty?)
  return false if cells.empty?

  cells.all? { |cell| cell.match?(/\A:?===+:?\z/) }
end
# Check that a line contains an unescaped pipe and at least one
# non-pipe character in front of a pipe.
# @param line [String]
# @return [Boolean]
def check_line(line)
  return false if self.class.find_unescaped(line, "|").nil?

  line.match?(/[^|]+\|/)
end
end
# Table Row overlay (decides the type of row used)
class TableRowOverlay < NullOverlay
# Decide the row class from the last line of the row and remove that line
# from the content when it is a rule line (`---` or `===` cells).
# A row closed without a trailing rule keeps all of its lines, and empty
# or missing content no longer raises.
# @param block [Object] row block; `content` is read and may be rewritten
# @param lazy [Boolean] not used by this overlay
# @return [Class] result of check_underlines for the last line
def process(block, lazy: false)
  lines = block.content.to_s.lines
  last = lines.last.to_s
  output = check_underlines(last)
  cells = last.split("|").map(&:strip).reject(&:empty?)
  rule = !cells.empty? &&
         cells.all? { |cell| cell.match?(/\A:?(?:---+|===+):?\z/) }
  # Only a rule line is structural; anything else is cell data
  block.content = lines[0..-2].join if rule
  output
end
private
# Check which type of row this particular row should be
# @param line [String] line to classify
# @return [Class] TableHeaderRow when the line is a header rule,
#   TableRow otherwise
def check_underlines(line)
  return ::PointBlank::DOM::TableHeaderRow if check_header(line)

  ::PointBlank::DOM::TableRow
end
# Check if the line is a header rule: every non-empty cell is a run of
# three or more `=`, optionally wrapped in `:`.
# A line without any non-empty cell is rejected, so a row ending in a line
# of empty cells is not turned into a header row.
# @param line [String]
# @return [Boolean]
def check_header(line)
  cells = line.split("|").map(&:strip).reject(&:empty?)
  return false if cells.empty?

  cells.all? { |cell| cell.match?(/\A:?===+:?\z/) }
end
end
# Table column separator
class TableColumnInline < NullInline
# (see ::PointBlank::Parsing::NullInline#tokenize)
# Pipes become `:open` tokens and newlines become `:wrap` tokens;
# unmatched text is passed through unchanged.
def self.tokenize(string, *_lookaround)
  iterate_tokens(string, /[|\n]/) do |_before, text, matched|
    if matched
      marker = text[0]
      kind = marker == '|' ? :open : :wrap
      [marker, self, kind]
    else
      text
    end
  end
end
# (see ::PointBlank::Parsing::NullInline#forward_walk)
# Distributes the parts of a row into per-column bins: the n-th cell of
# every line of the row ends up in bin n. Returns a two-element array:
# the built first bin, followed by an array of the remaining built bins.
# NOTE(review): when no `:open` token is binned, `buffer.first` is nil and
# `buffer[1..]` is nil as well - presumably callers always pass
# pipe-wrapped rows; confirm.
def self.forward_walk(parts)
# One bin of parts per column
buffer = []
# Parts collected since the last token of this class
current = []
# Column the next `:open` token closes
bin_idx = 0
# The first own token of every line is skipped without flushing `current`
skip_first = true
parts.each_with_index do |part, idx|
# Anything that is not a token of this class is cell content
next current.append(part) unless part.is_a?(Array) &&
part[1] == self
next (skip_first = false) if skip_first
if part.last == :open
# A pipe closes the current cell: flush it into its column bin,
# terminated by a newline
buffer.append([]) if buffer.length < bin_idx + 1
buffer[bin_idx] += current + ["\n"]
bin_idx += 1
else
# Any other token kind (`:wrap`) restarts column counting
bin_idx = 0
skip_first = true
end
current = []
end
[build(merge_lines(buffer.first)),
buffer[1..].map { |x| build(merge_lines(x)) }]
end
# Merge line runs so that the content looks correct
# Adjacent strings are joined into one string: leading whitespace of each
# piece is removed and every run of spaces (with an optional newline after
# it) collapses into a single space. Non-string parts are kept as they are
# and split the runs. A trailing string is right-stripped.
# @param current [Array<String, Array>]
# @return [Array<String, Array>]
def self.merge_lines(current)
  merged = current.each_with_object([]) do |part, acc|
    unless part.is_a?(String)
      acc << part
      next
    end
    text = part.lstrip.gsub(/ +\n?/," ")
    if acc.last.is_a?(String)
      acc[-1] = acc.last + text
    else
      acc << text
    end
  end
  merged[-1] = merged.last.rstrip if merged.last.is_a?(String)
  merged
end
end
# Header row table column separator
# (exists because of a bug in handling parser_for)
# Adds no behavior of its own; everything is inherited from
# TableColumnInline.
class TableHeaderColumnInline < TableColumnInline
end
end
module DOM
# Table column (a single cell of a regular row), parsed by
# TableColumnInline
class TableColumn < ::PointBlank::DOM::InlineElement
define_parser ::PointBlank::Parsing::TableColumnInline
end
# Table column root (virtual)
# Conversion target of TableRow; its only declared child is TableColumn.
class TableColumnRoot < ::PointBlank::DOM::InlineRoot
define_scanner ::PointBlank::Parsing::StackScanner
define_child TableColumn
end
# Table header column (a single cell of a header row), parsed by
# TableHeaderColumnInline
class TableHeaderColumn < ::PointBlank::DOM::InlineElement
define_parser ::PointBlank::Parsing::TableHeaderColumnInline
end
# Table header column root (virtual)
# Conversion target of TableHeaderRow; its only declared child is
# TableHeaderColumn.
class TableHeaderColumnRoot < ::PointBlank::DOM::InlineRoot
define_scanner ::PointBlank::Parsing::StackScanner
define_child TableHeaderColumn
end
# Table header row
# Shares TableRowParser with TableRow and converts its content through
# TableHeaderColumnRoot. TableRowOverlay returns this class for rows whose
# last line is a `===` rule.
class TableHeaderRow < ::PointBlank::DOM::DOMObject
define_parser ::PointBlank::Parsing::TableRowParser
define_conversion ::PointBlank::DOM::TableHeaderColumnRoot
end
# Table row
# Parsed by TableRowParser; TableRowOverlay decides between this class and
# TableHeaderRow. Content is converted through TableColumnRoot.
class TableRow < ::PointBlank::DOM::DOMObject
define_parser ::PointBlank::Parsing::TableRowParser
define_overlay ::PointBlank::Parsing::TableRowOverlay
define_conversion ::PointBlank::DOM::TableColumnRoot
end
# Table
# Parsed by TableParser; declares TableRow and TableHeaderRow as children.
# NOTE(review): 300 is presumably the child's sort priority - confirm
# against define_child.
class Table < ::PointBlank::DOM::DOMObject
define_parser ::PointBlank::Parsing::TableParser
define_child ::PointBlank::DOM::TableRow, 300
define_child ::PointBlank::DOM::TableHeaderRow
end
# Document extension
# Registers Table as a child of Block.
# NOTE(review): 1500 is presumably the child's sort priority - confirm
# against define_child.
::PointBlank::DOM::Block.class_eval do
define_child ::PointBlank::DOM::Table, 1500
end
# Ask every direct subclass of Block to upsource (source parameters from
# its parent class). Class#subclasses does not include deeper descendants.
Block.subclasses.map(&:upsource)
end
end
# Touch to do autoloading
MMMD::Renderers::HTML.yield_self
module MMMD
  module Renderers
    module HTMLConstants
      # Register the HTML tags of the table DOM classes, but only when
      # MapManager is available in this scope.
      if defined? MapManager
        {
          "PointBlank::DOM::Table" => "table",
          "PointBlank::DOM::TableRow" => "tr",
          "PointBlank::DOM::TableHeaderRow" => "tr",
          "PointBlank::DOM::TableColumn" => "td",
          "PointBlank::DOM::TableHeaderColumn" => "th"
        }.each do |dom_class, tag_name|
          MapManager.define_mapping dom_class, { tag: tag_name }
        end
      end
    end
  end
end

View File

@ -232,7 +232,7 @@ module MMMD
end
def _render(element, options, inline: false, level: 0, literaltext: false)
modeswitch = figure_out_modeswitch(element)
modeswitch = figure_out_modeswitch(element) unless inline
inline ||= modeswitch
level += 1 unless inline
text = if element.children.empty?
@ -255,7 +255,8 @@ module MMMD
def figure_out_modeswitch(element)
element.is_a?(::PointBlank::DOM::LeafBlock) ||
element.is_a?(::PointBlank::DOM::Paragraph)
element.is_a?(::PointBlank::DOM::Paragraph) ||
element.is_a?(::PointBlank::DOM::InlineElement)
end
def run_filters(text, element, level:, inline:, modeswitch:,