extra minute details regarding proper parsing
This commit is contained in:
parent
1a9dd30112
commit
af93de6f4d
|
@ -1,5 +1,7 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Modular, extensible approach to parsing markdown as close as
|
||||
# it gets to CommonMark spec (as of version 0.31.2).
|
||||
module PointBlank
|
||||
module Parsing
|
||||
module LinkSharedMethods
|
||||
|
@ -112,8 +114,10 @@ module PointBlank
|
|||
def read_properties(text)
|
||||
properties = {}
|
||||
remaining = text
|
||||
warn text.inspect
|
||||
if text.start_with? '[' # link label
|
||||
properties[:label], remaining = read_return_label(remaining)
|
||||
close_bracket = false
|
||||
elsif text.start_with? '(' # link properties
|
||||
destination, remaining = read_destination(remaining[1..])
|
||||
return [nil, text] unless destination
|
||||
|
@ -121,11 +125,12 @@ module PointBlank
|
|||
title, remaining = read_title(remaining)
|
||||
properties[:destination] = destination
|
||||
properties[:title] = title
|
||||
close_bracket = true
|
||||
end
|
||||
if properties.empty? || !remaining.start_with?(')')
|
||||
if properties.empty? || (close_bracket && !remaining.start_with?(')'))
|
||||
[nil, text]
|
||||
else
|
||||
[properties, remaining[1..]]
|
||||
[properties, close_bracket ? remaining[1..] : remaining]
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -651,12 +656,10 @@ module PointBlank
|
|||
end
|
||||
|
||||
# Fenced code block
|
||||
# (TODO: This needs ~~~ as alternative to ticks,
|
||||
# and proper relative indentation)
|
||||
class FencedCodeBlock < NullParser
|
||||
# (see ::PointBlank::Parsing::NullParser#begin?)
|
||||
def self.begin?(line)
|
||||
line.start_with?(/\A {0,3}```[^`]+$/)
|
||||
line.start_with?(/\A {0,3}(?:`{3,}[^`]+$|~{3,}[^~]+$)/)
|
||||
end
|
||||
|
||||
# (see ::PointBlank::Parsing::NullParser#applyprops)
|
||||
|
@ -669,7 +672,7 @@ module PointBlank
|
|||
return [nil, false] if @closed
|
||||
|
||||
try_close(line)
|
||||
push(line) if @open && !@closed
|
||||
push(line.gsub(/^ {0,#{@space}}/, '')) if @open && !@closed
|
||||
self.open(line)
|
||||
["", false]
|
||||
end
|
||||
|
@ -677,13 +680,15 @@ module PointBlank
|
|||
private
|
||||
|
||||
def try_close(line)
|
||||
@closed = true if @open && line.match?(/\A {0,3}```/)
|
||||
@closed = true if @open && line.match?(/\A {0,3}#{@marker}+$/)
|
||||
end
|
||||
|
||||
def open(line)
|
||||
return if @open
|
||||
|
||||
@infoline = line.match(/\A {0,3}```(.*)/)[1]
|
||||
s, m, @infoline = line.match(/\A( {0,3})(`{3,}|~{3,})(.*)/)[1..3]
|
||||
@space = s.length
|
||||
@marker = m
|
||||
@open = true
|
||||
end
|
||||
end
|
||||
|
@ -872,7 +877,9 @@ module PointBlank
|
|||
# @return [::PointBlank::DOM::Text]
|
||||
def construct_text(string)
|
||||
obj = ::PointBlank::DOM::Text.new
|
||||
obj.content = string
|
||||
string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
|
||||
'\\1')
|
||||
obj.content = string.strip
|
||||
obj
|
||||
end
|
||||
|
||||
|
@ -1031,6 +1038,17 @@ module PointBlank
|
|||
# @param string [String]
|
||||
# @return [::PointBlank::DOM::Text]
|
||||
def self.construct_text(string)
|
||||
obj = ::PointBlank::DOM::Text.new
|
||||
string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
|
||||
'\\1')
|
||||
obj.content = string
|
||||
obj
|
||||
end
|
||||
|
||||
# Construct text literal for a string
|
||||
# @param string [String]
|
||||
# @return [::PointBlank::DOM::Text]
|
||||
def self.construct_literal(string)
|
||||
obj = ::PointBlank::DOM::Text.new
|
||||
obj.content = string
|
||||
obj
|
||||
|
@ -1085,7 +1103,7 @@ module PointBlank
|
|||
break (cutoff = idx) if part.first == opening &&
|
||||
part.last == :close
|
||||
end
|
||||
buffer = buffer[opening.length..(-1 - opening.length)]
|
||||
buffer = construct_literal(buffer[opening.length..(-1 - opening.length)])
|
||||
[cutoff.positive? ? build([buffer]) : opening, parts[(cutoff + 1)..]]
|
||||
end
|
||||
end
|
||||
|
@ -1327,6 +1345,25 @@ module PointBlank
|
|||
obj
|
||||
end
|
||||
end
|
||||
|
||||
# Hard break
|
||||
class HardBreakInline < NullInline
|
||||
# (see ::PointBlank::Parsing::NullInline#tokenize)
|
||||
def self.tokenize(string)
|
||||
iterate_tokens(string, /(?: \n|\\\n)/) do |_before, token, matched|
|
||||
next ["\n", self, :close] if token == " \\n"
|
||||
next ["\n", self, :close] if matched
|
||||
|
||||
" "
|
||||
end
|
||||
end
|
||||
|
||||
# (see ::PointBlank::Parsing::NullInline#reverse_walk)
|
||||
def self.reverse_walk(backlog)
|
||||
backlog[-1] = build([])
|
||||
backlog
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Domain object model elements
|
||||
|
@ -1509,6 +1546,16 @@ module PointBlank
|
|||
class InlinePre < DOMObject
|
||||
define_parser ::PointBlank::Parsing::CodeInline
|
||||
end
|
||||
|
||||
# Linebreak
|
||||
class InlineBreak < DOMObject
|
||||
define_parser ::PointBlank::Parsing::HardBreakInline
|
||||
end
|
||||
|
||||
# Autolink
|
||||
class InlineAutolink < DOMObject
|
||||
define_parser ::PointBlank::Parsing::AutolinkInline
|
||||
end
|
||||
|
||||
# Inline formattable text
|
||||
class InlineFormattable < DOMObject
|
||||
|
@ -1518,6 +1565,7 @@ module PointBlank
|
|||
class InlineImage < InlineFormattable
|
||||
define_parser ::PointBlank::Parsing::ImageInline
|
||||
define_child ::PointBlank::DOM::InlinePre, 4000
|
||||
define_child ::PointBlank::DOM::InlineBreak, 9999
|
||||
## that would be really funny lmao
|
||||
# define_child ::PointBlank::DOM::InlineImage
|
||||
end
|
||||
|
@ -1527,19 +1575,11 @@ module PointBlank
|
|||
define_parser ::PointBlank::Parsing::LinkInline
|
||||
define_child ::PointBlank::DOM::InlinePre, 4000
|
||||
define_child ::PointBlank::DOM::InlineImage, 5000
|
||||
define_child ::PointBlank::DOM::InlineBreak, 9999
|
||||
## idk if this makes sense honestly
|
||||
# define_child ::PointBlank::DOM::InlineAutolink
|
||||
end
|
||||
|
||||
# Linebreak
|
||||
class InlineBreak < DOMObject
|
||||
end
|
||||
|
||||
# Autolink
|
||||
class InlineAutolink < DOMObject
|
||||
define_parser ::PointBlank::Parsing::AutolinkInline
|
||||
end
|
||||
|
||||
# Inline root
|
||||
class InlineRoot < DOMObject
|
||||
define_scanner ::PointBlank::Parsing::StackScanner
|
||||
|
@ -1547,6 +1587,7 @@ module PointBlank
|
|||
define_child ::PointBlank::DOM::InlineAutolink, 4000
|
||||
define_child ::PointBlank::DOM::InlineImage, 5000
|
||||
define_child ::PointBlank::DOM::InlineLink, 6000
|
||||
define_child ::PointBlank::DOM::InlineBreak, 9999
|
||||
end
|
||||
|
||||
# Strong emphasis
|
||||
|
@ -1587,6 +1628,19 @@ module PointBlank
|
|||
|
||||
# Leaf block (virtual)
|
||||
class LeafBlock < DOMObject
|
||||
# Virtual hook to push inlines in place of leaf blocks
|
||||
def parse_inner
|
||||
child = ::PointBlank::DOM::InlineRoot.new
|
||||
child.content = content
|
||||
scanner = ::PointBlank::Parsing::StackScanner.new(child)
|
||||
scanner.scan
|
||||
self.content = ""
|
||||
child.each { |c| append_child(c) }
|
||||
end
|
||||
end
|
||||
|
||||
# Leaf literal block (virtual)
|
||||
class LeafLiteralBlock < LeafBlock
|
||||
# Virtual hook to push inlines in place of leaf blocks
|
||||
def parse_inner
|
||||
child = ::PointBlank::DOM::Text.new
|
||||
|
@ -1601,10 +1655,6 @@ module PointBlank
|
|||
|
||||
# Paragraph in a document (separated by 2 newlines)
|
||||
class Paragraph < DOMObject
|
||||
class << self
|
||||
# Define an overlay
|
||||
end
|
||||
|
||||
define_parser ::PointBlank::Parsing::ParagraphParser
|
||||
define_overlay ::PointBlank::Parsing::ParagraphUnderlineOverlay, 0
|
||||
define_overlay ::PointBlank::Parsing::LinkReferenceOverlay
|
||||
|
@ -1661,7 +1711,7 @@ module PointBlank
|
|||
end
|
||||
|
||||
# Preformatted fenced code block
|
||||
class CodeBlock < LeafBlock
|
||||
class CodeBlock < LeafLiteralBlock
|
||||
define_parser ::PointBlank::Parsing::FencedCodeBlock
|
||||
end
|
||||
|
||||
|
@ -1669,10 +1719,6 @@ module PointBlank
|
|||
class QuoteBlock < Block
|
||||
end
|
||||
|
||||
# Table
|
||||
class TableBlock < DOMObject
|
||||
end
|
||||
|
||||
# Unordered list element
|
||||
class ULListElement < Block
|
||||
end
|
||||
|
@ -1696,7 +1742,7 @@ module PointBlank
|
|||
end
|
||||
|
||||
# Indent block
|
||||
class IndentBlock < LeafBlock
|
||||
class IndentBlock < LeafLiteralBlock
|
||||
define_parser ::PointBlank::Parsing::IndentedBlock
|
||||
end
|
||||
|
||||
|
|
786
lib/rbmark.rb
786
lib/rbmark.rb
|
@ -1,786 +0,0 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module RBMark
|
||||
# Module for representing parsing-related constructs
|
||||
module Parsing
|
||||
# Abstract scanner interface implementation.
# The base class only stores the list of variants; concrete scanners
# are expected to override #scan.
class Scanner
  # @return [Array] variants this scanner consults while scanning
  attr_accessor :variants

  def initialize
    @variants = []
  end

  # Scan text
  # @param text [String]
  # @return [Array<RBMark::DOM::DOMObject>]
  # @raise [StandardError] always, unless a subclass overrides the method
  def scan(_text)
    raise StandardError, "Abstract method called"
  end
end
|
||||
|
||||
# Line-level scanner for blocks.
# Walks the text line by line, accumulating lines in a buffer; a variant
# ("mode") is selected for the buffer and flushed once it reports its end.
class LineScanner < Scanner
  # (see ::RBMark::Parsing::Scanner#scan)
  def scan(text, buffer: "", blocks: [], mode: nil)
    prepare
    all_lines = text.lines
    all_lines.each.with_index(1) do |current, next_index|
      buffer += current
      # nil once the last line has been reached
      upcoming = all_lines[next_index]
      blocks, buffer, mode = try_begin(current, blocks, buffer, mode,
                                       lookahead: upcoming)
      next unless mode&.end?(current, lookahead: upcoming,
                                      blocks: blocks, buffer: buffer)

      blocks, buffer, mode = flush(blocks, buffer, mode)
    end
    # Whatever is still buffered at end of input gets flushed as well
    flush(blocks, buffer, mode).first
  end

  # Predict mode for given line
  # @param line [String]
  # @return [Object] first variant (in priority order) that begins on the line
  def select_mode(line, **message)
    winner = @variants.find do |pair|
      pair.first.begin?(line, **message)
    end
    winner&.first
  end

  private

  # Attempt to open a new mode and, if possible, call :begin to prepare the block
  def try_begin(line, blocks, buffer, mode, lookahead: nil)
    return [blocks, buffer, mode] if mode

    mode = select_mode(line, lookahead: lookahead,
                             blocks: blocks,
                             buffer: buffer)
    blocks << mode.begin(line) if mode.respond_to?(:begin)
    [blocks, buffer, mode]
  end

  # Assign self as parent to all variants and order them by priority
  # @return [void]
  def prepare
    @variants.each do |variant|
      handler = variant.first
      unless handler.is_a? ::RBMark::Parsing::BlockVariant
        raise StandardError, "#{variant} is not a BlockVariant"
      end

      handler.parent = self
    end
    @variants.sort_by!(&:last)
  end

  # Flush the buffer using given mode
  # @param blocks [Array<RBMark::DOM::DOMObject>]
  # @param buffer [String]
  # @param mode [Object]
  # @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
  def flush(blocks, buffer, mode)
    return [blocks, ""] if buffer.empty?

    mode.end(blocks.last, buffer) if mode.respond_to?(:end)
    blocks << mode.flush(buffer) if mode.respond_to?(:flush)
    blocks, buffer, mode =
      if mode.respond_to?(:restructure)
        mode.restructure(blocks, buffer, mode)
      else
        [blocks, "", nil]
      end
    [blocks, buffer, mode]
  end
end
|
||||
|
||||
# Abstract variant interface.
# Marker base class: it defines no behaviour of its own.
class Variant; end
|
||||
|
||||
# Abstract block-level variant.
# Subclasses must implement #begin? and #end?; the remaining hooks
# (begin, end, flush, restructure) are optional and documented below.
class BlockVariant < Variant
  # Scanner that owns this variant
  attr_accessor :parent

  # Check if a block begins on this line
  # @param line [String]
  # @param opts [Hash] options hash
  # @option [String, nil] :lookahead next line over
  # @option [Array<::RBMark::Parsing::BlockVariant>] :blocks current list of blocks
  # @option [String] :buffer current state of buffer
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def begin?(_line, **_opts)
    raise StandardError, "Abstract method called"
  end

  # Check if a block ends on this line
  # @param line [String]
  # @param opts [Hash] options hash
  # @option [String, nil] :lookahead next line over
  # @option [Array<::RBMark::Parsing::BlockVariant>] :blocks current list of blocks
  # @option [String] :buffer current state of buffer
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def end?(_line, **_opts)
    raise StandardError, "Abstract method called"
  end

  # @!method begin(buffer)
  #   Open a block to be later filled in by BlockVariant#end
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method end(block, buffer)
  #   Finalize a block opened by begin
  #   @param buffer [String]
  #   @return [void]

  # @!method flush(buffer)
  #   Flush buffer and create a new DOM object
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method restructure(blocks, buffer, mode)
  #   Restructure current set of blocks (if method is defined)
  #   @param blocks [Array<::RBMark::DOM::DOMObject>]
  #   @param buffer [String]
  #   @param mode [::RBMark::Parsing::Variant]
  #   @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
end
|
||||
|
||||
# Paragraph breaking variant.
# Both methods are abstract and must be provided by subclasses.
class BreakerVariant < BlockVariant
  # Check that a paragraph matches the breaker
  # @param buffer [String]
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def match?(_buffer)
    raise StandardError, "Abstract method called"
  end

  # Process a paragraph
  # @param buffer [String]
  # @return [::RBMark::DOM::DOMObject]
  # @raise [StandardError] unless overridden
  def process(_buffer)
    raise StandardError, "Abstract method called"
  end

  # @!method preprocess(buffer)
  #   preprocess buffer
  #   @param buffer [String]
  #   @return [String]
end
|
||||
|
||||
# Paragraph replacing variant.
class ModifierVariant < BlockVariant
  # Check that a buffer matches requirements of the modifier
  # @param buffer [String]
  # @return [Class, nil]
  # @raise [StandardError] unless overridden
  def match?(_buffer)
    raise StandardError, "Abstract method called"
  end
end
|
||||
|
||||
# Paragraph variant.
# Catch-all block: any line with visible content begins a paragraph, which
# then runs until a breaker, a different block type or the end of input.
class ParagraphVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/\S/)
  end

  # (see BlockVariant#end?)
  def end?(line, lookahead: nil, **_opts)
    return true if check_paragraph_breakers(line)
    # No further input: the paragraph necessarily ends here
    return true unless lookahead
    # A breaker on the next line belongs to this paragraph (it is consumed
    # into the buffer), so the paragraph cannot end yet
    return false if check_paragraph_breakers(lookahead)
    # A line indented by 4 spaces continues the paragraph
    return false if lookahead.match?(/^ {4}/)

    !parent.select_mode(lookahead).is_a?(self.class)
  end

  # (see BlockVariant#flush)
  # @sg-ignore
  def flush(buffer)
    obj = ::RBMark::DOM::Paragraph.new
    obj.content = buffer
    obj
  end

  # (see BlockVariant#restructure)
  #
  # Resolution order: a matching breaker replaces the paragraph, otherwise a
  # matching modifier may restructure the block list, otherwise the buffer is
  # parsed as a plain paragraph.
  def restructure(blocks, _buffer, _mode)
    p_buffer = blocks.last.content
    if (block = do_breakers(p_buffer))
      blocks[-1] = block
    elsif (restructured = do_modifiers(blocks, p_buffer))
      # Only the block list of the modifier's result is used
      blocks = restructured.first
    else
      blocks[-1] = ::RBMark::DOM::Paragraph.parse(p_buffer)
    end
    [blocks, "", nil]
  end

  private

  # Run the first modifier variant that matches the buffer
  # @param blocks [Array<::RBMark::DOM::DOMObject>]
  # @param buffer [String]
  # @return [Array, nil] result of the modifier's restructure,
  #   or nil when no modifier matches
  def do_modifiers(blocks, buffer)
    modifier = parent.variants.find do |pair|
      pair.first.is_a?(::RBMark::Parsing::ModifierVariant) &&
        pair.first.match?(buffer)
    end&.first
    modifier&.restructure(blocks, buffer, nil)
  end

  # Run the first breaker variant that matches the buffer
  # @param buffer [String]
  # @return [::RBMark::DOM::DOMObject, nil]
  def do_breakers(buffer)
    breaker = parent.variants.find do |pair|
      pair.first.is_a?(::RBMark::Parsing::BreakerVariant) &&
        pair.first.match?(buffer)
    end&.first
    breaker&.process(buffer)
  end

  # Check whether any breaker variant begins on the line
  # @param line [String]
  # @return [Boolean]
  def check_paragraph_breakers(line)
    parent.variants.any? do |pair|
      pair.first.is_a?(::RBMark::Parsing::BreakerVariant) &&
        pair.first.begin?(line, breaks_paragraph: true)
    end
  end
end
|
||||
|
||||
# Thematic break variant.
# A line built only from one of `-`, `_`, `*` (plus spaces), starting within
# the first three columns and containing at least three marker characters.
class ThematicBreakVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    return false unless line.match?(/^(?:[- ]{3,}|[_ ]{3,}|[* ]{3,})$/)
    return false unless line.match?(/^ {0,3}[-_*]/)

    # The first pattern also accepts runs of spaces, so count the markers
    ["-", "_", "*"].any? { |marker| line.count(marker) >= 3 }
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(_buffer)
    ::RBMark::DOM::HorizontalRule.new
  end
end
|
||||
|
||||
# ATX Heading variant.
# Single-line headings introduced by one to six `#` characters.
class ATXHeadingVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^ {0,3}\#{1,6}(?: .*|)$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    hashes, title = buffer.match(/^ {0,3}(\#{1,6})( .*|)$/).captures
    # Drop an optional closing run of hashes
    title = title.gsub(/( #+|)$/, "")
    heading(hashes).parse(title.strip)
  end

  private

  # Pick the heading class for a run of hashes
  # @param lvl [String] the opening `#` run
  # @return [Class, nil]
  def heading(lvl)
    # Index 0 is a placeholder so that the run length maps directly
    [
      nil,
      ::RBMark::DOM::Heading1,
      ::RBMark::DOM::Heading2,
      ::RBMark::DOM::Heading3,
      ::RBMark::DOM::Heading4,
      ::RBMark::DOM::Heading5,
      ::RBMark::DOM::Heading6
    ][lvl.length]
  end
end
|
||||
|
||||
# Paragraph closing variant.
# A blank line produces no block of its own; it marks the previous block
# as closed.
class BlankSeparator < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^ {0,3}$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # Never matches a buffer
  # @param buffer [String]
  # @return [Boolean] always false
  def match?(_buffer)
    false
  end

  # (see BlockVariant#restructure)
  def restructure(blocks, _buffer, _mode)
    previous = blocks.last
    previous.properties[:closed] = true if previous
    [blocks, "", nil]
  end
end
|
||||
|
||||
# Setext heading variant.
# Turns a paragraph whose last line is an underline of `=` or `-`
# into a level 1 or level 2 heading respectively.
class SetextHeadingVariant < BreakerVariant
  # (see BlockVariant#begin?)
  def begin?(line, breaks_paragraph: nil, **_opts)
    # Only ever begins as a paragraph breaker, never on its own
    breaks_paragraph &&
      line.match?(/^ {0,3}(?:-+|=+) *$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  def match?(buffer)
    # An underline with no text above it is not a heading
    return nil unless preprocess(buffer).match(/\S/)

    underline = buffer.lines.last
    !heading(underline).nil?
  end

  # (see BreakerVariant#process)
  def process(buffer)
    target = heading(buffer.lines.last)
    target.parse(preprocess(buffer))
  end

  private

  # Strip the underline (last line) from the buffer
  # @param buffer [String]
  # @return [String]
  def preprocess(buffer)
    buffer.lines[0...-1].join
  end

  # Pick heading class by underline character
  # @param buffer [String, nil] the underline line
  # @return [Class, nil]
  def heading(buffer)
    case buffer
    when /^ {0,3}-+ *$/ then ::RBMark::DOM::Heading2
    when /^ {0,3}=+ *$/ then ::RBMark::DOM::Heading1
    end
  end
end
|
||||
|
||||
# Indented literal block variant.
# Lines indented by four spaces or a tab form a literal block.
class IndentedBlockVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?: {4}|\t)/)
  end

  # (see BlockVariant#end?)
  def end?(_line, lookahead: nil, **_opts)
    # Continues over lines indented by 4 spaces and over blank lines
    !lookahead&.match?(/^(?: {4}.*|\s*|\t)$/)
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    text = buffer.lines.map { |raw| "#{strip_indent(raw.chomp)}\n" }.join
    block = ::RBMark::DOM::IndentBlock.new
    block.content = text # TODO: replace this with inline text
    block
  end

  private

  # Remove one level of indentation from a line.
  # A leading tab counts as a whole indentation level, so only that single
  # character is removed; cutting four characters would eat content.
  # @param line [String] line without trailing newline
  # @return [String]
  def strip_indent(line)
    return line[1..] if line.start_with?("\t")

    # Lines shorter than the indent (blank lines) collapse to empty
    line[4..].to_s
  end
end
|
||||
|
||||
# Fenced code block.
# Opened by a fence of three or more backticks or tildes and closed by a
# line consisting of exactly the same fence.
class FencedCodeBlock < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?:`{3,}[^`]*|~{3,}.*)$/)
  end

  # (see BlockVariant#end?)
  def end?(line, blocks: nil, buffer: nil, **_opts)
    # The opening fence itself must never be taken for the closer
    buffer.lines.length > 1 &&
      line.strip == blocks.last.properties[:expected_closer]
  end

  # (see BlockVariant#begin)
  def begin(buffer)
    block = ::RBMark::DOM::CodeBlock.new
    block.properties[:expected_closer] = buffer.match(/^(?:`{3,}|~{3,})/)[0]
    block.properties[:infostring] = buffer.match(/^(?:`{3,}|~{3,})(.*)$/)[1]
                                          .strip
    block
  end

  # (see BlockVariant#end)
  def end(block, buffer)
    # TODO: replace this with inline text
    closer = block.properties.delete(:expected_closer)
    body = buffer.lines.drop(1) # skip the opening fence
    # Drop the closing fence only when it is actually there: a block left
    # open until the end of input has no closer, and its last line is content
    body.pop if closer && body.last&.strip == closer
    block.content = body.join
  end
end
|
||||
|
||||
# Quote block.
# Consecutive lines prefixed with `>` are unprefixed and parsed as a
# nested document fragment.
class QuoteBlock < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    quoted?(line)
  end

  # (see BlockVariant#end?)
  def end?(_line, lookahead: nil, **_opts)
    return true unless lookahead

    !quoted?(lookahead)
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    unquoted = buffer.each_line.map do |line|
      line.gsub(/^ {0,3}> ?/, '')
    end
    ::RBMark::DOM::QuoteBlock.parse(unquoted.join)
  end

  private

  # Check whether the line carries a quote marker
  # @param line [String]
  # @return [Boolean]
  def quoted?(line)
    line.match?(/^ {0,3}(?:>|> .*)$/)
  end
end
|
||||
end
|
||||
|
||||
# Module for representing abstract object hierarchy
|
||||
module DOM
|
||||
# Abstract container.
# Class level: a small DSL (variant, scanner, atomic, block, empty) that
# configures how text is parsed into instances of the class.
# Instance level: a node with text content, children and properties.
class DOMObject
  class << self
    attr_accessor :variants, :scanner_class, :alt_for, :atomic_mode,
                  :block_mode, :permit_empty

    # Hook for initializing variables
    # @param subclass [Class]
    def inherited(subclass)
      super
      @subclasses ||= []
      @subclasses.append(subclass)
      # Each subclass gets its own copy so that later additions to it
      # do not leak back into the parent
      subclass.variants = @variants.dup
      subclass.variants ||= []
      subclass.atomic_mode = @atomic_mode
      subclass.scanner_class = @scanner_class
    end

    # Add potential sub-element variant
    # @param cls [Class] ::RBMark::Parsing::Variant subclass
    # @param prio [Integer] priority (lower values are tried first)
    # @raise [StandardError] if cls is not a Variant subclass
    def variant(cls, prio: 1)
      unless cls < ::RBMark::Parsing::Variant
        raise StandardError, "#{cls} is not a Variant subclass"
      end

      @variants ||= []
      @variants.append([cls, prio])
      # A scanner built earlier does not know about the new variant
      @prepared = false
      @subclasses&.each do |subclass|
        subclass.variant(cls, prio: prio)
      end
    end

    # Set scanner class
    # @param cls [Class] ::RBMark::Parsing::Scanner subclass
    # @raise [StandardError] if cls is not a Scanner subclass
    def scanner(cls)
      unless cls < ::RBMark::Parsing::Scanner
        raise StandardError, "#{cls} is not a Scanner subclass"
      end

      @scanner_class = cls
      @prepared = false
      @subclasses&.each do |subclass|
        subclass.scanner(cls)
      end
    end

    # Prepare scanner and variants.
    # The scanner is built once and reused until #variant or #scanner
    # changes the configuration.
    # @return [void]
    # @raise [StandardError] if no scanner class has been set
    def prepare
      return if @prepared
      raise StandardError, "#{self} has no scanner class" unless @scanner_class

      @scanner = @scanner_class.new
      @scanner.variants = (@variants || []).map { |cls, prio| [cls.new, prio] }
      @prepared = true
    end

    # Parse text from the given context
    # @param text [String]
    # @return [self]
    def parse(text)
      prepare unless @atomic_mode
      instance = new
      if @atomic_mode
        # Atomic classes keep the text verbatim instead of scanning it
        instance.content = text
      else
        instance.append(*@scanner.scan(text))
      end
      instance
    end

    # Create a new instance of class or referenced class
    # @return [self, Class]
    def create
      if @alt_for
        @alt_for.new
      else
        new
      end
    end

    # Set the atomic flag
    # @return [void]
    def atomic
      @atomic_mode = true
    end

    # Set the block continuation flag
    # @return [void]
    def block
      @block_mode = true
    end

    # Allow the block to be empty
    # @return [void]
    def empty
      @permit_empty = true
    end
  end

  # Text content, child nodes and property hash of the node
  # @return [String, nil] content
  attr_reader :content, :children, :properties

  def initialize
    @content = nil
    @children = []
    @properties = {}
  end

  # Set certain property in the properties hash
  # @param properties [Hash] properties to update
  def property(**properties)
    @properties.update(properties)
  end

  # Add children to container
  # @param children [Array<DOMObject>]
  # @raise [StandardError] if any argument is not a DOMObject
  def append(*children)
    unless children.all? { |x| x.is_a? DOMObject }
      raise StandardError, "one of #{children.inspect} is not a DOMObject"
    end

    @children.append(*children)
  end

  # Insert a child into the container
  # @param index [Integer]
  # @param child [DOMObject]
  # @raise [StandardError] if child is not a DOMObject
  def insert(index, child)
    raise StandardError, "not a DOMObject" unless child.is_a? DOMObject

    @children.insert(index, child)
  end

  # Delete a child from container
  # @param index [Integer]
  def delete_at(index)
    @children.delete_at(index)
  end

  # Get a child from the container
  # @param key [Integer]
  def [](key)
    @children[key]
  end

  # Set text content of a DOMObject
  # @param text [String]
  # @raise [StandardError] if text is not a String
  def content=(text)
    raise StandardError, "not a String" unless text.is_a? String

    @content = text
  end
end
|
||||
|
||||
# Inline text
class Text < DOMObject; end

# Inline preformatted text
class InlinePre < DOMObject; end

# Inline formattable text (content is kept verbatim, see `atomic`)
class InlineFormattable < DOMObject
  atomic
end

# Bold text
class InlineBold < InlineFormattable; end

# Italics text
class InlineItalics < InlineFormattable; end

# Inline italics text (alternative)
class InlineAltItalics < InlineFormattable; end

# Underline text
class InlineUnder < InlineFormattable; end

# Strikethrough text
class InlineStrike < InlineFormattable; end

# Hyperreferenced text
class InlineLink < InlineFormattable; end

# Image
class InlineImage < InlinePre; end

# Linebreak
class InlineBreak < DOMObject; end
|
||||
|
||||
# Block root.
# Registers the line scanner and every block-level variant together with
# its priority (lower numbers are tried first).
class Block < DOMObject
  scanner ::RBMark::Parsing::LineScanner

  {
    ::RBMark::Parsing::ATXHeadingVariant => 100,
    ::RBMark::Parsing::ThematicBreakVariant => 200,
    ::RBMark::Parsing::SetextHeadingVariant => 300,
    ::RBMark::Parsing::IndentedBlockVariant => 400,
    ::RBMark::Parsing::FencedCodeBlock => 500,
    ::RBMark::Parsing::QuoteBlock => 600,
    # Catch-alls go last: blank lines first, then plain paragraphs
    ::RBMark::Parsing::BlankSeparator => 9998,
    ::RBMark::Parsing::ParagraphVariant => 9999
  }.each do |handler, priority|
    variant handler, prio: priority
  end
end
|
||||
|
||||
# Document root.
# Parses like any Block, then post-processes the tree: nested blocks are
# walked by the merge pass and empty elements are removed.
class Document < Block
  class << self
    # (see ::RBMark::DOM::DOMObject#parse)
    def parse(text)
      cleanup(merge(super))
    end

    private

    # Clean up empty elements
    # @param doc [::RBMark::DOM::Document]
    # @return [::RBMark::DOM::Document]
    def cleanup(doc)
      doc.tap { |tree| _cleanup(tree) }
    end

    # Merge open paragraphs upwards
    # @param doc [::RBMark::DOM::Document]
    # @return [::RBMark::DOM::Document]
    def merge(doc)
      doc.tap { |tree| _merge(tree) }
    end

    # A function to merge children upward.
    # Currently a placeholder: the stack is handed back untouched.
    def _merge_step(child, stack, depth)
      stack
    end

    # Merge nested block constructs upwards
    # @param doc [::RBMark::DOM::DOMObject]
    # @return [void]
    def _merge(doc, stack = [], depth = 0)
      stack.append(doc) if stack.length <= depth
      doc.children.each do |child|
        stack = _merge_step(child, stack, depth)
        next unless child.class.block_mode
        next if child.children.empty?

        _merge(child, stack, depth + 1)
      end
    end

    # Recursively descend through hierarchy and delete empty elements
    # @param doc [::RBMark::DOM::DOMObject]
    # @return [Integer] accumulated length of stripped text under doc
    def _cleanup(doc)
      total = 0
      doc.children.delete_if do |child|
        weight = child.children.empty? ? 0 : _cleanup(child)
        weight += child.content&.strip&.length || 0
        total += weight
        # Elements without any text go away unless their class allows it
        weight.zero? && !child.class.permit_empty
      end
      total
    end
  end
end
|
||||
|
||||
# Paragraph in a document (separated by 2 newlines)
class Paragraph < InlineFormattable
  atomic
end

# Heading level 1
class Heading1 < InlineFormattable; end

# Heading level 2
class Heading2 < Heading1; end

# Heading level 3
class Heading3 < Heading1; end

# Heading level 4
class Heading4 < Heading1; end

# Heading level 5
class Heading5 < Heading1; end

# Heading level 6
class Heading6 < Heading1; end

# Preformatted code block
class CodeBlock < DOMObject; end

# Quote block (flagged as a block continuation container)
class QuoteBlock < Block
  block
end

# Table
class TableBlock < DOMObject; end

# List element
class ListElement < Block; end

# Unordered list
class ULBlock < DOMObject; end

# Ordered list block
class OLBlock < DOMObject; end

# Indent block
class IndentBlock < DOMObject; end

# Horizontal rule (kept verbatim and allowed to have no content)
class HorizontalRule < DOMObject
  atomic
  empty
end
|
||||
end
|
||||
end
|
21
lib/test.rb
21
lib/test.rb
|
@ -1,21 +0,0 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require_relative 'rbmark'
|
||||
|
||||
structure = RBMark::DOM::Document.parse(File.read("example.md"))
|
||||
# Wrap a value in ANSI escape codes for red foreground text
# @param string [#to_s]
# @return [String]
def red(string)
  "\033[31m" + string.to_s + "\033[0m"
end
|
||||
# Wrap a value in ANSI escape codes for yellow foreground text
# @param string [#to_s]
# @return [String]
def yellow(string)
  "\033[33m" + string.to_s + "\033[0m"
end
|
||||
|
||||
# Print a DOM tree to stdout, one node per line: class name (without the
# DOM namespace), a "(c)" marker for closed nodes and the inspected content.
# @param doc [Object] node responding to properties, content and children
# @param indent [Integer] number of spaces printed before a child's bullet
def prettyprint(doc, indent = 0)
  label = yellow(doc.class.name.gsub(/\w+::DOM::/,""))
  marker = red(doc.properties[:closed] ? "(c)" : "")
  puts "#{label}#{marker}: #{doc.content.inspect}"
  bullet = "#{" " * indent} - "
  doc.children.each do |child|
    print red(bullet)
    prettyprint(child, indent + 4)
  end
end
|
||||
prettyprint(structure)
|
15
lib/test2.rb
15
lib/test2.rb
|
@ -1,15 +0,0 @@
|
|||
require_relative 'blankshell'
|
||||
pp PointBlank::DOM::Document.parse(<<DOC)
|
||||
Penis
|
||||
# STREEMER VIN SAUCE JORKS HIS PEANUTS ON S TREeAM
|
||||
> pee
|
||||
> > 2 pee
|
||||
> peepee
|
||||
> > 3 pee
|
||||
> > 4 pee
|
||||
bee
|
||||
> # IT'S HIP
|
||||
> BEES
|
||||
> > FUCK
|
||||
BEES
|
||||
DOC
|
180
lib/test3.rb
180
lib/test3.rb
|
@ -1,180 +0,0 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require_relative 'blankshell'
|
||||
|
||||
doc = <<~DOC
|
||||
Penis
|
||||
# STREEMER VIN SAUCE JORKS HIS PEANUTS ON S TREeAM
|
||||
> pee
|
||||
> > 2 pee
|
||||
> peepee
|
||||
and you cum now
|
||||
> > 3 pee
|
||||
> > 4 pee
|
||||
bee
|
||||
# IT'S HIP
|
||||
> # IT'S HIP
|
||||
> BEES
|
||||
> > FUCK
|
||||
BEES
|
||||
PEES
|
||||
=========
|
||||
|
||||
[definition]: /url 'title'
|
||||
[definition
|
||||
2
|
||||
]:
|
||||
/long_url_with_varying_stuff
|
||||
(title)
|
||||
|
||||
> COME ON AND SNIFF THE PAINT
|
||||
>
|
||||
> WITH MEEE
|
||||
> > OH THAT IS SO CUUL
|
||||
> OH THERE'S BLOOD IN MY STOOL
|
||||
> AAAAA IT HURTS
|
||||
>
|
||||
> > WHEN I
|
||||
> PEEEEEEE
|
||||
|
||||
PIIS
|
||||
==========
|
||||
|
||||
but does it end here?
|
||||
> COCK
|
||||
> < PENIS
|
||||
> < > AMONGUS
|
||||
> < CONTINUATION
|
||||
> > BREAKER
|
||||
COCK
|
||||
|
||||
+ Plus block opens
|
||||
and continues.
|
||||
|
||||
This is the next paragraph of a plus block,
|
||||
and this is a continuation line in the block
|
||||
+ This thing continues the outer block and has a plus sign still.
|
||||
next part
|
||||
- SIMPS LMAO
|
||||
continuation
|
||||
|
||||
This by the way should continue the
|
||||
block but should be a separate
|
||||
paragraph
|
||||
- Next shit
|
||||
|
||||
> INCLUDING INNER QUOTES BY THE WAY
|
||||
WITH INNER PARAGRAPH FALL OFF!!!
|
||||
|
||||
also a paragraph inside this thing
|
||||
|
||||
- BUT CAN WE GET EVEN STUPIDER?????
|
||||
|
||||
> YES WE CAN!!!!
|
||||
|
||||
- Another element
|
||||
|
||||
NOW it breaks
|
||||
1. FREDDY FAZBER???
|
||||
HARHAR HAR HAR HAR
|
||||
HAR HAR HARHAR
|
||||
|
||||
HOLY SHITTO FREDDY FASTBER???
|
||||
AR AR HARHAR HAR
|
||||
HURHURHURHUR
|
||||
|
||||
2. fast
|
||||
ber
|
||||
10. BIG
|
||||
still the same OLblock
|
||||
11) OK NOW THIS IS EBIN
|
||||
different block
|
||||
12930192) THIS still continues because idk why really
|
||||
lmao
|
||||
|
||||
> QUONT PARGRAP
|
||||
WHAT THEF UCK BASSBOOSTED
|
||||
|
||||
>```fencedcode block infoline (up to interpretation)
|
||||
> #THIS should have a very specific structure, not modified by anything
|
||||
>
|
||||
> int main() {
|
||||
> int i = 1;
|
||||
> if (i > 0) {
|
||||
> printf("anus\\n");
|
||||
> }
|
||||
> return 0;
|
||||
> }
|
||||
>```
|
||||
|
||||
Also code block
|
||||
|
||||
Hello mario
|
||||
|
||||
also these should continue so that's a thing
|
||||
|
||||
- Thematic break test
|
||||
- - - - - - - - - - - - - - - - - - - - -
|
||||
- Above should be a thematic break, not a list containing a thematic break
|
||||
|
||||
but what if
|
||||
--------------
|
||||
WRONG????
|
||||
|
||||
aaa
|
||||
bbb
|
||||
ccc
|
||||
|
||||
now it's time to CUHHHMMMMMMM
|
||||
- <amongus:thisis_an_autolink>
|
||||
- <amongus:but this isn't>
|
||||
- <peeee:nis> peepee <peee:peeeeeeinis> Pe
|
||||
- `cum on <` hogogwagarts ><cum:on`>hogogwagarts`
|
||||
- ``` test `should work tho `` and this should be continued` ````
|
||||
- \\<amongus:bumpalumpa>
|
||||
- `` \\<cum:amongus> ```
|
||||
- \\```amongus``
|
||||
- ``amongus``\\`
|
||||
- 
|
||||
- moretests)after
|
||||
- more tests  after
|
||||
- more tests  'valid') after
|
||||
- next test
|
||||

|
||||
amongus
|
||||
- 
|
||||
- [outer](/poopoo 'AAAAAA')
|
||||
- [amongus][definition]
|
||||
- *emphasis on multiple words*
|
||||
- **strong emphasis on multiple words**
|
||||
- infix**emphasis**block
|
||||
- no_infix_empahsis
|
||||
- _emphasis_
|
||||
- __strong emphasis__
|
||||
- __nested __strong__ emphasis__
|
||||
- __(__this__)__
|
||||
- *among us*** ***vr*
|
||||
- *among **us*vr****
|
||||
- *among **us *vr****
|
||||
- *among**us*
|
||||
- [*outer*](/poopoo 'AAAAAA')
|
||||
DOC
|
||||
|
||||
structure = PointBlank::DOM::Document.parse(doc)
|
||||
# Wrap a value in ANSI escape codes for red foreground text
# @param string [#to_s]
# @return [String]
def red(string)
  "\033[31m" + string.to_s + "\033[0m"
end
|
||||
# Wrap a value in ANSI escape codes for yellow foreground text
# @param string [#to_s]
# @return [String]
def yellow(string)
  "\033[33m" + string.to_s + "\033[0m"
end
|
||||
|
||||
# Print a DOM tree to stdout, one node per line: class name (without the
# DOM namespace), the inspected content and, when present, the properties.
# @param doc [Object] node responding to properties, content and children
# @param indent [Integer] number of spaces printed before a child's bullet
def prettyprint(doc, indent = 0)
  label = yellow(doc.class.name.gsub(/\w+::DOM::/, ''))
  props = doc.properties.empty? ? '' : red(doc.properties.inspect)
  puts "#{label}: #{doc.content.inspect} #{props}"
  bullet = "#{' ' * indent} - "
  doc.children.each do |child|
    print red(bullet)
    prettyprint(child, indent + 4)
  end
end
|
||||
prettyprint(structure)
|
Loading…
Reference in New Issue