extra minute details regarding proper parsing

This commit is contained in:
Yessiest 2025-03-01 21:51:08 +00:00
parent 1a9dd30112
commit af93de6f4d
5 changed files with 75 additions and 1031 deletions

View File

@ -1,5 +1,7 @@
# frozen_string_literal: true
# Modular, extensible approach to parsing markdown as close as
# it gets to CommonMark spec (as of version 0.31.2).
module PointBlank
module Parsing
module LinkSharedMethods
@ -112,8 +114,10 @@ module PointBlank
def read_properties(text)
properties = {}
remaining = text
warn text.inspect
if text.start_with? '[' # link label
properties[:label], remaining = read_return_label(remaining)
close_bracket = false
elsif text.start_with? '(' # link properties
destination, remaining = read_destination(remaining[1..])
return [nil, text] unless destination
@ -121,11 +125,12 @@ module PointBlank
title, remaining = read_title(remaining)
properties[:destination] = destination
properties[:title] = title
close_bracket = true
end
if properties.empty? || !remaining.start_with?(')')
if properties.empty? || (close_bracket && !remaining.start_with?(')'))
[nil, text]
else
[properties, remaining[1..]]
[properties, close_bracket ? remaining[1..] : remaining]
end
end
@ -651,12 +656,10 @@ module PointBlank
end
# Fenced code block
# (TODO: This needs ~~~ as alternative to ticks,
# and proper relative indentation)
class FencedCodeBlock < NullParser
# (see ::PointBlank::Parsing::NullParser#begin?)
def self.begin?(line)
line.start_with?(/\A {0,3}```[^`]+$/)
line.start_with?(/\A {0,3}(?:`{3,}[^`]+$|~{3,}[^~]+$)/)
end
# (see ::PointBlank::Parsing::NullParser#applyprops)
@ -669,7 +672,7 @@ module PointBlank
return [nil, false] if @closed
try_close(line)
push(line) if @open && !@closed
push(line.gsub(/^ {0,#{@space}}/, '')) if @open && !@closed
self.open(line)
["", false]
end
@ -677,13 +680,15 @@ module PointBlank
private
def try_close(line)
@closed = true if @open && line.match?(/\A {0,3}```/)
@closed = true if @open && line.match?(/\A {0,3}#{@marker}+$/)
end
def open(line)
return if @open
@infoline = line.match(/\A {0,3}```(.*)/)[1]
s, m, @infoline = line.match(/\A( {0,3})(`{3,}|~{3,})(.*)/)[1..3]
@space = s.length
@marker = m
@open = true
end
end
@ -872,7 +877,9 @@ module PointBlank
# @return [::PointBlank::DOM::Text]
def construct_text(string)
obj = ::PointBlank::DOM::Text.new
obj.content = string
string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
'\\1')
obj.content = string.strip
obj
end
@ -1031,6 +1038,17 @@ module PointBlank
# @param string [String]
# @return [::PointBlank::DOM::Text]
def self.construct_text(string)
  # Drop the backslash in front of every escapable ASCII punctuation
  # character; any other backslash sequence is left untouched.
  unescaped = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/) do
    Regexp.last_match(1)
  end
  ::PointBlank::DOM::Text.new.tap { |text| text.content = unescaped }
end
# Construct text literal for a string
# @param string [String]
# @return [::PointBlank::DOM::Text]
def self.construct_literal(string)
obj = ::PointBlank::DOM::Text.new
obj.content = string
obj
@ -1085,7 +1103,7 @@ module PointBlank
break (cutoff = idx) if part.first == opening &&
part.last == :close
end
buffer = buffer[opening.length..(-1 - opening.length)]
buffer = construct_literal(buffer[opening.length..(-1 - opening.length)])
[cutoff.positive? ? build([buffer]) : opening, parts[(cutoff + 1)..]]
end
end
@ -1327,6 +1345,25 @@ module PointBlank
obj
end
end
# Hard break
# Inline parser that turns matched line endings into hard-break tokens.
# Relies on iterate_tokens and build, which are not defined in this class
# (presumably inherited from NullInline - confirm).
class HardBreakInline < NullInline
# (see ::PointBlank::Parsing::NullInline#tokenize)
# The pattern, as rendered here, matches a space followed by a newline or a
# backslash followed by a newline. For every chunk the block yields either
# a ["\n", self, :close] triple or a single space.
def self.tokenize(string)
iterate_tokens(string, /(?: \n|\\\n)/) do |_before, token, matched|
# NOTE(review): " \\n" is space + backslash + the letter n, which the
# pattern above cannot produce - possibly meant " \n" or "\\\n"; confirm
# what iterate_tokens passes as token.
next ["\n", self, :close] if token == " \\n"
next ["\n", self, :close] if matched
" "
end
end
# (see ::PointBlank::Parsing::NullInline#reverse_walk)
# Replaces the last backlog entry with build([]) and returns the backlog.
def self.reverse_walk(backlog)
backlog[-1] = build([])
backlog
end
end
end
# Domain object model elements
@ -1509,6 +1546,16 @@ module PointBlank
class InlinePre < DOMObject
define_parser ::PointBlank::Parsing::CodeInline
end
# Linebreak
# DOM element whose parser is HardBreakInline.
class InlineBreak < DOMObject
define_parser ::PointBlank::Parsing::HardBreakInline
end
# Autolink
# DOM element whose parser is AutolinkInline.
class InlineAutolink < DOMObject
define_parser ::PointBlank::Parsing::AutolinkInline
end
# Inline formattable text
class InlineFormattable < DOMObject
@ -1518,6 +1565,7 @@ module PointBlank
class InlineImage < InlineFormattable
define_parser ::PointBlank::Parsing::ImageInline
define_child ::PointBlank::DOM::InlinePre, 4000
define_child ::PointBlank::DOM::InlineBreak, 9999
## that would be really funny lmao
# define_child ::PointBlank::DOM::InlineImage
end
@ -1527,19 +1575,11 @@ module PointBlank
define_parser ::PointBlank::Parsing::LinkInline
define_child ::PointBlank::DOM::InlinePre, 4000
define_child ::PointBlank::DOM::InlineImage, 5000
define_child ::PointBlank::DOM::InlineBreak, 9999
## idk if this makes sense honestly
# define_child ::PointBlank::DOM::InlineAutolink
end
# Linebreak
class InlineBreak < DOMObject
end
# Autolink
class InlineAutolink < DOMObject
define_parser ::PointBlank::Parsing::AutolinkInline
end
# Inline root
class InlineRoot < DOMObject
define_scanner ::PointBlank::Parsing::StackScanner
@ -1547,6 +1587,7 @@ module PointBlank
define_child ::PointBlank::DOM::InlineAutolink, 4000
define_child ::PointBlank::DOM::InlineImage, 5000
define_child ::PointBlank::DOM::InlineLink, 6000
define_child ::PointBlank::DOM::InlineBreak, 9999
end
# Strong emphasis
@ -1587,6 +1628,19 @@ module PointBlank
# Leaf block (virtual)
class LeafBlock < DOMObject
  # Virtual hook to push inlines in place of leaf blocks.
  # Wraps the current content in an InlineRoot, runs a StackScanner over
  # it, clears this block's own content and adopts the root's children.
  def parse_inner
    root = ::PointBlank::DOM::InlineRoot.new
    root.content = content
    ::PointBlank::Parsing::StackScanner.new(root).scan
    self.content = ""
    root.each { |inline| append_child(inline) }
  end
end
# Leaf literal block (virtual)
class LeafLiteralBlock < LeafBlock
# Virtual hook to push inlines in place of leaf blocks
def parse_inner
child = ::PointBlank::DOM::Text.new
@ -1601,10 +1655,6 @@ module PointBlank
# Paragraph in a document (separated by 2 newlines)
class Paragraph < DOMObject
class << self
# Define an overlay
end
define_parser ::PointBlank::Parsing::ParagraphParser
define_overlay ::PointBlank::Parsing::ParagraphUnderlineOverlay, 0
define_overlay ::PointBlank::Parsing::LinkReferenceOverlay
@ -1661,7 +1711,7 @@ module PointBlank
end
# Preformatted fenced code block
class CodeBlock < LeafBlock
class CodeBlock < LeafLiteralBlock
define_parser ::PointBlank::Parsing::FencedCodeBlock
end
@ -1669,10 +1719,6 @@ module PointBlank
class QuoteBlock < Block
end
# Table
class TableBlock < DOMObject
end
# Unordered list element
class ULListElement < Block
end
@ -1696,7 +1742,7 @@ module PointBlank
end
# Indent block
class IndentBlock < LeafBlock
class IndentBlock < LeafLiteralBlock
define_parser ::PointBlank::Parsing::IndentedBlock
end

View File

@ -1,786 +0,0 @@
# frozen_string_literal: true
module RBMark
# Module for representing parsing-related constructs
module Parsing
# Abstract scanner interface implementation
class Scanner
  # Registered variants; starts out as an empty array
  attr_accessor :variants

  def initialize
    @variants = []
  end

  # Scan text (abstract: concrete scanners must override this)
  # @param text [String]
  # @return [Array<RBMark::DOM::DOMObject>]
  # @raise [StandardError] always, in this base class
  def scan(_text)
    raise StandardError, "Abstract method called"
  end
end
# Line-level scanner for blocks
class LineScanner < Scanner
  # (see ::RBMark::Parsing::Scanner#scan)
  # Feeds the text line by line into the active variant ("mode"),
  # accumulating lines in a buffer until the mode reports the block ended.
  def scan(text, buffer: "", blocks: [], mode: nil)
    prepare
    all_lines = text.lines
    all_lines.each_with_index do |current, idx|
      buffer += current
      upcoming = all_lines.fetch(idx + 1, nil)
      blocks, buffer, mode = try_begin(current, blocks, buffer, mode,
                                       lookahead: upcoming)
      next unless mode&.end?(current, lookahead: upcoming,
                                      blocks: blocks, buffer: buffer)

      blocks, buffer, mode = flush(blocks, buffer, mode)
    end
    # Whatever is still buffered after the last line is flushed as well.
    flush(blocks, buffer, mode)[0]
  end

  # Predict mode for given line
  # @param line [String]
  # @return [Object] first variant whose begin? accepts the line, or nil
  def select_mode(line, **message)
    entry = @variants.find do |candidate, _prio|
      candidate.begin?(line, **message)
    end
    entry&.at(0)
  end

  private

  # Attempt to open a new mode and, if possible, call :begin to prepare the block.
  # Does nothing when a mode is already active.
  def try_begin(line, blocks, buffer, mode, lookahead: nil)
    unless mode
      mode = select_mode(line, lookahead: lookahead,
                               blocks: blocks,
                               buffer: buffer)
      blocks.append(mode.begin(line)) if mode.respond_to?(:begin)
    end
    [blocks, buffer, mode]
  end

  # Assign self as parent to all variants and order them by priority
  # @return [void]
  def prepare
    @variants.each do |entry|
      candidate = entry[0]
      unless candidate.is_a? ::RBMark::Parsing::BlockVariant
        raise StandardError, "#{entry} is not a BlockVariant"
      end

      candidate.parent = self
    end
    @variants.sort_by!(&:last)
  end

  # Flush the buffer using given mode
  # @param blocks [Array<RBMark::DOM::DOMObject>]
  # @param buffer [String]
  # @param mode [Object]
  # @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
  def flush(blocks, buffer, mode)
    return [blocks, ""] if buffer.empty?

    mode.end(blocks.last, buffer) if mode.respond_to?(:end)
    blocks.append(mode.flush(buffer)) if mode.respond_to?(:flush)
    if mode.respond_to?(:restructure)
      # The mode decides what the next blocks/buffer/mode are.
      blocks, buffer, mode = mode.restructure(blocks, buffer, mode)
      return [blocks, buffer, mode]
    end
    [blocks, "", nil]
  end
end
# Abstract variant interface
# Marker base class with no behaviour of its own; DOMObject.variant
# rejects any class that is not a subclass of it.
class Variant
end
# Abstract block-level variant
class BlockVariant < Variant
  # Scanner owning this variant (assigned by LineScanner#prepare)
  attr_accessor :parent

  # Check if a block begins on this line
  # @param line [String]
  # @param opts [Hash] options hash
  # @option opts [String, nil] :lookahead next line over
  # @option opts [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option opts [String] :buffer current state of buffer
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def begin?(_line, **_opts)
    raise StandardError, "Abstract method called"
  end

  # Check if a block ends on this line
  # @param line [String]
  # @param opts [Hash] options hash
  # @option opts [String, nil] :lookahead next line over
  # @option opts [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option opts [String] :buffer current state of buffer
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def end?(_line, **_opts)
    raise StandardError, "Abstract method called"
  end

  # Optional hooks; the scanner only calls those a variant responds to.

  # @!method begin(buffer)
  #   Open a block to be later filled in by BlockVariant#end
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method end(block, buffer)
  #   Finalize a block opened by begin
  #   @param block [::RBMark::DOM::DOMObject]
  #   @param buffer [String]
  #   @return [void]

  # @!method flush(buffer)
  #   Flush buffer and create a new DOM object
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method restructure(blocks, buffer, mode)
  #   Restructure current set of blocks (if method is defined)
  #   @param blocks [Array<::RBMark::DOM::DOMObject>]
  #   @param buffer [String]
  #   @param mode [::RBMark::Parsing::Variant]
  #   @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
end
# Paragraph breaking variant
class BreakerVariant < BlockVariant
  # Check that a paragraph matches the breaker
  # @param buffer [String]
  # @return [Boolean]
  # @raise [StandardError] unless overridden
  def match?(_buffer)
    raise(StandardError, "Abstract method called")
  end

  # Process a paragraph
  # @param buffer [String]
  # @return [::RBMark::DOM::DOMObject]
  # @raise [StandardError] unless overridden
  def process(_buffer)
    raise(StandardError, "Abstract method called")
  end

  # @!method preprocess(buffer)
  #   Preprocess buffer
  #   @param buffer [String]
  #   @return [String]
end
# Paragraph replacing variant
class ModifierVariant < BlockVariant
  # Check that a buffer matches requirements of the modifier
  # @param buffer [String]
  # @return [Class, nil]
  # @raise [StandardError] unless overridden
  def match?(_buffer)
    raise(StandardError, "Abstract method called")
  end
end
# Paragraph variant
class ParagraphVariant < BlockVariant
  # (see BlockVariant#begin?)
  # Any line containing a non-whitespace character can open a paragraph.
  def begin?(line, **_opts)
    line.match?(/\S/)
  end

  # (see BlockVariant#end?)
  # A paragraph ends on a breaker line, at end of input, or when the next
  # line would be claimed by a different variant. Breaker lines and lines
  # indented by four spaces continue the paragraph.
  def end?(line, lookahead: nil, **_opts)
    return true if check_paragraph_breakers(line)
    return true unless lookahead
    return false if check_paragraph_breakers(lookahead)
    return false if lookahead.match?(/^ {4}/)

    !parent.select_mode(lookahead).is_a?(self.class)
  end

  # (see BlockVariant#flush)
  # @sg-ignore
  def flush(buffer)
    obj = ::RBMark::DOM::Paragraph.new
    obj.content = buffer
    obj
  end

  # (see BlockVariant#restructure)
  # Resolution order: a matching breaker replaces the paragraph, otherwise
  # a matching modifier restructures the block list, otherwise the buffer
  # is parsed as a plain paragraph.
  def restructure(blocks, _buffer, _mode)
    p_buffer = blocks.last.content
    if (block = do_breakers(p_buffer))
      blocks[-1] = block
    elsif (modified = do_modifiers(blocks, p_buffer))
      blocks = modified.first
    else
      # The previous `unless (a, b, c = ...)` condition was always truthy,
      # which made this fallback unreachable.
      blocks[-1] = ::RBMark::DOM::Paragraph.parse(p_buffer)
    end
    [blocks, "", nil]
  end

  private

  # Run the first matching modifier variant, if any
  # @param blocks [Array<::RBMark::DOM::DOMObject>]
  # @param buffer [String]
  # @return [Array, nil] the modifier's restructure result, or nil when no
  #   modifier matched
  def do_modifiers(blocks, buffer)
    modifier = parent.variants.find do |variant, _prio|
      variant.is_a?(::RBMark::Parsing::ModifierVariant) &&
        variant.match?(buffer)
    end&.first
    modifier&.restructure(blocks, buffer, nil)
  end

  # Run the first matching breaker variant, if any
  # @param buffer [String]
  # @return [::RBMark::DOM::DOMObject, nil]
  def do_breakers(buffer)
    breaker = parent.variants.find do |variant, _prio|
      variant.is_a?(::RBMark::Parsing::BreakerVariant) &&
        variant.match?(buffer)
    end&.first
    breaker&.process(buffer)
  end

  # Check whether any breaker variant would begin on this line
  # @param line [String]
  # @return [Boolean]
  def check_paragraph_breakers(line)
    parent.variants.any? do |variant, _prio|
      variant.is_a?(::RBMark::Parsing::BreakerVariant) &&
        variant.begin?(line, breaks_paragraph: true)
    end
  end
end
# Thematic break variant
class ThematicBreakVariant < BlockVariant
  # (see BlockVariant#begin?)
  # The line must consist solely of spaces plus one marker character
  # (-, _ or *), start with at most three spaces of indent, and contain
  # at least three of some marker character.
  def begin?(line, **_opts)
    return false unless line.match?(/^(?:[- ]{3,}|[_ ]{3,}|[* ]{3,})$/)
    return false unless line.match?(/^ {0,3}[-_*]/)

    %w[- _ *].any? { |marker| line.count(marker) >= 3 }
  end

  # (see BlockVariant#end?)
  # A thematic break always occupies exactly one line.
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(_buffer)
    ::RBMark::DOM::HorizontalRule.new
  end
end
# ATX Heading variant
class ATXHeadingVariant < BlockVariant
  # (see BlockVariant#begin?)
  # Up to three spaces, one to six "#", then either nothing or a space
  # followed by the heading text.
  def begin?(line, **_opts)
    line.match?(/^ {0,3}\#{1,6}(?: .*|)$/)
  end

  # (see BlockVariant#end?)
  # ATX headings are single-line blocks.
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    parsed = buffer.match(/^ {0,3}(\#{1,6})( .*|)$/)
    marker = parsed[1]
    # Remove an optional closing run of "#" preceded by a space.
    title = parsed[2].gsub(/( #+|)$/, "")
    heading(marker).parse(title.strip)
  end

  private

  # Pick the heading class matching the number of "#" characters
  # @param lvl [String] the run of "#" characters
  # @return [Class, nil] nil when the run is not 1 to 6 characters long
  def heading(lvl)
    depth = lvl.length
    return unless (1..6).cover?(depth)

    ::RBMark::DOM.const_get("Heading#{depth}")
  end
end
# Paragraph closing variant
class BlankSeparator < BlockVariant
  # (see BlockVariant#begin?)
  # Matches lines holding nothing but up to three spaces.
  def begin?(line, **_opts)
    line.match?(/^ {0,3}$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  def match?(_buffer)
    false
  end

  # (see BlockVariant#restructure)
  # Marks the most recent block (if there is one) as closed and resets
  # the scanner's buffer and mode.
  def restructure(blocks, _buffer, _mode)
    previous = blocks.last
    previous.properties[:closed] = true if previous
    [blocks, "", nil]
  end
end
# Setext heading variant
class SetextHeadingVariant < BreakerVariant
  # (see BlockVariant#begin?)
  # Only ever begins when asked in paragraph-breaking context.
  def begin?(line, breaks_paragraph: nil, **_opts)
    return breaks_paragraph unless breaks_paragraph

    line.match?(/^ {0,3}(?:-+|=+) *$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  # Requires non-blank text above the underline and a valid underline as
  # the last line of the buffer.
  def match?(buffer)
    return nil unless preprocess(buffer).match?(/\S/)

    underline = buffer.lines.last
    !heading(underline).nil?
  end

  # (see BreakerVariant#process)
  def process(buffer)
    heading_class = heading(buffer.lines.last)
    heading_class.parse(preprocess(buffer))
  end

  private

  # Everything except the last (underline) line
  # @param buffer [String]
  # @return [String]
  def preprocess(buffer)
    buffer.lines[0...-1].join
  end

  # Map an underline to its heading class
  # @param buffer [String, nil] the underline line
  # @return [Class, nil]
  def heading(buffer)
    return ::RBMark::DOM::Heading2 if /^ {0,3}-+ *$/.match?(buffer)

    ::RBMark::DOM::Heading1 if /^ {0,3}=+ *$/.match?(buffer)
  end
end
# Indented literal block variant
class IndentedBlockVariant < BlockVariant
  # (see BlockVariant#begin?)
  # A block opens on a line indented by four spaces or by a tab.
  def begin?(line, **_opts)
    line.match?(/^(?: {4}|\t)/)
  end

  # (see BlockVariant#end?)
  # The block continues while the next line is indented (four spaces or a
  # tab, matching begin?) or blank. Previously only a line consisting of a
  # lone tab continued the block, so tab-indented code was split line by line.
  def end?(_line, lookahead: nil, **_opts)
    !lookahead&.match?(/^(?: {4}.*|\s*|\t.*)$/)
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    text = buffer.lines.map { |line| "#{strip_indent(line.chomp)}\n" }.join
    block = ::RBMark::DOM::IndentBlock.new
    block.content = text # TODO: replace this with inline text
    block
  end

  private

  # Remove one level of indentation from a line
  # @param line [String] line without its trailing newline
  # @return [String] the line minus a leading tab, or minus its first four
  #   characters; lines shorter than four characters become empty
  def strip_indent(line)
    # A tab is one character wide in the string, so cutting four characters
    # would swallow the first three characters of the code itself.
    return line[1..] if line.start_with?("\t")

    line[4..].to_s
  end
end
# Fenced code block
class FencedCodeBlock < BlockVariant
  # (see BlockVariant#begin?)
  # Opens on three or more backticks (info string without backticks) or
  # three or more tildes at the start of the line.
  def begin?(line, **_opts)
    line.match?(/^(?:`{3,}[^`]*|~{3,}.*)$/)
  end

  # (see BlockVariant#end?)
  # The buffer must hold more than the opening line, and the current line
  # must equal the fence recorded by #begin.
  def end?(line, blocks: nil, buffer: nil, **_opts)
    buffer.lines.length > 1 &&
      line.strip == blocks.last.properties[:expected_closer]
  end

  # (see BlockVariant#begin)
  # Records the opening fence (to recognise the closer) and the info string.
  def begin(buffer)
    block = ::RBMark::DOM::CodeBlock.new
    block.properties[:expected_closer] = buffer.match(/^(?:`{3,}|~{3,})/)[0]
    block.properties[:infostring] =
      buffer.match(/^(?:`{3,}|~{3,})(.*)$/)[1].strip
    block
  end

  # (see BlockVariant#end)
  # Stores everything between the opening fence and the closing fence as
  # the block content.
  def end(block, buffer)
    # TODO: replace this with inline text
    closer = block.properties.delete(:expected_closer)
    body = buffer.lines.drop(1)
    # Only drop the last line when it really is the closing fence: when the
    # input ends inside an unclosed block the last line is content and used
    # to be silently discarded.
    body.pop if body.last&.strip == closer
    block.content = body.join
  end
end
# Quote block
class QuoteBlock < BlockVariant
  # (see BlockVariant#begin?)
  # Up to three spaces, then a bare ">" or "> " followed by text.
  def begin?(line, **_opts)
    line.match?(/^ {0,3}(?:>|> .*)$/)
  end

  # (see BlockVariant#end?)
  # The quote ends when there is no next line or the next line is not
  # itself a quote line.
  def end?(_line, lookahead: nil, **_opts)
    !lookahead || !lookahead.match?(/^ {0,3}(?:>|> .*)$/)
  end

  # (see BlockVariant#flush)
  # Strips the quote marker from every line and parses the remainder as a
  # nested quote block.
  def flush(buffer)
    unquoted = buffer.each_line.map { |line| line.gsub(/^ {0,3}> ?/, '') }
    ::RBMark::DOM::QuoteBlock.parse(unquoted.join)
  end
end
end
# Module for representing abstract object hierarchy
module DOM
# Abstract container
class DOMObject
  class << self
    attr_accessor :variants, :scanner_class, :alt_for, :atomic_mode,
                  :block_mode, :permit_empty

    # Hook for initializing variables
    # Copies the variant list, atomic flag and scanner class to the subclass
    # and remembers the subclass so later registrations can be propagated.
    # @param subclass [Class]
    def inherited(subclass)
      super
      @subclasses ||= []
      @subclasses.append(subclass)
      subclass.variants = (@variants || []).dup
      subclass.atomic_mode = @atomic_mode
      subclass.scanner_class = @scanner_class
    end

    # Add potential sub-element variant
    # @param cls [Class] ::RBMark::Parsing::Variant subclass
    # @param prio [Integer] priority stored alongside the variant
    # @raise [StandardError] if cls is not a Variant subclass
    def variant(cls, prio: 1)
      unless cls < ::RBMark::Parsing::Variant
        raise StandardError, "#{cls} is not a Variant subclass"
      end

      # The root class never receives a list through .inherited.
      @variants ||= []
      @variants.append([cls, prio])
      @prepared = false
      # prio is keyword-only; passing it positionally raised ArgumentError
      # as soon as a subclass existed.
      @subclasses&.each { |subclass| subclass.variant(cls, prio: prio) }
    end

    # Set scanner class
    # @param cls [Class] ::RBMark::Parsing::Scanner subclass
    # @raise [StandardError] if cls is not a Scanner subclass
    def scanner(cls)
      unless cls < ::RBMark::Parsing::Scanner
        raise StandardError, "#{cls} is not a Scanner subclass"
      end

      @scanner_class = cls
      @prepared = false
      @subclasses&.each { |subclass| subclass.scanner(cls) }
    end

    # Prepare scanner and variants
    # The scanner is built once and reused until .variant or .scanner
    # changes the configuration.
    # @return [void]
    def prepare
      return if @prepared

      @scanner = @scanner_class.new
      @scanner.variants = @variants.map { |cls, prio| [cls.new, prio] }
      @prepared = true
    end

    # Parse text from the given context
    # Atomic classes store the text verbatim as content; all others run
    # the scanner and append the resulting children.
    # @param text [String]
    # @return [self]
    def parse(text)
      instance = new
      if @atomic_mode
        instance.content = text
      else
        prepare
        instance.append(*@scanner.scan(text))
      end
      instance
    end

    # Create a new instance of class or referenced class
    # @return [self, Class]
    def create
      if @alt_for
        @alt_for.new
      else
        new
      end
    end

    # Set the atomic flag
    # @return [void]
    def atomic
      @atomic_mode = true
    end

    # Set the block continuation flag
    # @return [void]
    def block
      @block_mode = true
    end

    # Allow the block to be empty
    # @return [void]
    def empty
      @permit_empty = true
    end
  end

  def initialize
    @content = nil
    @children = []
    @properties = {}
  end

  # Set certain property in the properties hash
  # @param properties [Hash] properties to update
  def property(**properties)
    @properties.update(**properties)
  end

  # Add children to container
  # @param children [Array<DOMObject>]
  # @raise [StandardError] if any argument is not a DOMObject
  def append(*children)
    unless children.all? { |x| x.is_a? DOMObject }
      raise StandardError, "one of #{children.inspect} is not a DOMObject"
    end

    @children.append(*children)
  end

  # Insert a child into the container
  # @param index [Integer]
  # @param child [DOMObject]
  # @raise [StandardError] if child is not a DOMObject
  def insert(index, child)
    raise StandardError, "not a DOMObject" unless child.is_a? DOMObject

    @children.insert(index, child)
  end

  # Delete a child from container
  # @param index [Integer]
  def delete_at(index)
    @children.delete_at(index)
  end

  # Get a child from the container
  # @param key [Integer]
  def [](key)
    @children[key]
  end

  # Set text content of a DOMObject
  # @param text [String]
  # @raise [StandardError] if text is not a String
  def content=(text)
    raise StandardError, "not a String" unless text.is_a? String

    @content = text
  end

  # Text content (String or nil), child list and properties hash
  attr_reader :content, :children, :properties
end
# Inline text
class Text < DOMObject
end
# Inline preformatted text
class InlinePre < DOMObject
end
# Inline formattable text
# Marked atomic: parse stores the raw text as content instead of scanning
# it; subclasses receive the flag through DOMObject.inherited.
class InlineFormattable < DOMObject
atomic
end
# Bold text
class InlineBold < InlineFormattable
end
# Italics text
class InlineItalics < InlineFormattable
end
# Inline italics text (alternative)
class InlineAltItalics < InlineFormattable
end
# Underline text
class InlineUnder < InlineFormattable
end
# Strikethrough text
class InlineStrike < InlineFormattable
end
# Hyperreferenced text
class InlineLink < InlineFormattable
end
# Image
class InlineImage < InlinePre
end
# Linebreak
class InlineBreak < DOMObject
end
# Block root
# Registers LineScanner as the scanner class plus the block-level variants.
# LineScanner#prepare sorts variants ascending by prio and select_mode picks
# the first one whose begin? accepts a line, so lower prio values win.
class Block < DOMObject
scanner ::RBMark::Parsing::LineScanner
variant ::RBMark::Parsing::ATXHeadingVariant, prio: 100
variant ::RBMark::Parsing::ThematicBreakVariant, prio: 200
variant ::RBMark::Parsing::SetextHeadingVariant, prio: 300
variant ::RBMark::Parsing::IndentedBlockVariant, prio: 400
variant ::RBMark::Parsing::FencedCodeBlock, prio: 500
variant ::RBMark::Parsing::QuoteBlock, prio: 600
# Blank lines and plain paragraphs are tried last.
variant ::RBMark::Parsing::BlankSeparator, prio: 9998
variant ::RBMark::Parsing::ParagraphVariant, prio: 9999
end
# Document root
class Document < Block
  class << self
    # (see ::RBMark::DOM::DOMObject#parse)
    # Parses as a regular block, then runs the merge and cleanup passes.
    def parse(text)
      doc = super
      merge(doc)
      cleanup(doc)
    end

    private

    # Clean up empty elements
    # @param doc [::RBMark::DOM::Document]
    # @return [::RBMark::DOM::Document]
    def cleanup(doc)
      _cleanup(doc)
      doc
    end

    # Merge open paragraphs upwards
    # @param doc [::RBMark::DOM::Document]
    # @return [::RBMark::DOM::Document]
    def merge(doc)
      _merge(doc)
      doc
    end

    # A function to merge children upward
    # (currently a placeholder that hands the stack back unchanged)
    def _merge_step(_child, stack, _depth)
      stack
    end

    # Merge nested block constructs upwards
    # @param doc [::RBMark::DOM::DOMObject]
    # @return [void]
    def _merge(doc, stack = [], depth = 0)
      stack.append(doc) if stack.length <= depth
      doc.children.each do |child|
        stack = _merge_step(child, stack, depth)
        # Only descend into block-mode children that actually have children.
        next unless child.class.block_mode && !child.children.empty?

        _merge(child, stack, depth + 1)
      end
    end

    # Recursively descend through hierarchy and delete empty elements
    # @param doc [::RBMark::DOM::DOMObject]
    # @return [Integer] combined size of the surviving and removed content
    def _cleanup(doc)
      total = 0
      doc.children.delete_if do |child|
        nested = child.children.empty? ? 0 : _cleanup(child)
        own = child.content&.strip&.length || 0
        weight = nested + own
        total += weight
        # Drop children with no content at all unless their class opts out.
        weight.zero? && !child.class.permit_empty
      end
      total
    end
  end
end
# Paragraph in a document (separated by 2 newlines)
# atomic: parse keeps the raw text as content rather than scanning it.
class Paragraph < InlineFormattable
atomic
end
# Heading level 1
class Heading1 < InlineFormattable
end
# Heading level 2
class Heading2 < Heading1
end
# Heading level 3
class Heading3 < Heading1
end
# Heading level 4
class Heading4 < Heading1
end
# Heading level 5
class Heading5 < Heading1
end
# Heading level 6
class Heading6 < Heading1
end
# Preformatted code block
class CodeBlock < DOMObject
end
# Quote block
# block: sets the block_mode flag that Document's merge pass checks before
# descending into a child.
class QuoteBlock < Block
block
end
# Table
class TableBlock < DOMObject
end
# List element
class ListElement < Block
end
# Unordered list
class ULBlock < DOMObject
end
# Ordered list block
class OLBlock < DOMObject
end
# Indent block
class IndentBlock < DOMObject
end
# Horizontal rule
# empty: sets permit_empty, so Document's cleanup pass keeps the element
# even though it never has content.
class HorizontalRule < DOMObject
atomic
empty
end
end
end

View File

@ -1,21 +0,0 @@
# frozen_string_literal: true
require_relative 'rbmark'
structure = RBMark::DOM::Document.parse(File.read("example.md"))
# Wrap a value in the ANSI escape codes for red foreground text.
# @param string [#to_s]
# @return [String]
def red(string)
  format("\033[31m%s\033[0m", string)
end
# Wrap a value in the ANSI escape codes for yellow foreground text.
# @param string [#to_s]
# @return [String]
def yellow(string)
  format("\033[33m%s\033[0m", string)
end
# Print a DOM tree, one node per line: class name (namespace stripped),
# a "(c)" marker for nodes whose :closed property is set, and the inspected
# content. Children are prefixed with a red bullet.
# @param doc [#properties, #content, #children] node to print
# @param indent [Integer] number of spaces put in front of child bullets
# @return [void]
def prettyprint(doc, indent = 0)
  marker_text = doc.properties[:closed] ? "(c)" : ""
  name = yellow(doc.class.name.gsub(/\w+::DOM::/,""))
  marker = red(marker_text)
  puts "#{name}#{marker}: #{doc.content.inspect}"
  doc.children.each do |child|
    print red("#{" " * indent} - ")
    prettyprint(child, indent + 4)
  end
end
prettyprint(structure)

View File

@ -1,15 +0,0 @@
require_relative 'blankshell'
pp PointBlank::DOM::Document.parse(<<DOC)
Penis
# STREEMER VIN SAUCE JORKS HIS PEANUTS ON S TREeAM
> pee
> > 2 pee
> peepee
> > 3 pee
> > 4 pee
bee
> # IT'S HIP
> BEES
> > FUCK
BEES
DOC

View File

@ -1,180 +0,0 @@
# frozen_string_literal: true
require_relative 'blankshell'
doc = <<~DOC
Penis
# STREEMER VIN SAUCE JORKS HIS PEANUTS ON S TREeAM
> pee
> > 2 pee
> peepee
and you cum now
> > 3 pee
> > 4 pee
bee
# IT'S HIP
> # IT'S HIP
> BEES
> > FUCK
BEES
PEES
=========
[definition]: /url 'title'
[definition
2
]:
/long_url_with_varying_stuff
(title)
> COME ON AND SNIFF THE PAINT
>
> WITH MEEE
> > OH THAT IS SO CUUL
> OH THERE'S BLOOD IN MY STOOL
> AAAAA IT HURTS
>
> > WHEN I
> PEEEEEEE
PIIS
==========
but does it end here?
> COCK
> < PENIS
> < > AMONGUS
> < CONTINUATION
> > BREAKER
COCK
+ Plus block opens
and continues.
This is the next paragraph of a plus block,
and this is a continuation line in the block
+ This thing continues the outer block and has a plus sign still.
next part
- SIMPS LMAO
continuation
This by the way should continue the
block but should be a separate
paragraph
- Next shit
> INCLUDING INNER QUOTES BY THE WAY
WITH INNER PARAGRAPH FALL OFF!!!
also a paragraph inside this thing
- BUT CAN WE GET EVEN STUPIDER?????
> YES WE CAN!!!!
- Another element
NOW it breaks
1. FREDDY FAZBER???
HARHAR HAR HAR HAR
HAR HAR HARHAR
HOLY SHITTO FREDDY FASTBER???
AR AR HARHAR HAR
HURHURHURHUR
2. fast
ber
10. BIG
still the same OLblock
11) OK NOW THIS IS EBIN
different block
12930192) THIS still continues because idk why really
lmao
> QUONT PARGRAP
WHAT THEF UCK BASSBOOSTED
>```fencedcode block infoline (up to interpretation)
> #THIS should have a very specific structure, not modified by anything
>
> int main() {
> int i = 1;
> if (i > 0) {
> printf("anus\\n");
> }
> return 0;
> }
>```
Also code block
Hello mario
also these should continue so that's a thing
- Thematic break test
- - - - - - - - - - - - - - - - - - - - -
- Above should be a thematic break, not a list containing a thematic break
but what if
--------------
WRONG????
aaa
bbb
ccc
now it's time to CUHHHMMMMMMM
- <amongus:thisis_an_autolink>
- <amongus:but this isn't>
- <peeee:nis> peepee <peee:peeeeeeinis> Pe
- `cum on <` hogogwagarts ><cum:on`>hogogwagarts`
- ``` test `should work tho `` and this should be continued` ````
- \\<amongus:bumpalumpa>
- `` \\<cum:amongus> ```
- \\```amongus``
- ``amongus``\\`
- ![image](/test.jpg 'title')
- moretests![image](/test.jpg (title))after
- more tests ![image](/invalid(link 'valid') after
- more tests ![image](/valid(link) 'valid') after
- next test
![image `inner block` etc](/should_be_valid "should be valid")
amongus
- ![image `this shouldn't be allowed to be an image](/shouldn't be valid `technicallynotatitle`)
- [outer![inner](/AAAAAA 'peepee')](/poopoo 'AAAAAA')
- [amongus][definition]
- *emphasis on multiple words*
- **strong emphasis on multiple words**
- infix**emphasis**block
- no_infix_empahsis
- _emphasis_
- __strong emphasis__
- __nested __strong__ emphasis__
- __(__this__)__
- *among us*** ***vr*
- *among **us*vr****
- *among **us *vr****
- *among**us*
- [*outer*![****inner****](/AAAAAA 'peepee')](/poopoo 'AAAAAA')
DOC
structure = PointBlank::DOM::Document.parse(doc)
# Wrap a value in the ANSI escape codes for red foreground text.
# @param string [#to_s]
# @return [String]
def red(string)
  format("\033[31m%s\033[0m", string)
end
# Wrap a value in the ANSI escape codes for yellow foreground text.
# @param string [#to_s]
# @return [String]
def yellow(string)
  format("\033[33m%s\033[0m", string)
end
# Print a DOM tree, one node per line: class name (namespace stripped),
# inspected content and, when present, the inspected properties in red.
# Children are prefixed with a red bullet.
# @param doc [#properties, #content, #children] node to print
# @param indent [Integer] number of spaces put in front of child bullets
# @return [void]
def prettyprint(doc, indent = 0)
  label = yellow(doc.class.name.gsub(/\w+::DOM::/, ''))
  body = doc.content.inspect
  props = doc.properties.empty? ? '' : red(doc.properties.inspect)
  puts "#{label}: #{body} #{props}"
  doc.children.each do |child|
    print red("#{' ' * indent} - ")
    prettyprint(child, indent + 4)
  end
end
prettyprint(structure)