This commit is contained in:
Yessiest 2025-01-24 22:07:17 +00:00
parent 40e9144010
commit 468bd043ca
5 changed files with 907 additions and 30 deletions

630
lib/blankshell.rb Normal file
View File

@ -0,0 +1,630 @@
# frozen_string_literal: true
module PointBlank
module Parsing
# Line-by-line block scanner.
#
# Keeps a stack of currently open blocks (the root document sits at index 0).
# For every input line the open blocks first consume their own continuation
# markers, blocks that no longer continue are closed, and then as many new
# nested blocks as the rest of the line allows are opened.
class LineScanner
  # @param text [String] text to scan (iterated with String#each_line)
  # @param doc [Object] root block; it must already carry a parser instance,
  #   because the root parser is asked to consume every line
  def initialize(text, doc)
    @text = text
    @document = doc
    @stack = [@document]
    @depth = 0
    @topdepth = 0
  end

  # Scan document and return scanned structure
  # @return [Object] the root block passed to the constructor
  def scan
    @text.each_line do |line|
      # Consume markers from lines to keep the levels open
      line = consume_markers(line)
      # A whitespace-only remainder cannot open anything
      line = line&.strip&.empty? ? nil : line
      # Open nested blocks until no child type accepts the remainder
      while line
        opened, line = try_open(line)
        break unless opened
      end
    end
    close_up(0)
    @stack.first
  end

  private

  # Try to open a new block on the line
  # @param line [String, nil]
  # @return [Array(Boolean, String)] whether a block was opened, and the
  #   part of the line left over after the new block consumed its marker
  def try_open(line)
    return [false, line] unless topclass.parser && line

    topclass.valid_children.each do |cand|
      next unless cand.parser.begin?(line)

      @depth += 1
      @topdepth = @depth if @topdepth < @depth
      @stack[@depth] = cand.new
      @stack[@depth - 1].append_child(toplevel)
      toplevel.parser = cand.parser.new
      line, _implicit = toplevel.parser.consume(line, @stack[@depth - 1])
      return [true, line]
    end
    [false, line]
  end

  # Attempt to consume markers for all valid stack elements
  # @param line [String]
  # @return [String, nil] remainder of the line, or nil when the line was
  #   absorbed as a lazy continuation of the deepest open paragraph
  def consume_markers(line)
    climb = -1 # index of the deepest block that still continues
    previous = nil
    implicit = nil
    @stack[..@depth].each do |element|
      newline, impl = element.parser.consume(line, previous)
      implicit = impl unless impl.nil?
      line = newline if newline
      break unless newline

      climb += 1
      previous = element
    end
    if climb < @depth
      # Some block did not continue. If a parser flagged the line as
      # implicitly continuable, give the deepest open paragraph a chance
      # to take it lazily before anything is closed.
      if implicit && @stack[@topdepth].is_a?(::PointBlank::DOM::Paragraph)
        backref = @stack[@topdepth]
        remaining, = backref.parser.consume(line, previous, lazy: true)
        return nil if remaining
      end
      close_up(climb)
    end
    line
  end

  # Close every open block above the picked level
  # @param level [Integer] index of the deepest block that stays open
  # @return [void]
  def close_up(level)
    ((level + 1)..(@stack.length - 1)).each do |index|
      x = @stack[index]
      # A parser may answer #close with a class the block should morph into
      switch = x.parser.close
      x.content = x.parser.parsed_content
      x.parser.applyprops(x) if x.parser.respond_to? :applyprops
      x.parser = nil
      x = transfer(x, switch) if switch
      x.parse_inner if x.respond_to? :parse_inner
    end
    @topdepth = @depth = level
    @stack = @stack[..level]
  end

  # Transfer data from class to another class (morph class)
  # @param block [Object] block being replaced
  # @param switchclass [Class] class of the replacement block
  # @return [Object] the replacement, installed in the parent's slot
  def transfer(block, switchclass)
    newblock = switchclass.new
    newblock.content = block.content
    newblock.parser = nil
    # Carry the tree back-references over so the replacement is not orphaned
    newblock.parent = block.parent
    newblock.position = block.position
    block.parent[block.position] = newblock
    newblock
  end

  # Get top level element at the current moment
  def toplevel
    @stack[@depth]
  end

  # Get top level element class
  def topclass
    @stack[@depth].class
  end

  # Debug info
  def debug(line)
    warn "#{@depth}:#{@topdepth} #{line.inspect}"
  end
end
# Null parser: never begins a block, passes every line through untouched
# and only accumulates what subclasses explicitly push.
class NullParser
  class << self
    # Check that a parser parses this line as a beginning of a block
    # @param _line [String]
    # @return [Boolean] always false for the null parser
    def begin?(_line)
      false
    end
  end

  # Instantiate a new parser object with an empty line buffer
  def initialize
    @buffer = []
  end

  # Close parser
  # @return [nil, Class] nil here; subclasses may return a class
  def close
    nil
  end

  # Return parsed content
  # @return [String] buffered lines joined with a single space
  def parsed_content
    @buffer.join(" ")
  end

  # Consume line markers
  # @param line [String]
  # @return [Array(String, Boolean)] the unchanged line and `false`
  def consume(line, _parent = nil, **_hargs)
    [line, false]
  end

  private

  # Push a new parsed line
  # @param line [String]
  # @return [void]
  def push(line)
    @buffer << line
  end
end
# Paragraph parser: accepts any line as a beginning and keeps collecting
# lines until a blank line, an underline or another block type shows up.
class ParagraphParser < NullParser
  class << self
    # (see ::PointBlank::Parsing::NullParser#begin?)
    def begin?(_line)
      true
    end
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  # @param lazy [Boolean] when true, underline detection is skipped
  def consume(line, parent = nil, lazy: false)
    # Blank line: the paragraph does not continue
    return [nil, nil] if line.match?(/\A {0,3}\Z/)
    # Underline: swallow it and remember which class to morph into
    return ["", nil] if check_underlines(line, parent, lazy)
    # Another block type wants this line, or an underline already closed us
    return [nil, nil] if check_candidates(line, parent) || @closed

    push(line)
    ["", nil]
  end

  # (see ::PointBlank::Parsing::NullParser#close)
  # @return [Class, nil] class picked by an underline, if one was seen
  def close
    (@closed && @next_class) || nil
  end

  private

  # Check if the current line is an underline (morphs class)
  def check_underlines(line, _parent, lazy)
    return false if lazy

    matched = ::PointBlank::DOM::Paragraph.valid_children.find do |underline|
      underline.parser.begin?(line)
    end
    return false unless matched

    @next_class = matched
    @closed = true
    true
  end

  # Check that there are no other candidates for line beginning
  def check_candidates(line, parent)
    return false unless parent

    parent.class.valid_children.any? do |candidate|
      candidate != ::PointBlank::DOM::Paragraph && candidate.parser.begin?(line)
    end
  end
end
# ATX heading (a run of `#` characters opening the line)
# @abstract subclasses set {level} to the number of `#` characters
class ATXParser < NullParser
  class << self
    # Number of `#` characters this parser matches
    attr_accessor :level

    # (see ::PointBlank::Parsing::NullParser#begin?)
    def begin?(line)
      line.match?(/^ {0,3}\#{#{@level}}(?: .*|)$/)
    end
  end

  def initialize
    super
    @matched = false
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  # Accepts exactly one line; any later call reports the block as finished.
  def consume(line, _parent, **_hargs)
    return [nil, false] if @matched

    @matched = true
    without_opener = line.gsub(/\A {0,3}\#{#{self.class.level}} */, '')
    # Drop an optional closing run of `#`
    push(without_opener.gsub(/( #+|)\Z/, ''))
    [line, false]
  end
end
# ATX heading level 1.
# `level` is the class-level count of `#` characters that ATXParser.begin?
# and ATXParser#consume interpolate into their patterns.
class ATXParserLV1 < ATXParser
self.level = 1
end
# ATX heading level 2 (opening run of two `#`)
class ATXParserLV2 < ATXParser
self.level = 2
end
# ATX heading level 3 (opening run of three `#`)
class ATXParserLV3 < ATXParser
self.level = 3
end
# ATX heading level 4 (opening run of four `#`)
class ATXParserLV4 < ATXParser
self.level = 4
end
# ATX heading level 5 (opening run of five `#`)
class ATXParserLV5 < ATXParser
self.level = 5
end
# ATX heading level 6 (opening run of six `#`)
class ATXParserLV6 < ATXParser
self.level = 6
end
# Underline parser
# @abstract subclasses recognise one concrete underline style
class UnderlineParser < NullParser
  class << self
    # Checks whether a paragraph underline is on this line.
    # Should match an entire underline.
    # @param _line [String]
    # @return [Boolean] always false in the abstract parser
    def begin?(_line)
      false
    end
  end
end
# Setext parser level 1 (underline made of `=`)
class SetextParserLV1 < UnderlineParser
  class << self
    # (see ::PointBlank::Parsing::UnderlineParser)
    # @return [Boolean] true when the whole line is a run of three or more `=`
    def begin?(line)
      line.match?(/\A {0,3}={3,}\s*\z/)
    end
  end
end
# Setext parser level 2 (underline made of `-`)
class SetextParserLV2 < UnderlineParser
  class << self
    # (see ::PointBlank::Parsing::UnderlineParser)
    # @return [Boolean] true when the whole line is a run of three or more `-`
    def begin?(line)
      line.match?(/\A {0,3}-{3,}\s*\z/)
    end
  end
end
# Unordered list block (group)
class ULParser < NullParser
  # Bullet marker (`-`, `+` or `*`) plus the spaces separating it from the text
  OPENING = /\A {0,3}([-+*])( +)/.freeze

  # (see ::PointBlank::Parsing::NullParser#begin?)
  # Pure predicate: the marker is captured per parser instance in #consume,
  # because this method is also called just to probe arbitrary lines.
  def self.begin?(line)
    line.match?(OPENING)
  end

  # Record the list marker on every child of the closed block
  # @param block [#each] block whose children receive the "marker" property
  # @return [void]
  def applyprops(block)
    block.each do |child|
      child.properties["marker"] = @marker
    end
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  def consume(line, _parent = nil, **_hargs)
    # The first consumed line is the one that opened the list
    capture_marker(line) unless @marker
    return [nil, true] unless continues?(line)

    [line.lstrip.delete_prefix(@marker).lstrip, true]
  end

  private

  # Remember the marker and spacing of the line that opened this list
  def capture_marker(line)
    @marker, @offset = line.match(OPENING)&.captures
  end

  # Check if a line continues this ULParser block
  def continues?(line)
    return false unless @marker

    # Marker and spacing are literal text, so escape them (`*`, `+`)
    marker = Regexp.escape(@marker)
    offset = Regexp.escape(@offset)
    line.match?(/\A(?: {0,3}#{marker}| )#{offset}/)
  end
end
# Quote block (lines opening with `>`)
class QuoteParser < NullParser
  class << self
    # (see ::PointBlank::Parsing::NullParser#begin?)
    def begin?(line)
      line.start_with?(/\A {0,3}>(?: \S|)/)
    end
  end

  # (see ::PointBlank::Parsing::NullParser#consume)
  # Strips the quote marker and surrounding whitespace from a quoted line.
  # The second element is always true, for continued and rejected lines alike.
  def consume(line, _parent = nil, **_hargs)
    if line.start_with?(/\A {0,3}>(?: \S|)/)
      unquoted = line.lstrip.delete_prefix('>').lstrip
      [unquoted, true]
    else
      [nil, true]
    end
  end
end
end
module DOM
# Raised when something that is not a DOMObject is inserted into the tree
class DOMError < StandardError; end

# DOM Object: a tree node with children, properties and text content.
# Class-level state (scanner, parser, valid children) describes how a node
# type is parsed.
class DOMObject
  class << self
    attr_accessor :scanner, :parser, :overflow,
                  :unsorted_children

    # Make subclasses inherit scanner and valid children
    def inherited(subclass)
      subclass.parser ||= @parser
      subclass.scanner ||= @scanner
      subclass.unsorted_children ||= @unsorted_children.dup || []
      super(subclass)
    end

    # Sort children by priority (lower number first). Children with equal
    # priority keep their definition order, and a class without any
    # defined children gets an empty list.
    # @return [Array<Class>]
    def sort_children
      entries = @unsorted_children || []
      @valid_children = entries.each_with_index
                               .sort_by { |(_child, priority), order| [priority, order] }
                               .map { |(child, _priority), _order| child }
    end

    # Define valid child for this DOMObject class
    # @param child [Class]
    # @param priority [Integer] sort key, lower values are tried first
    # @return [void]
    def define_child(child, priority = 9999)
      @unsorted_children ||= []
      @unsorted_children.append([child, priority])
      # Drop the memoized order so #valid_children sees the new child
      @valid_children = nil
    end

    # Define child element scanner for this DOMObject class
    # @param scanner [Class]
    # @return [void]
    def define_scanner(scanner)
      @scanner = scanner
    end

    # Define self parser for this DOMObject class
    # @param parser [Class] parser class, instantiated with `new`
    # @return [void]
    def define_parser(parser)
      @parser = parser
    end

    # Define if this DOMObject class is overflowable
    # @return [void]
    def enable_overflow
      @overflow = true
    end

    # Parse a document
    # @param doc [Object] input handed to the scanner as its first argument
    # @return [Object] whatever the scanner's #scan returns
    def parse(doc)
      newdoc = new
      newdoc.parser = parser.new
      scanner.new(doc, newdoc).scan
    end

    # Source parameters from parent (fixes recursive dependency).
    # Overwrites this class's scanner, parser and children with the
    # superclass's current values, then re-sorts the children.
    def upsource
      superclass&.tap do |sc|
        @scanner = sc.scanner
        @parser = sc.parser
        @unsorted_children = sc.unsorted_children.dup
      end
      sort_children
    end

    # Get array of valid children sorted by priority
    # @return [Array<Class>]
    def valid_children
      sort_children unless @valid_children
      @valid_children
    end
  end

  include ::Enumerable

  attr_accessor :content, :parser, :parent, :position
  attr_reader :properties

  def initialize
    @children = []
    @properties = {}
    @content = ""
  end

  # Set element at position and link it back to this node
  # @param index [Integer]
  # @param element [DOMObject]
  # @return [DOMObject]
  # @raise [DOMError] when element is not a DOMObject
  def []=(index, element)
    unless element.is_a? ::PointBlank::DOM::DOMObject
      raise DOMError, "invalid DOM class #{element.class}"
    end

    @children[index] = element
    # Keep the back-references consistent with #append_child
    element.parent = self
    element.position = index.negative? ? index + @children.length : index
    element
  end

  # Get element at position
  # @param index [Integer]
  # @return [DOMObject]
  def [](index)
    @children[index]
  end

  # Iterate over each child of DOMObject
  # @param block [#call]
  def each(&block)
    @children.each(&block)
  end

  # Return an array duplicate of all children
  # @return [Array<DOMObject>]
  def children
    @children.dup
  end

  # Append child
  # @param child [DOMObject]
  # @raise [DOMError] when child is not a DOMObject
  def append_child(child)
    unless child.is_a? ::PointBlank::DOM::DOMObject
      raise DOMError, "invalid DOM class #{child.class}"
    end

    child.parent = self
    child.position = @children.length
    @children.append(child)
  end
end
# Inline text
class Text < DOMObject
end
# Inline preformatted text
class InlinePre < DOMObject
end
# Inline formattable text (base class for inline and heading nodes below)
class InlineFormattable < DOMObject
end
# Bold text
class InlineBold < InlineFormattable
end
# Italics text
class InlineItalics < InlineFormattable
end
# Inline italics text (alternative)
class InlineAltItalics < InlineFormattable
end
# Underline text
class InlineUnder < InlineFormattable
end
# Strikethrough text
class InlineStrike < InlineFormattable
end
# Hyperreferenced text
class InlineLink < InlineFormattable
end
# Image
class InlineImage < InlinePre
end
# Linebreak
class InlineBreak < DOMObject
end
# Block root (virtual).
# Declared empty here so the classes below can inherit from it; its scanner,
# parser and valid children are attached later through Block.class_eval.
class Block < DOMObject
end
# Document root
class Document < Block
end
# Paragraph in a document (its parser stops at a blank line)
class Paragraph < InlineFormattable
define_parser ::PointBlank::Parsing::ParagraphParser
end
# Setext heading level 1 (paragraph underlined with `=`)
class SetextHeading1 < InlineFormattable
define_parser ::PointBlank::Parsing::SetextParserLV1
end
# Setext heading level 2 (paragraph underlined with `-`)
class SetextHeading2 < SetextHeading1
define_parser ::PointBlank::Parsing::SetextParserLV2
end
# ATX heading level 1
class ATXHeading1 < InlineFormattable
define_parser ::PointBlank::Parsing::ATXParserLV1
end
# ATX heading level 2
class ATXHeading2 < ATXHeading1
define_parser ::PointBlank::Parsing::ATXParserLV2
end
# ATX heading level 3
class ATXHeading3 < ATXHeading1
define_parser ::PointBlank::Parsing::ATXParserLV3
end
# ATX heading level 4
class ATXHeading4 < ATXHeading1
define_parser ::PointBlank::Parsing::ATXParserLV4
end
# ATX heading level 5
class ATXHeading5 < ATXHeading1
define_parser ::PointBlank::Parsing::ATXParserLV5
end
# ATX heading level 6
class ATXHeading6 < ATXHeading1
define_parser ::PointBlank::Parsing::ATXParserLV6
end
# Preformatted code block
class CodeBlock < DOMObject
end
# Quote block (parser attached below, after upsource)
class QuoteBlock < Block
end
# Table
class TableBlock < DOMObject
end
# List element
class ListElement < Block
end
# Unordered list (parser attached below, after upsource)
class ULBlock < Block
end
# Ordered list block
class OLBlock < Block
end
# Indent block
class IndentBlock < DOMObject
end
# Horizontal rule
class HorizontalRule < DOMObject
end
# Block root (real).
# Wires the scanner, parser and valid children onto Block now that every
# child class exists. Children are tried in ascending priority order, so
# ULBlock (500) comes before the headings and quotes (600), and Paragraph,
# which uses the default priority of 9999, comes last.
Block.class_eval do
define_scanner ::PointBlank::Parsing::LineScanner
define_parser ::PointBlank::Parsing::NullParser
define_child ::PointBlank::DOM::Paragraph
define_child ::PointBlank::DOM::ATXHeading1, 600
define_child ::PointBlank::DOM::ATXHeading2, 600
define_child ::PointBlank::DOM::ATXHeading3, 600
define_child ::PointBlank::DOM::ATXHeading4, 600
define_child ::PointBlank::DOM::ATXHeading5, 600
define_child ::PointBlank::DOM::ATXHeading6, 600
define_child ::PointBlank::DOM::QuoteBlock, 600
define_child ::PointBlank::DOM::ULBlock, 500
end
# Setext headings are the only children of Paragraph; ParagraphParser walks
# this list to detect underlines.
Paragraph.class_eval do
define_child ::PointBlank::DOM::SetextHeading1, 1
define_child ::PointBlank::DOM::SetextHeading2, 2
end
# Block subclasses were created before Block had a scanner or children, so
# copy Block's scanner, parser and children down to them now.
Block.subclasses.each(&:upsource)
# upsource overwrites each subclass's parser with Block's parser, so the
# block-specific parsers must be assigned after it has run.
QuoteBlock.class_eval do
define_parser ::PointBlank::Parsing::QuoteParser
end
ULBlock.class_eval do
define_parser ::PointBlank::Parsing::ULParser
end
end
end

View File

@ -153,8 +153,15 @@ module RBMark
class BreakerVariant < BlockVariant
# Check that a paragraph matches the breaker
# @param buffer [String]
# @return [Class, nil]
def match(_buffer)
# @return [Boolean]
def match?(_buffer)
raise StandardError, "Abstract method called"
end
# Process a paragraph
# @param buffer [String]
# @return [::RBMark::DOM::DOMObject]
def process(_buffer)
raise StandardError, "Abstract method called"
end
@ -164,6 +171,16 @@ module RBMark
# @return [String]
end
# Paragraph replacing variant
# NOTE(review): looks like an abstract base; subclasses presumably override
# #match? (and supply #restructure, which ParagraphVariant calls) — confirm.
class ModifierVariant < BlockVariant
# Check that a buffer matches requirements of the modifier
# @param buffer [String]
# @return [Boolean]
# @raise [StandardError] always, in this base implementation
def match?(_buffer)
raise StandardError, "Abstract method called"
end
end
# Paragraph variant
class ParagraphVariant < BlockVariant
# (see BlockVariant#begin?)
@ -189,17 +206,42 @@ module RBMark
# (see BlockVariant#flush)
# @sg-ignore
def flush(buffer)
dom_class = nil
breaker = parent.variants.find do |x|
x[0].is_a?(::RBMark::Parsing::BreakerVariant) &&
(dom_class = x[0].match(buffer))
end&.first
buffer = breaker.preprocess(buffer) if breaker.respond_to?(:preprocess)
(dom_class or ::RBMark::DOM::Paragraph).parse(buffer.strip)
obj = ::RBMark::DOM::Paragraph.new
obj.content = buffer
obj
end
# (see BlockVariant#restructure)
def restructure(blocks, _buffer, _mode)
p_buffer = blocks.last.content
if (block = do_breakers(p_buffer))
blocks[-1] = block
else
unless (blocks, _buffer, _mode = do_modifiers(blocks, p_buffer))
blocks[-1] = ::RBMark::DOM::Paragraph.parse(p_buffer)
end
end
[blocks, "", nil]
end
private
def do_modifiers(blocks, buffer)
breaker = parent.variants.find do |x|
x[0].is_a?(::RBMark::Parsing::ModifierVariant) &&
x[0].match?(buffer)
end&.first
breaker&.restructure(blocks, buffer, nil) || [blocks, buffer, nil]
end
def do_breakers(buffer)
breaker = parent.variants.find do |x|
x[0].is_a?(::RBMark::Parsing::BreakerVariant) &&
x[0].match?(buffer)
end&.first
breaker&.process(buffer)
end
def check_paragraph_breakers(line)
breakers = parent.variants.filter_map do |x|
x[0] if x[0].is_a? ::RBMark::Parsing::BreakerVariant
@ -266,11 +308,10 @@ module RBMark
end
# Paragraph closing variant
class BlankSeparator < BreakerVariant
class BlankSeparator < BlockVariant
# (see BlockVariant#begin?)
def begin?(line, breaks_paragraph: nil, **_opts)
breaks_paragraph &&
line.match?(/^ {0,3}$/)
def begin?(line, **_opts)
line.match?(/^ {0,3}$/)
end
# (see BlockVariant#end?)
@ -279,8 +320,14 @@ module RBMark
end
# (see BreakerVariant#match)
def match(_buffer)
nil
def match?(_buffer)
false
end
# (see BlockVariant#restructure)
def restructure(blocks, _buffer, _mode)
blocks.last.properties[:closed] = true if blocks.last
[blocks, "", nil]
end
end
@ -298,19 +345,25 @@ module RBMark
end
# (see BreakerVariant#match)
def match(buffer)
def match?(buffer)
return nil unless preprocess(buffer).match(/\S/)
heading(buffer.lines.last)
!heading(buffer.lines.last).nil?
end
# (see BreakerVariant#preprocess)
def preprocess(buffer)
buffer.lines[..-2].join
# (see BreakerVariant#process)
def process(buffer)
heading = heading(buffer.lines.last)
buffer = preprocess(buffer)
heading.parse(buffer)
end
private
def preprocess(buffer)
buffer.lines[..-2].join
end
def heading(buffer)
case buffer
when /^ {0,3}-+ *$/ then ::RBMark::DOM::Heading2
@ -369,6 +422,28 @@ module RBMark
block.content = buffer.lines[1..-2].join
end
end
# Quote block (consecutive lines opening with `>`)
class QuoteBlock < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^ {0,3}(?:>|> .*)$/)
  end

  # (see BlockVariant#end?)
  # The block ends unless the following line is a quote line as well.
  def end?(_line, lookahead: nil, **_opts)
    return true unless lookahead

    !lookahead.match?(/^ {0,3}(?:>|> .*)$/)
  end

  # (see BlockVariant#flush)
  # Strips the quote marker from every line and parses the rest as a quote.
  def flush(buffer)
    unquoted = buffer.each_line.map { |line| line.gsub(/^ {0,3}> ?/, '') }
    ::RBMark::DOM::QuoteBlock.parse(unquoted.join)
  end
end
end
# Module for representing abstract object hierarchy
@ -453,7 +528,20 @@ module RBMark
@atomic_mode = true
end
attr_accessor :variants, :scanner_class, :alt_for, :atomic_mode
# Set the block continuation flag (`block_mode`) on this class.
# Document._merge only descends into children whose class has it set.
# @return [void]
def block
@block_mode = true
end
# Allow the block to be empty (`permit_empty`).
# Document._cleanup keeps empty children whose class has it set.
# @return [void]
def empty
@permit_empty = true
end
attr_accessor :variants, :scanner_class, :alt_for, :atomic_mode,
:block_mode, :permit_empty
end
def initialize
@ -557,18 +645,80 @@ module RBMark
class InlineBreak < DOMObject
end
# Document root
class Document < DOMObject
# Block root
class Block < DOMObject
scanner ::RBMark::Parsing::LineScanner
variant ::RBMark::Parsing::ATXHeadingVariant
variant ::RBMark::Parsing::ThematicBreakVariant
variant ::RBMark::Parsing::SetextHeadingVariant
variant ::RBMark::Parsing::IndentedBlockVariant
variant ::RBMark::Parsing::FencedCodeBlock
variant ::RBMark::Parsing::ATXHeadingVariant, prio: 100
variant ::RBMark::Parsing::ThematicBreakVariant, prio: 200
variant ::RBMark::Parsing::SetextHeadingVariant, prio: 300
variant ::RBMark::Parsing::IndentedBlockVariant, prio: 400
variant ::RBMark::Parsing::FencedCodeBlock, prio: 500
variant ::RBMark::Parsing::QuoteBlock, prio: 600
variant ::RBMark::Parsing::BlankSeparator, prio: 9998
variant ::RBMark::Parsing::ParagraphVariant, prio: 9999
end
# Document root
class Document < Block
  class << self
    # (see ::RBMark::DOM::DOMObject#parse)
    # Runs the inherited parser, then the merge and cleanup passes.
    def parse(text)
      parsed = super
      cleanup(merge(parsed))
    end

    private

    # Clean up empty elements
    # @param doc [::RBMark::DOM::Document]
    # @return [::RBMark::DOM::Document]
    def cleanup(doc)
      doc.tap { |root| _cleanup(root) }
    end

    # Merge open paragraphs upwards
    # @param doc [::RBMark::DOM::Document]
    # @return [::RBMark::DOM::Document]
    def merge(doc)
      doc.tap { |root| _merge(root) }
    end

    # A function to merge children upward (currently returns the stack as is)
    def _merge_step(_child, stack, _depth)
      stack
    end

    # Merge nested block constructs upwards
    # @param doc [::RBMark::DOM::DOMObject]
    # @return [void]
    def _merge(doc, stack = [], depth = 0)
      stack << doc unless stack.length > depth
      doc.children.each do |child|
        stack = _merge_step(child, stack, depth)
        # Only block-mode children that actually contain something are descended into
        next unless child.class.block_mode && child.children.length.positive?

        _merge(child, stack, depth + 1)
      end
    end

    # Recursively descend through hierarchy and delete empty elements
    # @param doc [::RBMark::DOM::DOMObject]
    # @return [Integer] combined length of the stripped content found below doc
    def _cleanup(doc)
      total = 0
      doc.children.delete_if do |child|
        nested = child.children.length.positive? ? _cleanup(child) : 0
        own = child.content&.strip&.length || 0
        weight = nested + own
        total += weight
        weight.zero? && !child.class.permit_empty
      end
      total
    end
  end
end
# Paragraph in a document (separated by 2 newlines)
class Paragraph < InlineFormattable
atomic
@ -603,7 +753,8 @@ module RBMark
end
# Quote block
class QuoteBlock < Document
class QuoteBlock < Block
block
end
# Table
@ -611,7 +762,7 @@ module RBMark
end
# List element
class ListElement < Document
class ListElement < Block
end
# Unordered list
@ -629,6 +780,7 @@ module RBMark
# Horizontal rule
class HorizontalRule < DOMObject
atomic
empty
end
end
end

21
lib/test.rb Normal file
View File

@ -0,0 +1,21 @@
# frozen_string_literal: true
require_relative 'rbmark'
structure = RBMark::DOM::Document.parse(File.read("example.md"))
# Wrap a value in the ANSI escape sequence for red foreground text
# @param string [#to_s]
# @return [String]
def red(string)
  format("\033[31m%s\033[0m", string)
end
# Wrap a value in the ANSI escape sequence for yellow foreground text
# @param string [#to_s]
# @return [String]
def yellow(string)
  format("\033[33m%s\033[0m", string)
end
# Print a DOM tree to stdout, one node per line: class name (namespace up to
# `DOM::` removed), a "(c)" flag when the node's :closed property is set,
# and the inspected content. Children are prefixed with an indented dash.
# @param doc [Object] node responding to #properties, #content and #children
# @param indent [Integer] number of spaces in front of the child dash
def prettyprint(doc, indent = 0)
  flag = doc.properties[:closed] ? "(c)" : ""
  label = yellow(doc.class.name.gsub(/\w+::DOM::/, ""))
  puts "#{label}#{red(flag)}: #{doc.content.inspect}"
  gutter = "#{" " * indent} - "
  doc.children.each do |child|
    print red(gutter)
    prettyprint(child, indent + 4)
  end
end
prettyprint(structure)

15
lib/test2.rb Normal file
View File

@ -0,0 +1,15 @@
require_relative 'blankshell'
pp PointBlank::DOM::Document.parse(<<DOC)
Penis
# STREEMER VIN SAUCE JORKS HIS PEANUTS ON S TREeAM
> pee
> > 2 pee
> peepee
> > 3 pee
> > 4 pee
bee
> # IT'S HIP
> BEES
> > FUCK
BEES
DOC

59
lib/test3.rb Normal file
View File

@ -0,0 +1,59 @@
# frozen_string_literal: true
require_relative 'blankshell'
structure = PointBlank::DOM::Document.parse(<<~DOC)
Penis
# STREEMER VIN SAUCE JORKS HIS PEANUTS ON S TREeAM
> pee
> > 2 pee
> peepee
and you cum now
> > 3 pee
> > 4 pee
bee
# IT'S HIP
> # IT'S HIP
> BEES
> > FUCK
BEES
PEES
=========
> COME ON AND SNIFF THE PAINT
>
> WITH MEEE
> > OH THAT IS SO CUUL
> OH THERE'S BLOOD IN MY STOOL
> AAAAA IT HURTS
>
> > WHEN I
> PEEEEEEE
PIIS
==========
but does it end here?
> COCK
> < PENIS
> < > AMONGUS
> < CONTINUATION
> > BREAKER
COCK
DOC
# Wrap a value in the ANSI escape sequence for red foreground text
# @param string [#to_s]
# @return [String]
def red(string)
  format("\033[31m%s\033[0m", string)
end
# Wrap a value in the ANSI escape sequence for yellow foreground text
# @param string [#to_s]
# @return [String]
def yellow(string)
  format("\033[33m%s\033[0m", string)
end
# Print a DOM tree to stdout, one node per line: class name (namespace up to
# `DOM::` removed), a "(c)" flag when the node's :closed property is set,
# and the inspected content. Children are prefixed with an indented dash.
# @param doc [Object] node responding to #properties, #content and #children
# @param indent [Integer] number of spaces in front of the child dash
def prettyprint(doc, indent = 0)
  flag = doc.properties[:closed] ? "(c)" : ""
  label = yellow(doc.class.name.gsub(/\w+::DOM::/, ""))
  puts "#{label}#{red(flag)}: #{doc.content.inspect}"
  gutter = "#{" " * indent} - "
  doc.children.each do |child|
    print red(gutter)
    prettyprint(child, indent + 4)
  end
end
prettyprint(structure)