rewrite 3

This commit is contained in:
Yessiest 2024-05-11 17:29:13 +04:00
parent a4242dcac9
commit ead3126a46
9 changed files with 1268 additions and 430 deletions

View File

@ -1,55 +1,373 @@
# frozen_string_literal: true
module RBMark
# Parser class
class Parser
def initialize(variants, default)
@default = default
@variants = variants
@markers = @variants.map { |x| [x.begin, x] }.to_h
end
# Parse text using the given variants
# @param text [String]
# @return [Array<::RBMark::DOM::DOMObject>]
def parse(text)
chunks = []
until text.nil? or text.empty?
before, chunk, text = get_chunk(text)
chunks.append(@default.parse(before)) unless before.empty?
next unless chunk
chunks.append(chunk)
# Module for representing parsing-related constructs
module Parsing
# Abstract scanner interface implementation
class Scanner
def initialize
@variants = []
end
chunks.flatten
end
private
def get_chunk(text)
element, match = get_element(text)
if element
chunk, after = finalize_element(text[match.offset(0)[0]..], element)
return [match.pre_match, chunk, after] if chunk
return [match.pre_match, nil, match.post_match]
# Scan text
# @param text [String]
# @return [Array<RBMark::DOM::DOMObject>]
def scan(_text)
raise StandardError, "Abstract method called"
# ...
end
[text, nil, nil]
attr_accessor :variants
end
def get_element(text)
@markers.filter_map do |marker|
[marker[1], text.match(marker[0])] if text.match(marker[0])
end.min_by { |x| x[1].offset(0)[0] }
# Line-level scanner for blocks
#
# Feeds the text to the registered block variants one line at a time.
# The variant whose +begin?+ accepts a line becomes the current mode and
# keeps receiving lines (accumulated in a buffer) until its +end?+ returns
# true, at which point the buffer is flushed into a DOM object.
class LineScanner < Scanner
  # (see ::RBMark::Parsing::Scanner#scan)
  # @param buffer [String] text carried into the first block
  # @param blocks [Array<RBMark::DOM::DOMObject>] blocks collected so far
  # @param mode [::RBMark::Parsing::BlockVariant, nil] variant already open
  def scan(text, buffer: "", blocks: [], mode: nil)
    prepare
    lines = text.lines
    lines.each_with_index do |line, index|
      # += (not <<) on purpose: the default buffer is a frozen literal
      buffer += line
      ahead = lines.fetch(index + 1, nil)
      blocks, buffer, mode = try_begin(line,
                                       blocks,
                                       buffer,
                                       mode,
                                       lookahead: ahead)
      if mode&.end?(line, lookahead: ahead, blocks: blocks, buffer: buffer)
        blocks, buffer, mode = flush(blocks, buffer, mode)
      end
    end
    # Whatever is still buffered at end of input belongs to the open mode
    flush(blocks, buffer, mode)[0]
  end

  # Predict mode for given line
  # @param line [String]
  # @param message [Hash] extra keywords forwarded to each variant's +begin?+
  # @return [::RBMark::Parsing::BlockVariant, nil] first variant (in priority
  #   order) that accepts the line, or nil when none does
  def select_mode(line, **message)
    @variants.find do |variant|
      variant[0].begin?(line, **message)
    end&.at(0)
  end

  private

  # Attempt to open a new mode and, if possible, call :begin to prepare the block
  # @return [Array(Array<RBMark::DOM::DOMObject>, String, Object)]
  def try_begin(line, blocks, buffer, mode, lookahead: nil)
    return blocks, buffer, mode if mode

    mode = select_mode(line, lookahead: lookahead,
                             blocks: blocks,
                             buffer: buffer)
    blocks.append(mode.begin(line)) if mode.respond_to?(:begin)
    [blocks, buffer, mode]
  end

  # Assign self as parent to all variants and order them by priority
  # @raise [StandardError] if a registered variant is not a BlockVariant
  # @return [void]
  def prepare
    @variants.each do |variant|
      unless variant[0].is_a? ::RBMark::Parsing::BlockVariant
        raise StandardError, "#{variant} is not a BlockVariant"
      end

      variant[0].parent = self
    end
    # sort_by! alone is not stable; the index tie-breaker keeps variants of
    # equal priority in the order they were registered.
    @variants.sort_by!.with_index { |variant, index| [variant.last, index] }
  end

  # Flush the buffer using given mode
  # @param blocks [Array<RBMark::DOM::DOMObject>]
  # @param buffer [String]
  # @param mode [Object]
  # @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
  def flush(blocks, buffer, mode)
    # Nothing buffered: return the documented triple with the mode cleared
    return [blocks, "", nil] if buffer == ""

    mode.end(blocks.last, buffer) if mode.respond_to?(:end)
    blocks.append(mode.flush(buffer)) if mode.respond_to?(:flush)
    if mode.respond_to?(:restructure)
      blocks, buffer, mode = mode.restructure(blocks, buffer, mode)
    else
      buffer = ""
      mode = nil
    end
    [blocks, buffer, mode]
  end
end
def finalize_element(text, element)
match = text.match(element.end)
return nil, nil unless match
# Common base class for every parsing variant; declares no behaviour itself
class Variant; end
chunk_text = text[..(match.offset(0)[1] - 1)]
remaining_text = text[match.offset(0)[1]..]
[element.parse(chunk_text), remaining_text]
# Abstract block-level variant
#
# Subclasses must implement {#begin?} and {#end?}. The remaining hooks
# (+begin+, +end+, +flush+, +restructure+) are optional.
class BlockVariant < Variant
  # Object this variant is attached to
  attr_accessor :parent

  # Decide whether a block of this kind starts on the given line
  # @param _line [String]
  # @param _opts [Hash] options hash
  # @option _opts [String, nil] :lookahead next line over
  # @option _opts [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option _opts [String] :buffer current state of buffer
  # @raise [StandardError] always, unless overridden
  # @return [Boolean]
  def begin?(_line, **_opts)
    raise(StandardError, "Abstract method called")
  end

  # Decide whether the currently open block finishes on the given line
  # @param _line [String]
  # @param _opts [Hash] options hash
  # @option _opts [String, nil] :lookahead next line over
  # @option _opts [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option _opts [String] :buffer current state of buffer
  # @raise [StandardError] always, unless overridden
  # @return [Boolean]
  def end?(_line, **_opts)
    raise(StandardError, "Abstract method called")
  end

  # @!method begin(buffer)
  #   Open a block to be later filled in by BlockVariant#end
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method end(block, buffer)
  #   Finalize a block opened by begin
  #   @param block [::RBMark::DOM::DOMObject]
  #   @param buffer [String]
  #   @return [void]

  # @!method flush(buffer)
  #   Flush buffer and create a new DOM object
  #   @param buffer [String]
  #   @return [::RBMark::DOM::DOMObject]

  # @!method restructure(blocks, buffer, mode)
  #   Restructure current set of blocks (if method is defined)
  #   @param blocks [Array<::RBMark::DOM::DOMObject>]
  #   @param buffer [String]
  #   @param mode [::RBMark::Parsing::Variant]
  #   @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
end
# Paragraph breaking variant
#
# A breaker is a block variant that can terminate a paragraph and may
# decide which DOM class the finished paragraph text becomes.
class BreakerVariant < BlockVariant
  # Check whether the buffered paragraph text matches this breaker
  # @param _buffer [String]
  # @raise [StandardError] always, unless overridden
  # @return [Class, nil] DOM class to use for the buffer, nil if no match
  def match(_buffer)
    raise(StandardError, "Abstract method called")
  end

  # @!method preprocess(buffer)
  #   Optional hook: transform the buffer before it is parsed
  #   @param buffer [String]
  #   @return [String]
end
# Paragraph variant
#
# Matches any line holding a non-whitespace character and hands the
# collected text either to a matching breaker's DOM class or to Paragraph.
class ParagraphVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/\S/)
  end

  # (see BlockVariant#end?)
  def end?(line, lookahead: nil, **_opts)
    # The current line itself is a breaker: the paragraph stops here
    return true if check_paragraph_breakers(line)
    # No further input
    return true unless lookahead
    # The breaker line still has to be pulled into the buffer first
    return false if check_paragraph_breakers(lookahead)
    # A line indented by four spaces continues the paragraph
    return false if lookahead.match?(/^ {4}/)

    !parent.select_mode(lookahead).is_a?(self.class)
  end

  # (see BlockVariant#flush)
  # @sg-ignore
  def flush(buffer)
    dom_class = nil
    candidates = parent.variants
                       .map(&:first)
                       .grep(::RBMark::Parsing::BreakerVariant)
    # The assignment inside the block keeps the class returned by the
    # first breaker that matches
    breaker = candidates.find { |candidate| dom_class = candidate.match(buffer) }
    buffer = breaker.preprocess(buffer) if breaker.respond_to?(:preprocess)
    target = dom_class || ::RBMark::DOM::Paragraph
    target.parse(buffer.strip)
  end

  private

  # Tell whether any registered breaker variant starts on the given line
  # @param line [String]
  # @return [Boolean]
  def check_paragraph_breakers(line)
    parent.variants.any? do |entry|
      entry[0].is_a?(::RBMark::Parsing::BreakerVariant) &&
        entry[0].begin?(line, breaks_paragraph: true)
    end
  end
end
# Thematic break variant
#
# A single line built from one marker character (-, _ or *) and spaces.
class ThematicBreakVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    # Whole line consists of one marker kind mixed with spaces
    return false unless line.match?(/^(?:[- ]{3,}|[_ ]{3,}|[* ]{3,})$/)
    # The first marker is preceded by at most three spaces
    return false unless line.match?(/^ {0,3}[-_*]/)

    # At least three actual marker characters (spaces do not count)
    ["-", "_", "*"].any? { |marker| line.count(marker) >= 3 }
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(_buffer)
    ::RBMark::DOM::HorizontalRule.new
  end
end
# ATX Heading variant
#
# One to six "#" characters, indented by at most three spaces, followed by
# either a space and the heading text or nothing at all.
class ATXHeadingVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^ {0,3}\#{1,6}(?: .*|)$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    parts = buffer.match(/^ {0,3}(\#{1,6})( .*|)$/)[1..2]
    marker = parts[0]
    # Drop an optional closing run of "#" at the end of the text
    title = parts[1].gsub(/( #+|)$/, "")
    heading(marker).parse(title.strip)
  end

  private

  # Pick the heading class for a run of "#" characters
  # @param lvl [String] the opening "#" run
  # @return [Class, nil]
  def heading(lvl)
    case lvl.length
    when 1
      ::RBMark::DOM::Heading1
    when 2
      ::RBMark::DOM::Heading2
    when 3
      ::RBMark::DOM::Heading3
    when 4
      ::RBMark::DOM::Heading4
    when 5
      ::RBMark::DOM::Heading5
    when 6
      ::RBMark::DOM::Heading6
    end
  end
end
# Paragraph closing variant
#
# Ends a paragraph on a blank line. It never opens a block of its own:
# it only answers when asked with +breaks_paragraph: true+.
class BlankSeparator < BreakerVariant
  # (see BlockVariant#begin?)
  # @param breaks_paragraph [Boolean, nil] set when the caller is checking
  #   whether the line interrupts a paragraph
  def begin?(line, breaks_paragraph: nil, **_opts)
    # A blank line is any line made only of spaces and tabs, however many;
    # the previous {0,3} limit let longer whitespace-only lines through as
    # paragraph content.
    breaks_paragraph &&
      line.match?(/^[ \t]*$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  # @return [nil] a blank line never changes the paragraph's DOM class
  def match(_buffer)
    nil
  end
end
# Setext heading variant
#
# Turns a paragraph into a heading when it is followed by an underline made
# of "=" (level 1) or "-" (level 2).
class SetextHeadingVariant < BreakerVariant
  # (see BlockVariant#begin?)
  def begin?(line, breaks_paragraph: nil, **_opts)
    breaks_paragraph &&
      line.match?(/^ {0,3}(?:-+|=+) *$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  def match(buffer)
    # An underline with no text above it is not a heading
    heading(buffer.lines.last) if preprocess(buffer).match?(/\S/)
  end

  # (see BreakerVariant#preprocess)
  def preprocess(buffer)
    # Everything except the last line
    buffer.lines[0...-1].join
  end

  private

  # Map an underline to its heading class
  # @param buffer [String, nil] the underline
  # @return [Class, nil]
  def heading(buffer)
    if /^ {0,3}-+ *$/.match?(buffer)
      ::RBMark::DOM::Heading2
    elsif /^ {0,3}=+ *$/.match?(buffer)
      ::RBMark::DOM::Heading1
    end
  end
end
# Indented literal block variant
#
# A run of lines indented by four spaces or one tab; whitespace-only lines
# in between do not end the run.
class IndentedBlockVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?: {4}|\t)/)
  end

  # (see BlockVariant#end?)
  def end?(_line, lookahead: nil, **_opts)
    # Continue while the next line is indented (four spaces OR a tab, the
    # same rule begin? uses) or is whitespace-only. Previously a
    # tab-indented lookahead ended the block after every line.
    !lookahead&.match?(/^(?:(?: {4}|\t).*|\s*)$/)
  end

  # (see BlockVariant#flush)
  def flush(buffer)
    text = buffer.lines.map { |x| "#{strip_indent(x.chomp)}\n" }.join
    block = ::RBMark::DOM::IndentBlock.new
    block.content = text # TODO: replace this with inline text
    block
  end

  private

  # Remove exactly one level of indentation from a line
  #
  # Cutting a fixed four characters would eat three characters of code
  # from a tab-indented line, so the actual indent (four spaces or a single
  # tab) is matched instead. Whitespace-only lines shorter than the indent
  # collapse to an empty string.
  # @param line [String] line without its trailing newline
  # @return [String]
  def strip_indent(line)
    line.sub(/\A(?: {4}|\t| {0,3}\z)/, "")
  end
end
# Fenced code block
#
# Opens on a line of three or more backticks or tildes (optionally followed
# by an info string) and closes on a line equal to the opening fence.
class FencedCodeBlock < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?:`{3,}[^`]*|~{3,}.*)$/)
  end

  # (see BlockVariant#end?)
  def end?(line, blocks: nil, buffer: nil, **_opts)
    # The opening fence itself (first buffered line) must not close the block
    buffer.lines.length > 1 &&
      line.strip == blocks.last.properties[:expected_closer]
  end

  # (see BlockVariant#begin)
  def begin(buffer)
    block = ::RBMark::DOM::CodeBlock.new
    block.properties[:expected_closer] = buffer.match(/^(?:`{3,}|~{3,})/)[0]
    block.properties[:infostring] = buffer.match(/^(?:`{3,}|~{3,})(.*)$/)[1]
                                          .strip
    block
  end

  # (see BlockVariant#end)
  def end(block, buffer)
    # TODO: replace this with inline text
    closer = block.properties.delete(:expected_closer)
    body = buffer.lines.drop(1) # skip the opening fence
    # Only drop the last line when it really is the closing fence. A block
    # left open at end of input has no closer, and unconditionally cutting
    # the last line would lose its final line of code.
    body.pop if body.last&.strip == closer
    block.content = body.join
  end
end
end
@ -66,42 +384,56 @@ module RBMark
@subclasses.append(subclass)
subclass.variants = @variants.dup
subclass.variants ||= []
subclass.default_class = @default_class
subclass.atomic_mode = @atomic_mode
subclass.scanner_class = @scanner_class
end
# Add potential sub-element variant
# @param cls [Class] ::RBMark::Parsing::Variant subclass
# @param prio [Integer] priority stored alongside the class
# @raise [StandardError] if cls is not a Variant subclass
def variant(cls, prio: 1)
  unless cls < ::RBMark::Parsing::Variant
    raise StandardError, "#{cls} is not a Variant subclass"
  end

  @variants.append([cls, prio])
  # prepare skips its work while @prepared is set; clear it so a scanner
  # built earlier is not reused without this variant
  @prepared = false
  @subclasses&.each do |subclass|
    # prio is a keyword parameter; passing it positionally raises
    # ArgumentError
    subclass.variant(cls, prio: prio)
  end
end
# Set default element class
# Set scanner class
# @param cls [Class] DOMObject subclass
def default(cls)
unless cls < ::RBMark::DOM::DOMObject
raise StandardError, "#{cls} is not a DOMObject subclass"
def scanner(cls)
unless cls < ::RBMark::Parsing::Scanner
raise StandardError, "#{cls} is not a Scanner subclass"
end
@default_class = cls
@scanner_class = cls
@subclasses&.each do |subclass|
subclass.default(cls)
subclass.scanner(cls)
end
end
# Prepare scanner and variants
#
# Instantiates the configured scanner class and gives it one fresh instance
# of every registered variant class, paired with its priority. Does nothing
# once @prepared is set.
# @return [void]
def prepare
  return if @prepared

  @scanner = @scanner_class.new
  @scanner.variants = @variants.map { |cls, prio| [cls.new, prio] }
  # The guard above was never armed, so the scanner and all variant
  # instances were rebuilt on every call.
  @prepared = true
end
# Parse text from the given context
# @param text [String]
# @return [self]
def parse(text)
parser = ::RBMark::Parser.new(@variants, @default_class)
instance = create
instance.append(*parser.parse(text))
prepare unless @atomic_mode
instance = new
if @atomic_mode
instance.content = text
else
instance.append(*@scanner.scan(text))
end
instance
end
@ -115,7 +447,13 @@ module RBMark
end
end
attr_accessor :variants, :begin, :end, :default_class, :alt_for
# Set the atomic flag
#
# Only assigns @atomic_mode here; where the flag is read is not visible
# from this method.
# @return [void]
def atomic
@atomic_mode = true
end
attr_accessor :variants, :scanner_class, :alt_for, :atomic_mode
end
def initialize
@ -169,12 +507,6 @@ module RBMark
@content = text
end
# Get length of text contents
# @return [Integer]
def length
@children.map(&:length).sum
end
# Get text content of a DOMObject
# @return [String, nil]
attr_reader :content, :children, :properties
@ -182,350 +514,96 @@ module RBMark
# Inline text
class Text < DOMObject
# Stub parser for inline text element
# @param text [String]
# @return [self]
def self.parse(text)
instance = new
instance.content = text.gsub("\n", ' ').gsub(/\s+/, " ")
instance
end
# Get length of inline text
# @return [Integer]
def length
@content.length
end
end
# Inline preformatted text
class InlinePre < DOMObject
self.begin = /(?<!\\)`(?!`)/
self.end = /`.+?`/
# Stub parser for inline text element
# @param text [String]
# @return [self]
def self.parse(text)
instance = new
element = ::RBMark::DOM::Text.new
element.content = text
instance.append element
instance
end
# Get length of inline preformatted text
# @return [Integer]
def length
@content ? @content.length : 0
end
end
# Infline formattable text
class InlineFormattable < DOMObject
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
cleanup(super(text))
end
# Clean up internal text chunks
def self.cleanup(paragraph)
previous = nil
delete = []
paragraph.children.each_with_index do |child, index|
if previous.nil? || previous.is_a?(::RBMark::DOM::InlineBreak)
if child.is_a? ::RBMark::DOM::Text
child.content = child.content.lstrip
end
delete.append(index) if child.is_a? ::RBMark::DOM::InlineBreak
end
previous = child
end
delete.reverse_each do |index|
paragraph.delete_at(index)
end
paragraph
end
atomic
end
# Bold text
class InlineBold < InlineFormattable
self.begin = /(?<!\\)\*\*(?!\*\*)/
self.end = /\*\*.+?\*\*/
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
super(text[2..-3])
end
end
# Italics text
class InlineItalics < InlineFormattable
self.begin = /(?<!\\)\*(?!\*)/
self.end = /\*.+?\*/
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
super(text[1..-2])
end
end
# Inline italics text (alternative)
class InlineAltItalics < InlineFormattable
self.begin = /(?<!\\)_(?!_)/
self.end = /_.+?_/
self.alt_for = ::RBMark::DOM::InlineItalics
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
super(text[1..-2])
end
end
# Underline text
class InlineUnder < InlineFormattable
self.begin = /(?<!\\)__(?!__)/
self.end = /__.+?__/
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
super(text[2..-3])
end
end
# Strikethrough text
class InlineStrike < InlineFormattable
self.begin = /(?<!\\)~~(?!~~)/
self.end = /~~.+?~~/
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
super(text[2..-3])
end
end
# Hyperreferenced text
class InlineLink < InlineFormattable
self.begin = /(?<![!\\])\[[^\]]+?\]\([^)]+?\)/
self.end = self.begin
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
text, link = text.match(/\[([^\]]+?)\]\(([^)]+?)\)/)[1..2]
instance = super(text)
instance.property link: link
instance
end
end
# Image
class InlineImage < InlinePre
self.begin = /(?<!\\)!\[[^\]]+?\]\([^)]+?\)/
self.end = self.begin
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
text, link = text.match(/!\[([^\]]+?)\]\(([^)]+?)\)/)[1..2]
instance = super(text)
instance.property link: link
instance
end
end
# Linebreak
class InlineBreak < DOMObject
self.begin = / /
self.end = / /
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(_text)
new
end
# Stub for inline break length
def length
0
end
end
# Document root
class Document < DOMObject
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
merge(vacuum(super(normalize_newlines(text))))
end
# Replace all forms of line endings with UNIX format newline
def self.normalize_newlines(text)
text.gsub(/(?:\r\n|\n\r|\r|\n)/, "\n")
end
# Remove all elements with absolute lenght of 0
def self.vacuum(document)
delete = []
document.children.each_with_index do |element, index|
delete.append(index) unless element.length.positive?
end
delete.reverse_each do |index|
document.delete_at(index)
end
document
end
# Merge adjacent lists with indent blocks as per markdownguide guidelines
def self.merge(document)
last_list = nil
delete_deferred = []
document.children.each_with_index do |child, index|
if !last_list and [::RBMark::DOM::ULBlock,
::RBMark::DOM::OLBlock].include? child.class
last_list = child
elsif last_list and mergeable?(last_list, child)
merge_adjacent(last_list, child)
delete_deferred.append(index)
else
last_list = nil
end
end
delete_deferred.reverse_each { |index| document.delete_at(index) }
document
end
# Check if 2 elements can be merged
def self.mergeable?(list, child)
if child.is_a? ::RBMark::DOM::IndentBlock or
(child.is_a? ::RBMark::DOM::ULBlock and
list.is_a? ::RBMark::DOM::ULBlock) or
(child.is_a? ::RBMark::DOM::OLBlock and
list.is_a? ::RBMark::DOM::OLBlock and
child.children.first.properties[:number] >
list.children.last.properties[:number])
true
else
false
end
end
# Merge 2 elements
def self.merge_adjacent(left, right)
if right.is_a? ::RBMark::DOM::ULBlock or
right.is_a? ::RBMark::DOM::OLBlock
right.children.each do |child|
left.append(child)
end
elsif right.is_a? ::RBMark::DOM::IndentBlock
left.children.last.append(
*::RBMark::DOM::ListElement.parse(right.children.first.content)
.children
)
end
end
scanner ::RBMark::Parsing::LineScanner
variant ::RBMark::Parsing::ATXHeadingVariant
variant ::RBMark::Parsing::ThematicBreakVariant
variant ::RBMark::Parsing::SetextHeadingVariant
variant ::RBMark::Parsing::IndentedBlockVariant
variant ::RBMark::Parsing::FencedCodeBlock
variant ::RBMark::Parsing::BlankSeparator, prio: 9998
variant ::RBMark::Parsing::ParagraphVariant, prio: 9999
end
# Paragraph in a document (separated by 2 newlines)
class Paragraph < InlineFormattable
# (see ::RBMark::DOM::DOMObject.parse)
# @return [Array<self>]
def self.parse(text)
text.split("\n\n").map do |chunk|
super(chunk)
end
end
atomic
end
# Heading level 1
class Heading1 < InlineFormattable
self.begin = /^# /
self.end = /#?$/
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
super(text.gsub(self.begin, '').gsub(self.end, ''))
end
end
# Heading level 2
class Heading2 < Heading1
self.begin = /^## /
self.end = /(?:##)?$/
end
# Heading level 3
class Heading3 < Heading1
self.begin = /^### /
self.end = /(?:###)?$/
end
# Heading level 4
class Heading4 < Heading1
self.begin = /^#### /
self.end = /(?:####)?$/
end
# Heading level 5
class Heading5 < Heading1
self.begin = /^##### /
self.end = /(?:#####)?$/
end
# Heading level 6
class Heading6 < Heading1
self.begin = /^###### /
self.end = /(?:######)?$/
end
# Alternative heading 1
class AltHeading1 < InlineFormattable
self.begin = /^[^\n]+\n={3,}$/m
self.end = /={3,}$/
self.alt_for = ::RBMark::DOM::Heading1
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
super(text.match(/\A[^\n]+$/)[0].strip)
end
end
# Alternative heading 2
class AltHeading2 < InlineFormattable
self.begin = /^[^\n]+\n-{3,}$/m
self.end = /-{3,}$/
self.alt_for = ::RBMark::DOM::Heading2
# (see ::RBMark::DOM::DOMObject.parse)
def self.parse(text)
super(text.match(/\A[^\n]+$/)[0].strip)
end
end
# Preformatted code block
class CodeBlock < DOMObject
self.begin = /^```[^\n]*$/
self.end = /^```[^\n]*\n.*?\n```$/m
# Stub parser for block text element
# @param text [String]
# @return [self]
def self.parse(text)
instance = new
language = text.match(/\A```([^\n]*)/)[1].strip
element = ::RBMark::DOM::Text.new
element.content = text.lines[1..-2].join('').rstrip
instance.append(element)
instance.property language: language
instance
end
end
# Quote block
class QuoteBlock < Document
self.begin = /^> \S/
self.end = /(?:^(?!>)|\Z)/
# stub
def self.parse(text)
super(text.lines.map { |x| x[2..] }.join(''))
end
end
# Table
@ -538,122 +616,19 @@ module RBMark
# Unordered list
class ULBlock < DOMObject
self.begin = /^- +\S+/
self.end = /(?:^(?!- +\S+| )|\Z)/
# (see RBMark::DOM::DOMObject.parse)
def self.parse(text)
block = []
instance = new
text.lines.each do |line|
if line.start_with?("- ")
unless block.empty?
instance.append(::RBMark::DOM::ListElement.parse(block.join('')))
end
block = [line[2..]]
else
block.append(line[2..])
end
end
instance.append(::RBMark::DOM::ListElement.parse(block.join('')))
instance
end
end
# Ordered list block
class OLBlock < DOMObject
self.begin = /^\d+\. +\S+/
self.end = /(?:^(?!\d+\. +\S+| {4})|\Z)/
# (see RBMark::DOM::DOMObject.parse)
def self.parse(text)
block = []
instance = new
counter = nil
text.lines.each do |line|
if line.start_with?(/^\d+\. /)
unless block.empty?
instance.append(element(block.join(''), counter))
end
counter = line.match(/^(\d+)\. /)[1]
block = [line.gsub(/^(?:\d+\. | {4})/, '')]
else
block.append(line.gsub(/^(?:\d+\. | {4})/, ''))
end
end
instance.append(element(block.join(''), counter))
instance
end
# Construct a new ListElement
def self.element(text, counter)
instance = ::RBMark::DOM::ListElement.parse(text)
instance.property number: counter
instance
end
end
# Indent block
class IndentBlock < DOMObject
self.begin = /^ {4}/
self.end = /(?:^(?! {4})|\Z)/
# (see RBMark::DOM::DOMObject.parse)
def self.parse(text)
instance = new
element = ::RBMark::DOM::Text.new
element.content = text.lines.map { |x| x[4..] }.join('')
instance.append(element)
instance
end
end
# Horizontal rule
class HorizontalRule < DOMObject
self.begin = /^-{3,}$/
self.end = /$/
# stub for HR
def self.parse(_text)
new
end
# Stub for HR length
# @return [Integer]
def length
1
end
end
InlineFormattable.class_exec do
default ::RBMark::DOM::Text
variant ::RBMark::DOM::InlineBold
variant ::RBMark::DOM::InlineItalics
variant ::RBMark::DOM::InlineAltItalics
variant ::RBMark::DOM::InlineUnder
variant ::RBMark::DOM::InlineImage
variant ::RBMark::DOM::InlineLink
variant ::RBMark::DOM::InlinePre
variant ::RBMark::DOM::InlineStrike
variant ::RBMark::DOM::InlineBreak
end
Document.class_exec do
default ::RBMark::DOM::Paragraph
variant ::RBMark::DOM::Heading1
variant ::RBMark::DOM::Heading2
variant ::RBMark::DOM::Heading3
variant ::RBMark::DOM::Heading4
variant ::RBMark::DOM::Heading5
variant ::RBMark::DOM::Heading6
variant ::RBMark::DOM::AltHeading1
variant ::RBMark::DOM::AltHeading2
variant ::RBMark::DOM::QuoteBlock
variant ::RBMark::DOM::CodeBlock
variant ::RBMark::DOM::ULBlock
variant ::RBMark::DOM::OLBlock
variant ::RBMark::DOM::IndentBlock
variant ::RBMark::DOM::HorizontalRule
atomic
end
end
end

9
lib/rbmark/renderers.rb Normal file
View File

@ -0,0 +1,9 @@
# frozen_string_literal: true
module RBMark
  # Namespace for renderers from Markdown to the expected output format
  module Renderers; end
end
require_relative 'renderers/html'

View File

@ -0,0 +1,132 @@
# frozen_string_literal: true
require 'rbmark'
module RBMark
module Renderers
# HTML Renderer
#
# Holds a DOM and an options hash and is meant to turn them into HTML.
# NOTE(review): this class looks unfinished in this view - see the notes on
# #render and #_render below.
class HTML
# Rendering descriptors keyed by DOM class name. Each descriptor names the
# HTML tag and, where present, flags such as :inline, :href, :src, a :style
# string, or an :outer wrapper tag. "RBMark::DOM::Text" maps to nil.
# NOTE(review): nothing visible here reads this table yet.
# NOTE(review): InlineLink uses tag "link"; HTML hyperlinks are normally
# <a> elements - confirm the intended tag.
ELEMENT_MAP = {
"RBMark::DOM::InlinePre" => {
tag: "code",
inline: true
},
"RBMark::DOM::InlineBreak" => {
tag: "br",
inline: true
},
"RBMark::DOM::InlineBold" => {
tag: "strong",
inline: true
},
"RBMark::DOM::InlineItalics" => {
tag: "em",
inline: true
},
"RBMark::DOM::InlineUnder" => {
tag: "span",
inline: true,
style: "text-decoration: underline;"
},
"RBMark::DOM::InlineStrike" => {
tag: "s",
inline: true
},
"RBMark::DOM::InlineLink" => {
tag: "link",
href: true,
inline: true
},
"RBMark::DOM::InlineImage" => {
tag: "img",
src: true,
inline: true
},
"RBMark::DOM::ULBlock" => {
tag: "ul"
},
"RBMark::DOM::OLBlock" => {
tag: "ol"
},
"RBMark::DOM::IndentBlock" => {
tag: "pre"
},
"RBMark::DOM::ListElement" => {
tag: "li"
},
"RBMark::DOM::Paragraph" => {
tag: "p"
},
"RBMark::DOM::Heading1" => {
tag: "h1"
},
"RBMark::DOM::Heading2" => {
tag: "h2"
},
"RBMark::DOM::Heading3" => {
tag: "h3"
},
"RBMark::DOM::Heading4" => {
tag: "h4"
},
"RBMark::DOM::Heading5" => {
tag: "h5"
},
"RBMark::DOM::Heading6" => {
tag: "h6"
},
"RBMark::DOM::Document" => {
tag: "main"
},
"RBMark::DOM::CodeBlock" => {
tag: "pre",
outer: {
tag: "code"
}
},
"RBMark::DOM::QuoteBlock" => {
tag: "blockquote"
},
"RBMark::DOM::HorizontalRule" => {
tag: "hr"
},
"RBMark::DOM::Text" => nil
}.freeze
# @param dom [Object] document to render (stored as @document)
# @param options [Hash] renderer options; the keys 'preambule' and
#   'postambule' are the ones read in this class
def initialize(dom, options)
@document = dom
@options = options
end
# Render document to HTML
# NOTE(review): the results of preambule and _render are not captured, so
# as written the method returns only the postambule (or nil when that
# option is unset) - confirm whether the three parts should be joined.
# NOTE(review): `indent = 2` assigns a local and passes 2 positionally.
def render
preambule if @options['preambule']
_render(@document, indent = 2)
postambule if @options['postambule']
end
private
# NOTE(review): no body or closing `end` for this method is visible in this
# view, so the definitions below nest inside it as written - confirm
# against the real file.
def _render(element, indent = 0)
# Opening part of the page: the 'preambule' option when truthy, otherwise
# the default document head below.
# NOTE(review): the default interpolates @document['head'], i.e. it indexes
# the DOM rather than @options - confirm which was intended.
def preambule
@options['preambule'] or <<~TEXT
<!DOCTYPE HTML>
<html>
<head>
#{@document['head']}
</head>
<body>
TEXT
end
# Closing part of the page: the 'postambule' option when truthy, otherwise
# the default closing tags below.
def postambule
@options['postambule'] or <<~TEXT
</body>
</html>
TEXT
end
end
end
end

102
test/test_atx_headers.rb Normal file
View File

@ -0,0 +1,102 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# Test ATX Heading parsing compliance with CommonMark v0.31.2
class TestATXHeadings < Minitest::Test
def test_simple_heading1
doc = ::RBMark::DOM::Document.parse(<<~DOC)
# ATX Heading level 1
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading1, doc.children[0])
end
def test_simple_heading2
doc = ::RBMark::DOM::Document.parse(<<~DOC)
## ATX Heading level 2
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading2, doc.children[0])
end
def test_simple_heading3
doc = ::RBMark::DOM::Document.parse(<<~DOC)
### ATX Heading level 3
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading3, doc.children[0])
end
def test_simple_heading4
doc = ::RBMark::DOM::Document.parse(<<~DOC)
#### ATX Heading level 4
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading4, doc.children[0])
end
def test_simple_heading5
doc = ::RBMark::DOM::Document.parse(<<~DOC)
##### ATX Heading level 5
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading5, doc.children[0])
end
def test_simple_heading6
doc = ::RBMark::DOM::Document.parse(<<~DOC)
###### ATX Heading level 6
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading6, doc.children[0])
end
def test_simple_not_a_heading
doc = ::RBMark::DOM::Document.parse(<<~DOC)
####### NOT a heading
DOC
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
end
def test_breaking_paragrpah
doc = ::RBMark::DOM::Document.parse(<<~DOC)
Paragraph 1
# ATX Heading level 1
Paragraph 2
DOC
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
assert_instance_of(::RBMark::DOM::Heading1, doc.children[1])
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2])
end
def test_heading_sans_space
doc = ::RBMark::DOM::Document.parse(<<~DOC)
#NOT an ATX heading
DOC
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
end
def test_heading_escaped
doc = ::RBMark::DOM::Document.parse(<<~DOC)
\\# Escaped ATX heading
DOC
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
end
def test_spaces
doc = ::RBMark::DOM::Document.parse(<<~DOC)
#### Heading level 4
### Heading level 3
## Heading level 2
# Heading level 1
# NOT a heading
DOC
assert_instance_of(::RBMark::DOM::Heading4, doc.children[0])
assert_instance_of(::RBMark::DOM::Heading3, doc.children[1])
assert_instance_of(::RBMark::DOM::Heading2, doc.children[2])
assert_instance_of(::RBMark::DOM::Heading1, doc.children[3])
refute_instance_of(::RBMark::DOM::Heading1, doc.children[4])
end
end

View File

@ -0,0 +1,147 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# Test Setext Heading parsing compliance with CommonMark v0.31.2
class TestSetextHeadings < Minitest::Test
def test_simple_heading1
doc = ::RBMark::DOM::Document.parse(<<~DOC)
Foo *bar*
=========
Foo *bar*
---------
DOC
assert_instance_of(::RBMark::DOM::Heading1, doc.children[0])
assert_instance_of(::RBMark::DOM::Heading2, doc.children[1])
end
def test_multiline_span
doc = ::RBMark::DOM::Document.parse(<<~DOC)
Foo *bar
baz*
====
DOC
assert_instance_of(::RBMark::DOM::Heading1, doc.children[0])
assert_equal(1, doc.children.length)
end
def test_span_inlining
doc = ::RBMark::DOM::Document.parse(<<~DOC)
start
Foo *bar
baz
====
DOC
assert_instance_of(::RBMark::DOM::Heading1, doc.children[1])
skip
end
def test_line_length
doc = ::RBMark::DOM::Document.parse(<<~DOC)
Foo
------------------------------
Foo
=
DOC
assert_instance_of(::RBMark::DOM::Heading2, doc.children[0])
assert_instance_of(::RBMark::DOM::Heading1, doc.children[1])
end
def test_content_indent
skip # TODO: implement this
end
def test_marker_indent
doc = ::RBMark::DOM::Document.parse(<<~DOC)
Foo
------------------------------
Foo
=
Foo
=
Foo
=
DOC
refute_instance_of(::RBMark::DOM::Heading2, doc.children[0])
assert_instance_of(::RBMark::DOM::Heading1, doc.children[1])
assert_instance_of(::RBMark::DOM::Heading1, doc.children[2])
assert_instance_of(::RBMark::DOM::Heading1, doc.children[3])
end
def test_no_internal_spaces
doc = ::RBMark::DOM::Document.parse(<<~DOC)
Foo
-- - -
Foo
== =
DOC
refute_instance_of(::RBMark::DOM::Heading2, doc.children[0])
refute_instance_of(::RBMark::DOM::Heading1, doc.children[0])
end
def test_block_level_priority
doc = ::RBMark::DOM::Document.parse(<<~DOC)
` Foo
------
`
DOC
assert_instance_of(::RBMark::DOM::Heading2, doc.children[0])
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[1])
end
def test_paragraph_breaking_only
doc = ::RBMark::DOM::Document.parse(<<~DOC)
> text
------
DOC
skip # TODO: implement this
end
def test_paragraph_breaking_only_lazy_continuation
doc = ::RBMark::DOM::Document.parse(<<~DOC)
> text
continuation line
------
DOC
skip # TODO: implement this
end
def test_headings_back_to_back
doc = ::RBMark::DOM::Document.parse(<<~DOC)
heading1
------
heading2
------
heading3
======
DOC
assert_instance_of(::RBMark::DOM::Heading2, doc.children[0])
assert_instance_of(::RBMark::DOM::Heading2, doc.children[1])
assert_instance_of(::RBMark::DOM::Heading1, doc.children[2])
end
def test_no_empty_headings
doc = ::RBMark::DOM::Document.parse(<<~DOC)
======
DOC
refute_instance_of(::RBMark::DOM::Heading1, doc.children[0])
end
def test_thematic_breaks
doc = ::RBMark::DOM::Document.parse(<<~DOC)
----
----
DOC
refute_instance_of(::RBMark::DOM::Heading2, doc.children[0])
refute_instance_of(::RBMark::DOM::Heading2, doc.children[1])
end
end

View File

@ -0,0 +1,102 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# Test ATX Heading parsing compliance with CommonMark v0.31.2
class TestATXHeadings < Minitest::Test
def test_simple_heading1
doc = ::RBMark::DOM::Document.parse(<<~DOC)
# ATX Heading level 1
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading1, doc.children[0])
end
def test_simple_heading2
doc = ::RBMark::DOM::Document.parse(<<~DOC)
## ATX Heading level 2
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading2, doc.children[0])
end
def test_simple_heading3
doc = ::RBMark::DOM::Document.parse(<<~DOC)
### ATX Heading level 3
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading3, doc.children[0])
end
def test_simple_heading4
doc = ::RBMark::DOM::Document.parse(<<~DOC)
#### ATX Heading level 4
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading4, doc.children[0])
end
def test_simple_heading5
doc = ::RBMark::DOM::Document.parse(<<~DOC)
##### ATX Heading level 5
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading5, doc.children[0])
end
def test_simple_heading6
doc = ::RBMark::DOM::Document.parse(<<~DOC)
###### ATX Heading level 6
Paragraph
DOC
assert_instance_of(::RBMark::DOM::Heading6, doc.children[0])
end
def test_simple_not_a_heading
doc = ::RBMark::DOM::Document.parse(<<~DOC)
####### NOT a heading
DOC
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
end
def test_breaking_paragrpah
doc = ::RBMark::DOM::Document.parse(<<~DOC)
Paragraph 1
# ATX Heading level 1
Paragraph 2
DOC
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
assert_instance_of(::RBMark::DOM::Heading1, doc.children[1])
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2])
end
def test_heading_sans_space
doc = ::RBMark::DOM::Document.parse(<<~DOC)
#NOT an ATX heading
DOC
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
end
def test_heading_escaped
doc = ::RBMark::DOM::Document.parse(<<~DOC)
\\# Escaped ATX heading
DOC
assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
end
def test_spaces
doc = ::RBMark::DOM::Document.parse(<<~DOC)
#### Heading level 4
### Heading level 3
## Heading level 2
# Heading level 1
# NOT a heading
DOC
assert_instance_of(::RBMark::DOM::Heading4, doc.children[0])
assert_instance_of(::RBMark::DOM::Heading3, doc.children[1])
assert_instance_of(::RBMark::DOM::Heading2, doc.children[2])
assert_instance_of(::RBMark::DOM::Heading1, doc.children[3])
refute_instance_of(::RBMark::DOM::Heading1, doc.children[4])
end
end

97
test/test_indent_block.rb Normal file
View File

@ -0,0 +1,97 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# Test indented code block parsing compliance with CommonMark v0.31.2
# NOTE(review): this class shares its name with the Setext heading test case;
# if both test files are loaded into one process the two classes merge.
# Consider renaming it (e.g. TestIndentBlock) - confirm nothing relies on
# the current name first.
class TestSetextHeadings < Minitest::Test
  # Expects a chunk indented by four spaces, separated from the preceding
  # paragraph by a blank line, to become an IndentBlock.
  def test_simple_indent
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      text

          indented code block
          without space mangling
          int main() {
            printf("Hello world!\\n");
          }
    DOC
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1])
  end

  def test_list_item_precedence
    skip # TODO: implement this
  end

  def test_numbered_list_item_precednce
    skip # TODO: implement this
  end

  def test_check_indent_contents
    skip # TODO: implement this
  end

  # Expects interior blank lines and deeper indentation to stay inside a
  # single IndentBlock.
  def test_long_chunk
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      text

          indented code block
          without space mangling

          int main() {
            printf("Hello world!\\n");
          }


            there are many space changes here and blank lines that
           should *NOT* affect the way this is parsed
    DOC
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1])
  end

  # Expects indented lines directly after paragraph text (no blank line)
  # to continue the paragraph instead of opening an IndentBlock.
  def test_does_not_interrupt_paragraph
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      Paragraph begins here
          paragraph does the stupid wacky shit that somebody thinks is very funny
              paragraph keeps doing that shit
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
    assert_equal(1, doc.children.length)
  end

  # Expects the IndentBlock to end at the first line that is not indented.
  def test_begins_at_first_sight_of_four_spaces
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      text

          This is an indent block
      This is a paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1])
    assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2])
  end

  # Expects an indented chunk to start directly after non-paragraph blocks
  # (headings) without a separating blank line.
  def test_interrupts_all_other_blocks
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      # Heading
          foo
      Heading
      ------
          foo
      ----
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[0])
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1])
    assert_instance_of(::RBMark::DOM::Heading2, doc.children[2])
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[3])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[4])
  end

  def test_check_blank_lines_contents
    skip # TODO: implement this
  end

  def test_check_contents_trailing_spaces
    skip # TODO: implement this
  end
end

147
test/test_setext_headers.rb Normal file
View File

@ -0,0 +1,147 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# Test Setext Heading parsing compliance with CommonMark v0.31.2
class TestSetextHeadings < Minitest::Test
  # Expects `=` underlines to give Heading1 and `-` underlines Heading2.
  def test_simple_heading1
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      Foo *bar*
      =========
      Foo *bar*
      ---------
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[0])
    assert_instance_of(::RBMark::DOM::Heading2, doc.children[1])
  end

  # Expects heading content spanning several lines to form one heading.
  def test_multiline_span
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      Foo *bar
      baz*
      ====
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[0])
    assert_equal(1, doc.children.length)
  end

  # Expects a multi-line heading after a separate paragraph to be the
  # second block of the document.
  def test_span_inlining
    # The blank line ends the "start" paragraph; without it the following
    # lines would continue that paragraph and no second block would exist.
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      start

      Foo *bar
      baz
      ====
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[1])
    # NOTE(review): this unconditional skip reports the test as skipped even
    # when the assertion above passes - presumably a placeholder for
    # inline-content checks that are not written yet.
    skip
  end

  # Expects the underline to be valid at any length, from one character up.
  def test_line_length
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      Foo
      ------------------------------
      Foo
      =
    DOC
    assert_instance_of(::RBMark::DOM::Heading2, doc.children[0])
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[1])
  end

  def test_content_indent
    skip # TODO: implement this
  end

  # Expects an underline indented by one to three spaces to still form a
  # heading, and one indented by four spaces not to.
  def test_marker_indent
    # First underline: four spaces, so "Foo" must not become a Heading2.
    # The blank line keeps that block apart from the headings that follow.
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      Foo
          ------------------------------

      Foo
       =
      Foo
        =
      Foo
         =
    DOC
    refute_instance_of(::RBMark::DOM::Heading2, doc.children[0])
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[1])
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[2])
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[3])
  end

  # Expects underlines containing internal spaces not to form headings.
  def test_no_internal_spaces
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      Foo
      -- - -
      Foo
      == =
    DOC
    refute_instance_of(::RBMark::DOM::Heading2, doc.children[0])
    refute_instance_of(::RBMark::DOM::Heading1, doc.children[0])
  end

  # Expects block structure to win over inline structure: the underline
  # closes the heading even though a backtick span is still open.
  def test_block_level_priority
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      ` Foo
      ------
      `
    DOC
    assert_instance_of(::RBMark::DOM::Heading2, doc.children[0])
    assert_instance_of(::RBMark::DOM::Paragraph, doc.children[1])
  end

  def test_paragraph_breaking_only
    # The input is still parsed before skipping, so the parser is at least
    # exercised on it; the result is not inspected yet.
    ::RBMark::DOM::Document.parse(<<~DOC)
      > text
      ------
    DOC
    skip # TODO: implement this
  end

  def test_paragraph_breaking_only_lazy_continuation
    # Parsed but not inspected yet; see the TODO below.
    ::RBMark::DOM::Document.parse(<<~DOC)
      > text
      continuation line
      ------
    DOC
    skip # TODO: implement this
  end

  # Expects consecutive Setext headings without blank lines between them
  # to be parsed as separate headings.
  def test_headings_back_to_back
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      heading1
      ------
      heading2
      ------
      heading3
      ======
    DOC
    assert_instance_of(::RBMark::DOM::Heading2, doc.children[0])
    assert_instance_of(::RBMark::DOM::Heading2, doc.children[1])
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[2])
  end

  # Expects an underline with no content above it not to form a heading.
  def test_no_empty_headings
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      ======
    DOC
    refute_instance_of(::RBMark::DOM::Heading1, doc.children[0])
  end

  # Expects lines of dashes with no paragraph above them not to be
  # treated as Heading2.
  def test_thematic_breaks
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      ----
      ----
    DOC
    refute_instance_of(::RBMark::DOM::Heading2, doc.children[0])
    refute_instance_of(::RBMark::DOM::Heading2, doc.children[1])
  end
end

View File

@ -0,0 +1,127 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# Test thematic break parsing compliance with CommonMark v0.31.2
class TestThematicBreaks < Minitest::Test
  # Expects each of the three marker characters to produce a break.
  def test_simple
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      ---
      ***
      ___
    DOC
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2])
  end

  # Expects `+` and `=` not to be accepted as break markers.
  def test_simple_invalid
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      +++
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0])
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      ===
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0])
  end

  # Expects fewer than three marker characters not to produce a break.
  def test_simple_less_characters
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      --
      **
      __
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0])
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1])
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2])
  end

  # Expects up to three spaces of leading indentation to be allowed,
  # and four spaces to be too many.
  def test_indentation
    # Indentation grows by one space per line (0, 1, 2, 3, 4); the squiggly
    # heredoc strips only the common margin.
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      ***
       ***
        ***
         ***
          ***
    DOC
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[3])
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[4])
  end

  # Expects a marker line indented by four spaces after paragraph text
  # not to end the document with a break.
  def test_indentation_mixed_classes
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      Foo
          ***
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children.last)
  end

  # Expects more than three marker characters to be accepted.
  def test_line_length
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      _________________________________
    DOC
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0])
  end

  # Expects spaces between marker characters to be allowed.
  def test_mixed_spaces
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      - - -
      ** * ** * ** * **
      - - - -
      - - - -
    DOC
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[3])
  end

  # Expects any other character on the line to prevent a break.
  def test_mixed_characters
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      _ _ _ _ a
      a------
      ---a---
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0])
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2])
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[3])
  end

  # Expects a line mixing different marker characters not to be a break.
  def test_mixed_markets
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      *-*
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0])
  end

  # Expects a break between list items to be the second block.
  def test_interrupt_list
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      - foo
      ***
      - bar
    DOC
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1])
  end

  # Expects a break to interrupt a paragraph without a blank line.
  def test_interrupt_paragraph
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      foo
      ***
      bar
    DOC
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1])
  end
end