diff --git a/lib/rbmark.rb b/lib/rbmark.rb index ff28477..5895aea 100644 --- a/lib/rbmark.rb +++ b/lib/rbmark.rb @@ -1,55 +1,373 @@ # frozen_string_literal: true module RBMark - # Parser class - class Parser - def initialize(variants, default) - @default = default - @variants = variants - @markers = @variants.map { |x| [x.begin, x] }.to_h - end - - # Parse text using the given variants - # @param text [String] - # @return [Array<::RBMark::DOM::DOMObject>] - def parse(text) - chunks = [] - until text.nil? or text.empty? - before, chunk, text = get_chunk(text) - chunks.append(@default.parse(before)) unless before.empty? - next unless chunk - - chunks.append(chunk) + # Module for representing parsing-related constructs + module Parsing + # Abstract scanner interface implementation + class Scanner + def initialize + @variants = [] end - chunks.flatten - end - private - - def get_chunk(text) - element, match = get_element(text) - if element - chunk, after = finalize_element(text[match.offset(0)[0]..], element) - return [match.pre_match, chunk, after] if chunk - - return [match.pre_match, nil, match.post_match] + # Scan text + # @param text [String] + # @return [Array] + def scan(_text) + raise StandardError, "Abstract method called" + # ... end - [text, nil, nil] + + attr_accessor :variants end - def get_element(text) - @markers.filter_map do |marker| - [marker[1], text.match(marker[0])] if text.match(marker[0]) - end.min_by { |x| x[1].offset(0)[0] } + # Line-level scanner for blocks + class LineScanner < Scanner + # (see ::RBMark::Parsing::Scanner#scan) + def scan(text, buffer: "", blocks: [], mode: nil) + prepare + lines = text.lines + lines.each_with_index do |line, index| + buffer += line + ahead = lines.fetch(index + 1, nil) + blocks, buffer, mode = try_begin(line, + blocks, + buffer, + mode, + lookahead: ahead) + if mode&.end?(line, lookahead: ahead, blocks: blocks, buffer: buffer) + blocks, buffer, mode = flush(blocks, buffer, mode) + end + end + flush(blocks, buffer, mode)[0] + end + + # Predict mode for given line + # @param line [String] + # @return [Object] + def select_mode(line, **message) + @variants.find do |variant| + variant[0].begin?(line, **message) + end&.at(0) + end + + private + + # Attempt to open a new mode and, if possible, call :begin to prepare the block + def try_begin(line, blocks, buffer, mode, lookahead: nil) + return blocks, buffer, mode if mode + + mode = select_mode(line, lookahead: lookahead, + blocks: blocks, + buffer: buffer) + blocks.append(mode.begin(line)) if mode.respond_to?(:begin) + [blocks, buffer, mode] + end + + # Assign self as parent to all variants + # @return [void] + def prepare + @variants.each do |variant| + unless variant[0].is_a? ::RBMark::Parsing::BlockVariant + raise StandardError, "#{variant} is not a BlockVariant" + end + + variant[0].parent = self + end + @variants.sort_by!(&:last) + end + + # Flush the buffer using given mode + # @param blocks [Array] + # @param buffer [String] + # @param mode [Object] + # @return [Array(Array, String, ::RBMark::Parsing::Variant)] + def flush(blocks, buffer, mode) + return blocks, "" if buffer == "" + + mode.end(blocks.last, buffer) if mode.respond_to?(:end) + blocks.append(mode.flush(buffer)) if mode.respond_to?(:flush) + if mode.respond_to?(:restructure) + blocks, buffer, mode = mode.restructure(blocks, buffer, mode) + else + buffer = "" + mode = nil + end + [blocks, buffer, mode] + end end - def finalize_element(text, element) - match = text.match(element.end) - return nil, nil unless match + # Abstract variant interface + class Variant + end - chunk_text = text[..(match.offset(0)[1] - 1)] - remaining_text = text[match.offset(0)[1]..] - [element.parse(chunk_text), remaining_text] + # Abstract block-level variant + class BlockVariant < Variant + # Check if a block begins on this line + # @param line [String] + # @param opts [Hash] options hash + # @option [String, nil] :lookahead next line over + # @option [Array<::RBMark::Parsing::BlockVariant>] :blocks current list of blocks + # @option [String] :buffer current state of buffer + # @return [Boolean] + def begin?(_line, **_opts) + raise StandardError, "Abstract method called" + end + + # Check if a block ends on this line + # @param line [String] + # @param opts [Hash] options hash + # @option [String, nil] :lookahead next line over + # @option [Array<::RBMark::Parsing::BlockVariant>] :blocks current list of blocks + # @option [String] :buffer current state of buffer + # @return [Boolean] + def end?(_line, **_opts) + raise StandardError, "Abstract method called" + end + + # @!method begin(buffer) + # Open a block to be later filled in by BlockVariant#end + # @param buffer [String] + # @return [::RBMark::DOM::DOMObject] + + # @!method end(block, buffer) + # Finalize a block opened by begin + # @param buffer [String] + # @return [void] + + # @!method flush(buffer) + # Flush buffer and create a new DOM object + # @param buffer [String] + # @return [::RBMark::DOM::DOMObject] + + # @!method restructure(blocks, buffer, mode) + # Restructure current set of blocks (if method is defined) + # @param blocks [Array<::RBMark::DOM::DOMObject>] + # @param buffer [String] + # @param mode [::RBMark::Parsing::Variant] + # @return [Array(Array, String, ::RBMark::Parsing::Variant)] + + attr_accessor :parent + end + + # Paragraph breaking variant + class BreakerVariant < BlockVariant + # Check that a paragraph matches the breaker + # @param buffer [String] + # @return [Class, nil] + def match(_buffer) + raise StandardError, "Abstract method called" + end + + # @!method preprocess(buffer) + # preprocess buffer + # @param buffer [String] + # @return [String] + end + + # Paragraph variant + class ParagraphVariant < BlockVariant + # (see BlockVariant#begin?) + def begin?(line, **_opts) + line.match?(/\S/) + end + + # (see BlockVariant#end?) + def end?(line, lookahead: nil, **_opts) + return true if check_paragraph_breakers(line) + + if lookahead + return false if check_paragraph_breakers(lookahead) + + return false if lookahead.match(/^ {4}/) + + !parent.select_mode(lookahead).is_a?(self.class) + else + true + end + end + + # (see BlockVariant#flush) + # @sg-ignore + def flush(buffer) + dom_class = nil + breaker = parent.variants.find do |x| + x[0].is_a?(::RBMark::Parsing::BreakerVariant) && + (dom_class = x[0].match(buffer)) + end&.first + buffer = breaker.preprocess(buffer) if breaker.respond_to?(:preprocess) + (dom_class or ::RBMark::DOM::Paragraph).parse(buffer.strip) + end + + private + + def check_paragraph_breakers(line) + breakers = parent.variants.filter_map do |x| + x[0] if x[0].is_a? ::RBMark::Parsing::BreakerVariant + end + breakers.any? { |x| x.begin?(line, breaks_paragraph: true) } + end + end + + # Thematic break variant + class ThematicBreakVariant < BlockVariant + # (see BlockVariant#begin?) + def begin?(line, **_opts) + line.match?(/^(?:[- ]{3,}|[_ ]{3,}|[* ]{3,})$/) && + line.match?(/^ {0,3}[-_*]/) && + ( + line.count("-") >= 3 || + line.count("_") >= 3 || + line.count("*") >= 3 + ) + end + + # (see BlockVariant#end?) + def end?(_line, **_opts) + true + end + + # (see BlockVariant#flush) + def flush(_buffer) + ::RBMark::DOM::HorizontalRule.new + end + end + + # ATX Heading variant + class ATXHeadingVariant < BlockVariant + # (see BlockVariant#begin?) + def begin?(line, **_opts) + line.match?(/^ {0,3}\#{1,6}(?: .*|)$/) + end + + # (see BlockVariant#end?) + def end?(_line, **_opts) + true + end + + # (see BlockVariant#flush) + def flush(buffer) + lvl, content = buffer.match(/^ {0,3}(\#{1,6})( .*|)$/)[1..2] + content = content.gsub(/( #+|)$/, "") + heading(lvl).parse(content.strip) + end + + private + + def heading(lvl) + case lvl.length + when 1 then ::RBMark::DOM::Heading1 + when 2 then ::RBMark::DOM::Heading2 + when 3 then ::RBMark::DOM::Heading3 + when 4 then ::RBMark::DOM::Heading4 + when 5 then ::RBMark::DOM::Heading5 + when 6 then ::RBMark::DOM::Heading6 + end + end + end + + # Paragraph closing variant + class BlankSeparator < BreakerVariant + # (see BlockVariant#begin?) + def begin?(line, breaks_paragraph: nil, **_opts) + breaks_paragraph && + line.match?(/^ {0,3}$/) + end + + # (see BlockVariant#end?) + def end?(_line, **_opts) + true + end + + # (see BreakerVariant#match) + def match(_buffer) + nil + end + end + + # Setext heading variant + class SetextHeadingVariant < BreakerVariant + # (see BlockVariant#begin?) + def begin?(line, breaks_paragraph: nil, **_opts) + breaks_paragraph && + line.match?(/^ {0,3}(?:-+|=+) *$/) + end + + # (see BlockVariant#end?) + def end?(_line, **_opts) + true + end + + # (see BreakerVariant#match) + def match(buffer) + return nil unless preprocess(buffer).match(/\S/) + + heading(buffer.lines.last) + end + + # (see BreakerVariant#preprocess) + def preprocess(buffer) + buffer.lines[..-2].join + end + + private + + def heading(buffer) + case buffer + when /^ {0,3}-+ *$/ then ::RBMark::DOM::Heading2 + when /^ {0,3}=+ *$/ then ::RBMark::DOM::Heading1 + end + end + end + + # Indented literal block variant + class IndentedBlockVariant < BlockVariant + # (see BlockVariant#begin?) + def begin?(line, **_opts) + line.match?(/^(?: {4}|\t)/) + end + + # (see BlockVariant#end?) + def end?(_line, lookahead: nil, **_opts) + !lookahead&.match?(/^(?: {4}.*|\s*|\t)$/) + end + + # (see BlockVariant#flush) + def flush(buffer) + text = buffer.lines.map { |x| "#{x.chomp[4..]}\n" }.join + block = ::RBMark::DOM::IndentBlock.new + block.content = text # TODO: replace this with inline text + block + end + end + + # Fenced code block + class FencedCodeBlock < BlockVariant + # (see BlockVariant#begin?) + def begin?(line, **_opts) + line.match?(/^(?:`{3,}[^`]*|~{3,}.*)$/) + end + + # (see BlockVariant#end?) + def end?(line, blocks: nil, buffer: nil, **_opts) + buffer.lines.length > 1 and + line.strip == blocks.last.properties[:expected_closer] + end + + # (see BlockVariant#begin) + def begin(buffer) + block = ::RBMark::DOM::CodeBlock.new + block.properties[:expected_closer] = buffer.match(/^(?:`{3,}|~{3,})/)[0] + block.properties[:infostring] = buffer.match(/^(?:`{3,}|~{3,})(.*)$/)[1] + .strip + block + end + + # (see BlockVariant#end) + def end(block, buffer) + # TODO: replace this with inline text + block.properties.delete(:expected_closer) + block.content = buffer.lines[1..-2].join + end end end @@ -66,42 +384,56 @@ module RBMark @subclasses.append(subclass) subclass.variants = @variants.dup subclass.variants ||= [] - subclass.default_class = @default_class + subclass.atomic_mode = @atomic_mode + subclass.scanner_class = @scanner_class end # Add potential sub-element variant # @param cls [Class] DOMObject subclass - def variant(cls) - unless cls < ::RBMark::DOM::DOMObject + def variant(cls, prio: 1) + unless cls < ::RBMark::Parsing::Variant raise StandardError, "#{cls} is not a DOMObject subclass" end - @variants.append(cls) + @variants.append([cls, prio]) @subclasses&.each do |subclass| - subclass.variant(cls) + subclass.variant(cls, prio) end end - # Set default element class + # Set scanner class # @param cls [Class] DOMObject subclass - def default(cls) - unless cls < ::RBMark::DOM::DOMObject - raise StandardError, "#{cls} is not a DOMObject subclass" + def scanner(cls) + unless cls < ::RBMark::Parsing::Scanner + raise StandardError, "#{cls} is not a Scanner subclass" end - @default_class = cls + @scanner_class = cls @subclasses&.each do |subclass| - subclass.default(cls) + subclass.scanner(cls) end end + # Prepare scanner and variants + # @return [void] + def prepare + return if @prepared + + @scanner = @scanner_class.new + @scanner.variants = @variants.map { |x| [x[0].new, x[1]] } + end + # Parse text from the given context # @param text [String] # @return [self] def parse(text) - parser = ::RBMark::Parser.new(@variants, @default_class) - instance = create - instance.append(*parser.parse(text)) + prepare unless @atomic_mode + instance = new + if @atomic_mode + instance.content = text + else + instance.append(*@scanner.scan(text)) + end instance end @@ -115,7 +447,13 @@ module RBMark end end - attr_accessor :variants, :begin, :end, :default_class, :alt_for + # Set the atomic flag + # @return [void] + def atomic + @atomic_mode = true + end + + attr_accessor :variants, :scanner_class, :alt_for, :atomic_mode end def initialize @@ -169,12 +507,6 @@ module RBMark @content = text end - # Get length of text contents - # @return [Integer] - def length - @children.map(&:length).sum - end - # Get text content of a DOMObject # @return [String, nil] attr_reader :content, :children, :properties @@ -182,350 +514,96 @@ module RBMark # Inline text class Text < DOMObject - # Stub parser for inline text element - # @param text [String] - # @return [self] - def self.parse(text) - instance = new - instance.content = text.gsub("\n", ' ').gsub(/\s+/, " ") - instance - end - - # Get length of inline text - # @return [Integer] - def length - @content.length - end end # Inline preformatted text class InlinePre < DOMObject - self.begin = /(? - list.children.last.properties[:number]) - true - else - false - end - end - - # Merge 2 elements - def self.merge_adjacent(left, right) - if right.is_a? ::RBMark::DOM::ULBlock or - right.is_a? ::RBMark::DOM::OLBlock - right.children.each do |child| - left.append(child) - end - elsif right.is_a? ::RBMark::DOM::IndentBlock - left.children.last.append( - *::RBMark::DOM::ListElement.parse(right.children.first.content) - .children - ) - end - end + scanner ::RBMark::Parsing::LineScanner + variant ::RBMark::Parsing::ATXHeadingVariant + variant ::RBMark::Parsing::ThematicBreakVariant + variant ::RBMark::Parsing::SetextHeadingVariant + variant ::RBMark::Parsing::IndentedBlockVariant + variant ::RBMark::Parsing::FencedCodeBlock + variant ::RBMark::Parsing::BlankSeparator, prio: 9998 + variant ::RBMark::Parsing::ParagraphVariant, prio: 9999 end # Paragraph in a document (separated by 2 newlines) class Paragraph < InlineFormattable - # (see ::RBMark::DOM::DOMObject.parse) - # @return [Array] - def self.parse(text) - text.split("\n\n").map do |chunk| - super(chunk) - end - end + atomic end # Heading level 1 class Heading1 < InlineFormattable - self.begin = /^# / - self.end = /#?$/ - - # (see ::RBMark::DOM::DOMObject.parse) - def self.parse(text) - super(text.gsub(self.begin, '').gsub(self.end, '')) - end end # Heading level 2 class Heading2 < Heading1 - self.begin = /^## / - self.end = /(?:##)?$/ end # Heading level 3 class Heading3 < Heading1 - self.begin = /^### / - self.end = /(?:###)?$/ end # Heading level 4 class Heading4 < Heading1 - self.begin = /^#### / - self.end = /(?:####)?$/ end # Heading level 5 class Heading5 < Heading1 - self.begin = /^##### / - self.end = /(?:#####)?$/ end # Heading level 6 class Heading6 < Heading1 - self.begin = /^###### / - self.end = /(?:######)?$/ - end - - # Alternative heading 1 - class AltHeading1 < InlineFormattable - self.begin = /^[^\n]+\n={3,}$/m - self.end = /={3,}$/ - self.alt_for = ::RBMark::DOM::Heading1 - - # (see ::RBMark::DOM::DOMObject.parse) - def self.parse(text) - super(text.match(/\A[^\n]+$/)[0].strip) - end - end - - # Alternative heading 2 - class AltHeading2 < InlineFormattable - self.begin = /^[^\n]+\n-{3,}$/m - self.end = /-{3,}$/ - self.alt_for = ::RBMark::DOM::Heading2 - - # (see ::RBMark::DOM::DOMObject.parse) - def self.parse(text) - super(text.match(/\A[^\n]+$/)[0].strip) - end end # Preformatted code block class CodeBlock < DOMObject - self.begin = /^```[^\n]*$/ - self.end = /^```[^\n]*\n.*?\n```$/m - - # Stub parser for block text element - # @param text [String] - # @return [self] - def self.parse(text) - instance = new - language = text.match(/\A```([^\n]*)/)[1].strip - element = ::RBMark::DOM::Text.new - element.content = text.lines[1..-2].join('').rstrip - instance.append(element) - instance.property language: language - instance - end end # Quote block class QuoteBlock < Document - self.begin = /^> \S/ - self.end = /(?:^(?!>)|\Z)/ - - # stub - def self.parse(text) - super(text.lines.map { |x| x[2..] }.join('')) - end end # Table @@ -538,122 +616,19 @@ module RBMark # Unordered list class ULBlock < DOMObject - self.begin = /^- +\S+/ - self.end = /(?:^(?!- +\S+| )|\Z)/ - - # (see RBMark::DOM::DOMObject.parse) - def self.parse(text) - block = [] - instance = new - text.lines.each do |line| - if line.start_with?("- ") - unless block.empty? - instance.append(::RBMark::DOM::ListElement.parse(block.join(''))) - end - block = [line[2..]] - else - block.append(line[2..]) - end - end - instance.append(::RBMark::DOM::ListElement.parse(block.join(''))) - instance - end end # Ordered list block class OLBlock < DOMObject - self.begin = /^\d+\. +\S+/ - self.end = /(?:^(?!\d+\. +\S+| {4})|\Z)/ - - # (see RBMark::DOM::DOMObject.parse) - def self.parse(text) - block = [] - instance = new - counter = nil - text.lines.each do |line| - if line.start_with?(/^\d+\. /) - unless block.empty? - instance.append(element(block.join(''), counter)) - end - counter = line.match(/^(\d+)\. /)[1] - block = [line.gsub(/^(?:\d+\. | {4})/, '')] - else - block.append(line.gsub(/^(?:\d+\. | {4})/, '')) - end - end - instance.append(element(block.join(''), counter)) - instance - end - - # Construct a new ListElement - def self.element(text, counter) - instance = ::RBMark::DOM::ListElement.parse(text) - instance.property number: counter - instance - end end # Indent block class IndentBlock < DOMObject - self.begin = /^ {4}/ - self.end = /(?:^(?! {4})|\Z)/ - - # (see RBMark::DOM::DOMObject.parse) - def self.parse(text) - instance = new - element = ::RBMark::DOM::Text.new - element.content = text.lines.map { |x| x[4..] }.join('') - instance.append(element) - instance - end end # Horizontal rule class HorizontalRule < DOMObject - self.begin = /^-{3,}$/ - self.end = /$/ - - # stub for HR - def self.parse(_text) - new - end - - # Stub for HR length - # @return [Integer] - def length - 1 - end - end - - InlineFormattable.class_exec do - default ::RBMark::DOM::Text - variant ::RBMark::DOM::InlineBold - variant ::RBMark::DOM::InlineItalics - variant ::RBMark::DOM::InlineAltItalics - variant ::RBMark::DOM::InlineUnder - variant ::RBMark::DOM::InlineImage - variant ::RBMark::DOM::InlineLink - variant ::RBMark::DOM::InlinePre - variant ::RBMark::DOM::InlineStrike - variant ::RBMark::DOM::InlineBreak - end - - Document.class_exec do - default ::RBMark::DOM::Paragraph - variant ::RBMark::DOM::Heading1 - variant ::RBMark::DOM::Heading2 - variant ::RBMark::DOM::Heading3 - variant ::RBMark::DOM::Heading4 - variant ::RBMark::DOM::Heading5 - variant ::RBMark::DOM::Heading6 - variant ::RBMark::DOM::AltHeading1 - variant ::RBMark::DOM::AltHeading2 - variant ::RBMark::DOM::QuoteBlock - variant ::RBMark::DOM::CodeBlock - variant ::RBMark::DOM::ULBlock - variant ::RBMark::DOM::OLBlock - variant ::RBMark::DOM::IndentBlock - variant ::RBMark::DOM::HorizontalRule + atomic end end end diff --git a/lib/rbmark/renderers.rb b/lib/rbmark/renderers.rb new file mode 100644 index 0000000..7cbe4b5 --- /dev/null +++ b/lib/rbmark/renderers.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +module RBMark + # Renderers from Markdown to expected output format + module Renderers + end +end + +require_relative 'renderers/html' diff --git a/lib/rbmark/renderers/html.rb b/lib/rbmark/renderers/html.rb new file mode 100644 index 0000000..de8dd2c --- /dev/null +++ b/lib/rbmark/renderers/html.rb @@ -0,0 +1,132 @@ +# frozen_string_literal: true + +require 'rbmark' + +module RBMark + module Renderers + # HTML Renderer + class HTML + ELEMENT_MAP = { + "RBMark::DOM::InlinePre" => { + tag: "code", + inline: true + }, + "RBMark::DOM::InlineBreak" => { + tag: "br", + inline: true + }, + "RBMark::DOM::InlineBold" => { + tag: "strong", + inline: true + }, + "RBMark::DOM::InlineItalics" => { + tag: "em", + inline: true + }, + "RBMark::DOM::InlineUnder" => { + tag: "span", + inline: true, + style: "text-decoration: underline;" + }, + "RBMark::DOM::InlineStrike" => { + tag: "s", + inline: true + }, + "RBMark::DOM::InlineLink" => { + tag: "link", + href: true, + inline: true + }, + "RBMark::DOM::InlineImage" => { + tag: "img", + src: true, + inline: true + }, + "RBMark::DOM::ULBlock" => { + tag: "ul" + }, + "RBMark::DOM::OLBlock" => { + tag: "ol" + }, + "RBMark::DOM::IndentBlock" => { + tag: "pre" + }, + "RBMark::DOM::ListElement" => { + tag: "li" + }, + "RBMark::DOM::Paragraph" => { + tag: "p" + }, + "RBMark::DOM::Heading1" => { + tag: "h1" + }, + "RBMark::DOM::Heading2" => { + tag: "h2" + }, + "RBMark::DOM::Heading3" => { + tag: "h3" + }, + "RBMark::DOM::Heading4" => { + tag: "h4" + }, + "RBMark::DOM::Heading5" => { + tag: "h5" + }, + "RBMark::DOM::Heading6" => { + tag: "h6" + }, + "RBMark::DOM::Document" => { + tag: "main" + }, + "RBMark::DOM::CodeBlock" => { + tag: "pre", + outer: { + tag: "code" + } + }, + "RBMark::DOM::QuoteBlock" => { + tag: "blockquote" + }, + "RBMark::DOM::HorizontalRule" => { + tag: "hr" + }, + "RBMark::DOM::Text" => nil + }.freeze + + def initialize(dom, options) + @document = dom + @options = options + end + + # Render document to HTML + def render + preambule if @options['preambule'] + _render(@document, indent = 2) + postambule if @options['postambule'] + end + + private + + def _render(element, indent = 0) + + + def preambule + @options['preambule'] or <<~TEXT + + + + #{@document['head']} + + + TEXT + end + + def postambule + @options['postambule'] or <<~TEXT + + + TEXT + end + end + end +end diff --git a/test/test_atx_headers.rb b/test/test_atx_headers.rb new file mode 100644 index 0000000..58a038a --- /dev/null +++ b/test/test_atx_headers.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require 'minitest/autorun' +require_relative '../lib/rbmark' + +# Test ATX Heading parsing compliance with CommonMark v0.31.2 +class TestATXHeadings < Minitest::Test + def test_simple_heading1 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + # ATX Heading level 1 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + end + + def test_simple_heading2 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ## ATX Heading level 2 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + end + + def test_simple_heading3 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ### ATX Heading level 3 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading3, doc.children[0]) + end + + def test_simple_heading4 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + #### ATX Heading level 4 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading4, doc.children[0]) + end + + def test_simple_heading5 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ##### ATX Heading level 5 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading5, doc.children[0]) + end + + def test_simple_heading6 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ###### ATX Heading level 6 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading6, doc.children[0]) + end + + def test_simple_not_a_heading + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ####### NOT a heading + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + end + + def test_breaking_paragrpah + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Paragraph 1 + # ATX Heading level 1 + Paragraph 2 + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2]) + end + + def test_heading_sans_space + doc = ::RBMark::DOM::Document.parse(<<~DOC) + #NOT an ATX heading + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + end + + def test_heading_escaped + doc = ::RBMark::DOM::Document.parse(<<~DOC) + \\# Escaped ATX heading + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + end + + def test_spaces + doc = ::RBMark::DOM::Document.parse(<<~DOC) + #### Heading level 4 + ### Heading level 3 + ## Heading level 2 + # Heading level 1 + # NOT a heading + DOC + assert_instance_of(::RBMark::DOM::Heading4, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading3, doc.children[1]) + assert_instance_of(::RBMark::DOM::Heading2, doc.children[2]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[3]) + refute_instance_of(::RBMark::DOM::Heading1, doc.children[4]) + end +end diff --git a/test/test_fenced_code_block.rb b/test/test_fenced_code_block.rb new file mode 100644 index 0000000..2d1878b --- /dev/null +++ b/test/test_fenced_code_block.rb @@ -0,0 +1,147 @@ +# frozen_string_literal: true + +require 'minitest/autorun' +require_relative '../lib/rbmark' + +# Test Setext Heading parsing compliance with CommonMark v0.31.2 +class TestSetextHeadings < Minitest::Test + def test_simple_heading1 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo *bar* + ========= + + Foo *bar* + --------- + DOC + assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading2, doc.children[1]) + end + + def test_multiline_span + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo *bar + baz* + ==== + DOC + assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + assert_equal(1, doc.children.length) + end + + def test_span_inlining + doc = ::RBMark::DOM::Document.parse(<<~DOC) + start + + Foo *bar + baz + ==== + DOC + assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) + skip + end + + def test_line_length + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo + ------------------------------ + + Foo + = + DOC + assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) + end + + def test_content_indent + skip # TODO: implement this + end + + def test_marker_indent + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo + ------------------------------ + + Foo + = + + Foo + = + + Foo + = + DOC + refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[2]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[3]) + end + + def test_no_internal_spaces + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo + -- - - + + Foo + == = + DOC + refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + refute_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + end + + def test_block_level_priority + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ` Foo + ------ + ` + DOC + assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[1]) + end + + def test_paragraph_breaking_only + doc = ::RBMark::DOM::Document.parse(<<~DOC) + > text + ------ + DOC + skip # TODO: implement this + end + + def test_paragraph_breaking_only_lazy_continuation + doc = ::RBMark::DOM::Document.parse(<<~DOC) + > text + continuation line + ------ + DOC + skip # TODO: implement this + end + + def test_headings_back_to_back + doc = ::RBMark::DOM::Document.parse(<<~DOC) + heading1 + ------ + heading2 + ------ + heading3 + ====== + DOC + assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading2, doc.children[1]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[2]) + end + + def test_no_empty_headings + doc = ::RBMark::DOM::Document.parse(<<~DOC) + + ====== + DOC + refute_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + end + + def test_thematic_breaks + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ---- + ---- + DOC + refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + refute_instance_of(::RBMark::DOM::Heading2, doc.children[1]) + end +end diff --git a/test/test_fenced_code_blocks.rb b/test/test_fenced_code_blocks.rb new file mode 100644 index 0000000..58a038a --- /dev/null +++ b/test/test_fenced_code_blocks.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require 'minitest/autorun' +require_relative '../lib/rbmark' + +# Test ATX Heading parsing compliance with CommonMark v0.31.2 +class TestATXHeadings < Minitest::Test + def test_simple_heading1 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + # ATX Heading level 1 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + end + + def test_simple_heading2 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ## ATX Heading level 2 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + end + + def test_simple_heading3 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ### ATX Heading level 3 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading3, doc.children[0]) + end + + def test_simple_heading4 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + #### ATX Heading level 4 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading4, doc.children[0]) + end + + def test_simple_heading5 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ##### ATX Heading level 5 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading5, doc.children[0]) + end + + def test_simple_heading6 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ###### ATX Heading level 6 + Paragraph + DOC + assert_instance_of(::RBMark::DOM::Heading6, doc.children[0]) + end + + def test_simple_not_a_heading + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ####### NOT a heading + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + end + + def test_breaking_paragrpah + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Paragraph 1 + # ATX Heading level 1 + Paragraph 2 + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2]) + end + + def test_heading_sans_space + doc = ::RBMark::DOM::Document.parse(<<~DOC) + #NOT an ATX heading + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + end + + def test_heading_escaped + doc = ::RBMark::DOM::Document.parse(<<~DOC) + \\# Escaped ATX heading + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + end + + def test_spaces + doc = ::RBMark::DOM::Document.parse(<<~DOC) + #### Heading level 4 + ### Heading level 3 + ## Heading level 2 + # Heading level 1 + # NOT a heading + DOC + assert_instance_of(::RBMark::DOM::Heading4, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading3, doc.children[1]) + assert_instance_of(::RBMark::DOM::Heading2, doc.children[2]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[3]) + refute_instance_of(::RBMark::DOM::Heading1, doc.children[4]) + end +end diff --git a/test/test_indent_block.rb b/test/test_indent_block.rb new file mode 100644 index 0000000..c81e0b4 --- /dev/null +++ b/test/test_indent_block.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +require 'minitest/autorun' +require_relative '../lib/rbmark' + +# Test Setext Heading parsing compliance with CommonMark v0.31.2 +class TestSetextHeadings < Minitest::Test + def test_simple_indent + doc = ::RBMark::DOM::Document.parse(<<~DOC) + text + + indented code block + without space mangling + + int main() { + printf("Hello world!\\n"); + } + DOC + assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1]) + end + + def test_list_item_precedence + skip # TODO: implement this + end + + def test_numbered_list_item_precednce + skip # TODO: implement this + end + + def test_check_indent_contents + skip # TODO: yet again please implement this at some point thanks + end + + def test_long_chunk + doc = ::RBMark::DOM::Document.parse(<<~DOC) + text + + indented code block + without space mangling + + int main() { + printf("Hello world!\\n"); + } + + + there are many space changes here and blank lines that + should *NOT* affect the way this is parsed + DOC + assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1]) + end + + def test_does_not_interrupt_paragraph + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Paragraph begins here + paragraph does the stupid wacky shit that somebody thinks is very funny + paragraph keeps doing that shit + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + assert_equal(1, doc.children.length) + end + + def test_begins_at_first_sight_of_four_spaces + doc = ::RBMark::DOM::Document.parse(<<~DOC) + text + + This is an indent block + This is a paragraph + DOC + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) + assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1]) + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2]) + end + + def test_interrupts_all_other_blocks + doc = ::RBMark::DOM::Document.parse(<<~DOC) + # Heading + foo + Heading + ------ + foo + ---- + DOC + assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1]) + assert_instance_of(::RBMark::DOM::Heading2, doc.children[2]) + assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[3]) + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[4]) + end + + def test_check_blank_lines_contents + skip # TODO: PLEASE I FUCKING BEG YOU IMPLEMENT THIS + end + + def test_check_contents_trailing_spaces + skip # TODO: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAa + end +end diff --git a/test/test_setext_headers.rb b/test/test_setext_headers.rb new file mode 100644 index 0000000..2d1878b --- /dev/null +++ b/test/test_setext_headers.rb @@ -0,0 +1,147 @@ +# frozen_string_literal: true + +require 'minitest/autorun' +require_relative '../lib/rbmark' + +# Test Setext Heading parsing compliance with CommonMark v0.31.2 +class TestSetextHeadings < Minitest::Test + def test_simple_heading1 + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo *bar* + ========= + + Foo *bar* + --------- + DOC + assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading2, doc.children[1]) + end + + def test_multiline_span + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo *bar + baz* + ==== + DOC + assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + assert_equal(1, doc.children.length) + end + + def test_span_inlining + doc = ::RBMark::DOM::Document.parse(<<~DOC) + start + + Foo *bar + baz + ==== + DOC + assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) + skip + end + + def test_line_length + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo + ------------------------------ + + Foo + = + DOC + assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) + end + + def test_content_indent + skip # TODO: implement this + end + + def test_marker_indent + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo + ------------------------------ + + Foo + = + + Foo + = + + Foo + = + DOC + refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[2]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[3]) + end + + def test_no_internal_spaces + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo + -- - - + + Foo + == = + DOC + refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + refute_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + end + + def test_block_level_priority + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ` Foo + ------ + ` + DOC + assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + assert_instance_of(::RBMark::DOM::Paragraph, doc.children[1]) + end + + def test_paragraph_breaking_only + doc = ::RBMark::DOM::Document.parse(<<~DOC) + > text + ------ + DOC + skip # TODO: implement this + end + + def test_paragraph_breaking_only_lazy_continuation + doc = ::RBMark::DOM::Document.parse(<<~DOC) + > text + continuation line + ------ + DOC + skip # TODO: implement this + end + + def test_headings_back_to_back + doc = ::RBMark::DOM::Document.parse(<<~DOC) + heading1 + ------ + heading2 + ------ + heading3 + ====== + DOC + assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + assert_instance_of(::RBMark::DOM::Heading2, doc.children[1]) + assert_instance_of(::RBMark::DOM::Heading1, doc.children[2]) + end + + def test_no_empty_headings + doc = ::RBMark::DOM::Document.parse(<<~DOC) + + ====== + DOC + refute_instance_of(::RBMark::DOM::Heading1, doc.children[0]) + end + + def test_thematic_breaks + doc = ::RBMark::DOM::Document.parse(<<~DOC) + ---- + ---- + DOC + refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) + refute_instance_of(::RBMark::DOM::Heading2, doc.children[1]) + end +end diff --git a/test/test_thematic_breaks.rb b/test/test_thematic_breaks.rb new file mode 100644 index 0000000..1ace973 --- /dev/null +++ b/test/test_thematic_breaks.rb @@ -0,0 +1,127 @@ +# frozen_string_literal: true + +require 'minitest/autorun' +require_relative '../lib/rbmark' + +# Test thematic break parsing compliance with CommonMark v0.31.2 +class TestThematicBreaks < Minitest::Test + def test_simple + doc = ::RBMark::DOM::Document.parse(<<~DOC) + --- + *** + ___ + DOC + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) + end + + def test_simple_invalid + doc = ::RBMark::DOM::Document.parse(<<~DOC) + +++ + DOC + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) + doc = ::RBMark::DOM::Document.parse(<<~DOC) + === + DOC + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) + end + + def test_simple_less_characters + doc = ::RBMark::DOM::Document.parse(<<~DOC) + -- + + ** + + __ + DOC + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) + end + + def test_indentation + doc = ::RBMark::DOM::Document.parse(<<~DOC) + *** + *** + *** + *** + *** + DOC + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[3]) + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[4]) + end + + def test_indentation_mixed_classes + doc = ::RBMark::DOM::Document.parse(<<~DOC) + Foo + + *** + DOC + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children.last) + end + + def test_line_length + doc = ::RBMark::DOM::Document.parse(<<~DOC) + _________________________________ + DOC + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) + end + + def test_mixed_spaces + doc = ::RBMark::DOM::Document.parse(<<~DOC) + - - - + + ** * ** * ** * ** + + - - - - + + - - - - + DOC + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[3]) + end + + def test_mixed_characters + doc = ::RBMark::DOM::Document.parse(<<~DOC) + _ _ _ _ a + + a------ + + ---a--- + DOC + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[3]) + end + + def test_mixed_markets + doc = ::RBMark::DOM::Document.parse(<<~DOC) + *-* + DOC + refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) + end + + def test_interrupt_list + doc = ::RBMark::DOM::Document.parse(<<~DOC) + - foo + *** + - bar + DOC + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) + end + + def test_interrupt_paragraph + doc = ::RBMark::DOM::Document.parse(<<~DOC) + foo + *** + bar + DOC + assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) + end +end