From f3d049feb232fb321fc20cafff5dce8a6f893c6c Mon Sep 17 00:00:00 2001 From: Yessiest <yessiest@memeware.net> Date: Sun, 2 Mar 2025 13:38:25 +0400 Subject: [PATCH] fixes for list parsing --- README.md | 2 +- architecture.md | 92 ++++ classes | 13 - document.rb | 738 -------------------------------- lib/blankshell.rb | 64 ++- test.md | 81 ---- test/test_atx_headers.rb | 102 ----- test/test_fenced_code_block.rb | 147 ------- test/test_fenced_code_blocks.rb | 102 ----- test/test_indent_block.rb | 97 ----- test/test_setext_headers.rb | 147 ------- test/test_thematic_breaks.rb | 127 ------- view_structure.rb | 21 + 13 files changed, 162 insertions(+), 1571 deletions(-) create mode 100644 architecture.md delete mode 100644 classes delete mode 100644 document.rb delete mode 100644 test.md delete mode 100644 test/test_atx_headers.rb delete mode 100644 test/test_fenced_code_block.rb delete mode 100644 test/test_fenced_code_blocks.rb delete mode 100644 test/test_indent_block.rb delete mode 100644 test/test_setext_headers.rb delete mode 100644 test/test_thematic_breaks.rb create mode 100644 view_structure.rb diff --git a/README.md b/README.md index 38b356c..7aeafc5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ # rubymark -Minimalistic modular markdown parser in Ruby \ No newline at end of file +Modular, compliant markdown parser in Ruby diff --git a/architecture.md b/architecture.md new file mode 100644 index 0000000..dab5c57 --- /dev/null +++ b/architecture.md @@ -0,0 +1,92 @@ +Architecture of madness +======================= + +Prelude +------- + +It needs to be stressed that making the parser modular while keeping it +relatively simple was a laborious undertaking. There has not been a standard +more hostile towards the people who dare attempt to implement it than +CommonMark. It should also be noted, that despite it being titled a +"Standard" in this document, it is less widely adopted than the Github +Flavored Markdown syntax. 
Github Flavored Markdown, however, is only but +a mere subset of this parser's model, albeit requiring a few extensions. + +Current state (as of March 02, 2025) +------------------------------------ + +This parser processes text in what can be boiled down to three phases. + +- Block/Line phase +- Overlay phase +- Inline phase + +The first phase breaks down blocks, line by line, into block structures. +Blocks (preferably inherited from the Block class) can contain other blocks. +(e.g. QuoteBlock, ULBlock, OLBlock). Other blocks (known as leaf blocks) +may not contain anything else (except inline content, more on that later). + +Blocks are designed to be parsed independently. This means that it *should* +be possible to tear out any standard block and make it not get parsed. +This, however, isn't thoroughly tested for. + +Blocks as proper, real classes have a certain lifecycle to follow when +being constructed: + +1. Open condition + - A block needs to find its first marker on the current line to open + (see `#begin?` method) + - Once it's open, it's immediately initialized and fed the line it just + read (but now as an object, not as a class) (see `#consume` method) +2. Marker/Line consumption + - While it should be kept open, the block parser instance will + keep reading input through `#consume` method, returning a pair + of modified line (after consuming its tokens from it) and + a boolean value indicating permission of lazy continuation + (if it's a block like a QuoteBlock or ULBlock that can be lazily + overflowed). + Every line the parser needs to record needs to be pushed + through the `#push` method. +3. 
Closure + - If the current line no longer belongs to the current block + (if the block should have been closed on the previous line), + it simply needs to `return` a pair of `nil`, and a boolean value for + permission of lazy continuation + - If a block should be closed on the current line, it should capture it, + keep track of the "closed" state, then `return` `nil` on the next call + of `#consume` + - Once a block is closed, it: + 1. Receives its content from the parser + 2. Parser receives the "close" method call + 3. (optional) Parser may have a callable method `#applyprops`. If + it exists, it gets called with the current constructed block. + 4. (optional) All overlays assigned to this block's class are + processed on the contents of this block (more on that in + Overlay phase) + 5. (optional) Parser may return a different class, which + the current block should be cast into (Overlays may change + the class as well) + 6. (optional) If a block can respond to `#parse_inner` method, it + will get called, allowing the block to parse its own contents. + - After this point, the block is no longer touched until the document + fully gets processed. +4. Inline processing + - (Applies only to Paragraph and any child of LeafBlock) + When the document gets fully processed, the contents of the current + block are taken, assigned to an InlineRoot instance, and then parsed + in Inline mode +5. Completion + - The resulting document is then returned. + +While there is a lot of functionality available in designing blocks, it is +not necessary for the simplest of the block kinds available. The simplest +example of a block parser is likely the ThematicBreakParser class, which +implements the only 2 methods needed for a block parser to function. 
+ +While parsing text, a block may use additional info: + +- In consume method: `lazy` hasharg, if the current line is being processed + in lazy continuation mode (likely only ever matters for Paragraph); and + `parent` - the parent block containing this block. + + diff --git a/classes b/classes deleted file mode 100644 index 9153047..0000000 --- a/classes +++ /dev/null @@ -1,13 +0,0 @@ -Bold [x} -Italics [x] -Underline [x] -Strikethrough [x] -CodeInline [x] -Link [x] -Image [x] -Headings [x] -CodeBlock [x] -QuoteBlock [x] -ULBlock [x] -OLBLock [x] -TableBlock [] diff --git a/document.rb b/document.rb deleted file mode 100644 index 07c15f6..0000000 --- a/document.rb +++ /dev/null @@ -1,738 +0,0 @@ -# frozen_string_literal: true - -module RBMark - # Parser units - # Parsers are divided into three categories: - # - Slicers - these parsers read the whole text of an element and slice it into chunks digestible by other parsers - # - ChunkParsers - these parsers transform chunks of text into a single DOM unit - # - InlineParsers - these parsers are called directly by the slicer to check whether a certain element matches needed criteria - module Parsers - # Abstract slicer class - class Slicer - # @param parent [::RBMark::DOM::DOMObject] - def initialize - @chunk_parsers = [] - end - - attr_accessor :chunk_parsers - - private - - def parse_chunk(text) - @chunk_parsers.each do |parser| - unless parser.is_a? 
ChunkParser - raise StandardError, 'not a ChunkParser' - end - - next unless parser.match?(text) - - return parser.match(text) - end - nil - end - end - - # Abstract inline parser class - class InlineParser - # Test if piece matches bold syntax - # @param text [String] - # @return [Boolean] - def match?(text) - text.match?(@match_exp) - end - - # Construct a new object from text - # @param text [String] - # @return [Object] - def match(text) - @class.parse(text) - end - - attr_reader :class, :match_exp - end - - # Abstract chunk parser class - class ChunkParser - # Stub for match method - def match(text) - element = ::RBMark::DOM::Text.new - element.content = text - element - end - - # Stub for match? method - def match?(_text) - true - end - end - - # Slices text into paragraphs and feeds slices to chunk parsers - class RootSlicer < Slicer - # Parse text into chunks and feed each to the chain - # @param text [String] - def parse(text) - output = text.split(/(?:\r\r|\n\n|\r\n\r\n|\Z)/) - .reject { |x| x.match(/\A\s*\Z/) } - .map do |block| - parse_chunk(block) - end - merge_list_indents(output) - end - - private - - def merge_list_indents(chunks) - last_list = nil - delete_deferred = [] - chunks.each_with_index do |chunk, index| - if !last_list and [::RBMark::DOM::ULBlock, - ::RBMark::DOM::OLBlock].include? chunk.class - last_list = chunk - elsif last_list and mergeable?(last_list, chunk) - merge(last_list, chunk) - delete_deferred.prepend(index) - else - last_list = nil - end - end - delete_deferred.each { |i| chunks.delete_at(i) } - chunks - end - - def mergeable?(last_list, chunk) - if chunk.is_a? ::RBMark::DOM::IndentBlock or - (chunk.is_a? ::RBMark::DOM::ULBlock and - last_list.is_a? ::RBMark::DOM::ULBlock) or - (chunk.is_a? ::RBMark::DOM::OLBlock and - last_list.is_a? ::RBMark::DOM::OLBlock and - last_list.properties["num"] > chunk.properties["num"]) - true - else - false - end - end - - def merge(last_list, chunk) - if chunk.is_a? 
::RBMark::DOM::IndentBlock - last_list.children.last.children.append(*chunk.children) - else - last_list.children.append(*chunk.children) - end - end - end - - # Inline text slicer (slices based on the start and end symbols) - class InlineSlicer < Slicer - # Parse slices - # @param text [String] - def parse(text) - parts = [] - index = prepare_markers - until text.empty? - before, part, text = slice(text) - parts.append(::RBMark::DOM::Text.parse(before)) unless before.empty? - next unless part - - element = index.fetch(part.regexp, - ::RBMark::Parsers::TextInlineParser.new) - .match(part[0]) - parts.append(element) - end - parts - end - - private - - # Prepare markers from chunk_parsers - # @return [Hash] - def prepare_markers - index = {} - @markers = @chunk_parsers.map do |parser| - index[parser.match_exp] = parser - parser.match_exp - end - index - end - - # Get the next slice of a text based on markers - # @param text [String] - # @return [Array<(String,MatchData,String)>] - def slice(text) - first_tag = @markers.map { |x| text.match(x) } - .reject(&:nil?) - .min_by { |x| x.offset(0)[0] } - return text, nil, "" unless first_tag - - [first_tag.pre_match, first_tag, first_tag.post_match] - end - end - - # Slicer for unordered lists - class UnorderedSlicer < Slicer - # Parse list elements - def parse(text) - output = [] - buffer = "" - text.lines.each do |line| - if line.start_with? "- " and !buffer.empty? - output.append(make_element(buffer)) - buffer = "" - end - buffer += line[2..] - end - output.append(make_element(buffer)) unless buffer.empty? - output - end - - private - - def make_element(text) - ::RBMark::DOM::ListElement.parse(text) - end - end - - # Slicer for unordered lists - class OrderedSlicer < Slicer - # rubocop:disable Metrics/AbcSize - - # Parse list elements - def parse(text) - output = [] - buffer = "" - indent = text.match(/\A\d+\. /)[0].length - num = text.match(/\A(\d+)\. /)[1] - text.lines.each do |line| - if line.start_with?(/\d+\. 
/) and !buffer.empty? - output.append(make_element(buffer, num)) - buffer = "" - indent = line.match(/\A\d+\. /)[0].length - num = line.match(/\A(\d+)\. /)[1] - end - buffer += line[indent..] - end - output.append(make_element(buffer, num)) unless buffer.empty? - output - end - - # rubocop:enable Metrics/AbcSize - private - - def make_element(text, num) - element = ::RBMark::DOM::ListElement.parse(text) - element.property num: num.to_i - element - end - end - - # Quote block parser - class QuoteChunkParser < ChunkParser - # Tests for chunk being a block quote - # @param text [String] - # @return [Boolean] - def match?(text) - text.lines.map do |x| - x.match?(/\A\s*>(?:\s[^\n\r]+|)\Z/m) - end.all?(true) - end - - # Transforms text chunk into a block quote - # @param text - # @return [::RBMark::DOM::QuoteBlock] - def match(text) - text = text.lines.map do |x| - x.match(/\A\s*>(\s[^\n\r]+|)\Z/m)[1].to_s[1..] - end.join("\n") - ::RBMark::DOM::QuoteBlock.parse(text) - end - end - - # Paragraph block - class ParagraphChunkParser < ChunkParser - # Acts as a fallback for the basic paragraph chunk - # @param text [String] - # @return [Boolean] - def match?(_text) - true - end - - # Creates a new paragraph with the given text - def match(text) - ::RBMark::DOM::Paragraph.parse(text) - end - end - - # Code block - class CodeChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - text.match?(/\A```\w+[\r\n]{1,2}.*[\r\n]{1,2}```\Z/m) - end - - # Create a new element - def match(text) - lang, code = text.match( - /\A```(\w+)[\r\n]{1,2}(.*)[\r\n]{1,2}```\Z/m - )[1, 2] - element = ::RBMark::DOM::CodeBlock.new - element.property language: lang - element.content = code - element - end - end - - # Heading chunk parser - class HeadingChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - 
text.match?(/\A\#{1,4}\s/) - end - - # Create a new element - def match(text) - case text.match(/\A\#{1,4}\s/)[0] - when "# " then ::RBMark::DOM::Heading1.parse(text[2..]) - when "## " then ::RBMark::DOM::Heading2.parse(text[3..]) - when "### " then ::RBMark::DOM::Heading3.parse(text[4..]) - when "#### " then ::RBMark::DOM::Heading4.parse(text[5..]) - end - end - end - - # Unordered list parser (chunk) - class UnorderedChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - return false unless text.start_with? "- " - - text.lines.map do |line| - line.match?(/\A(?:- .*| .*| )\Z/) - end.all?(true) - end - - # Create a new element - def match(text) - ::RBMark::DOM::ULBlock.parse(text) - end - end - - # Ordered list parser (chunk) - class OrderedChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - return false unless text.start_with?(/\d+\. /) - - indent = 0 - text.lines.each do |line| - if line.start_with?(/\d+\. /) - indent = line.match(/\A\d+\. /)[0].length - elsif line.start_with?(/\s+/) - return false if line.match(/\A\s+/)[0].length < indent - else - return false - end - end - true - end - - # Create a new element - def match(text) - ::RBMark::DOM::OLBlock.parse(text) - end - end - - # Indented block parser - class IndentChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - text.lines.map do |x| - x.start_with? " " or x.start_with? 
"\t" - end.all?(true) - end - - # Create a new element - def match(text) - text = text.lines.map { |x| x.match(/\A(?: {4}|\t)(.*)\Z/)[1] } - .join("\n") - ::RBMark::DOM::IndentBlock.parse(text) - end - end - - # Stub text parser - class TextInlineParser < InlineParser - # Stub method for creating new Text object - def match(text) - instance = ::RBMark::DOM::Text.new - instance.content = text - instance - end - end - - # Bold text - class BoldInlineParser < InlineParser - def initialize - super - @match_exp = /(?<!\\)\*\*+.+?(?<!\\)\*+\*/ - end - - # Match element - def match(text) - ::RBMark::DOM::InlineBold.parse(text[2..-3]) - end - end - - # Italics text - class ItalicsInlineParser < InlineParser - def initialize - super - @match_exp = /(?<!\\)\*+.+?(?<!\\)\*+/ - end - - # Match element - def match(text) - ::RBMark::DOM::InlineItalics.parse(text[1..-2]) - end - end - - # Underlined text - class UnderInlineParser < InlineParser - def initialize - super - @match_exp = /(?<!\\)__+.+?(?<!\\)_+_/ - end - - # Match element - def match(text) - ::RBMark::DOM::InlineUnder.parse(text[2..-3]) - end - end - - # Strikethrough text - class StrikeInlineParser < InlineParser - def initialize - super - @match_exp = /(?<!\\)~~+.+?(?<!\\)~+~/ - end - - # Match element - def match(text) - ::RBMark::DOM::InlineStrike.parse(text[2..-3]) - end - end - - # Preformatted text - class PreInlineParser < InlineParser - def initialize - super - @match_exp = /(?<!\\)``+.+?(?<!\\)`+`/ - end - - # Match element - def match(text) - ::RBMark::DOM::InlinePre.parse(text[2..-3]) - end - end - - # Hyperreference link - class LinkInlineParser < InlineParser - def initialize - super - @match_exp = /(?<![\\!])\[(.+?(?<!\\))\]\((.+?(?<!\\))\)/ - end - - # Match element - def match(text) - title, link = text.match(@match_exp)[1..2] - element = ::RBMark::DOM::InlineLink.new - element.content = title - element.property link: link - element - end - end - - # Image - class ImageInlineParser < InlineParser - 
def initialize - super - @match_exp = /(?<!\\)!\[(.+?(?<!\\))\]\((.+?(?<!\\))\)/ - end - - # Match element - def match(text) - title, link = text.match(@match_exp)[1..2] - element = ::RBMark::DOM::InlineImage.new - element.content = title - element.property link: link - element - end - end - - # Linebreak - class BreakInlineParser < InlineParser - def initialize - super - @match_exp = /\s{2}/ - end - - # Match element - def match(_text) - element = ::RBMark::DOM::InlineBreak.new - element.content = "" - element - end - end - end - - # Module for representing abstract object hierarchy - module DOM - # Abstract container - class DOMObject - class << self - attr_accessor :parsers - attr_reader :slicer - - # Hook for initializing variables - def inherited(subclass) - super - # Inheritance initialization - subclass.slicer = @slicer if @slicer - subclass.parsers = @parsers.dup if @parsers - subclass.parsers ||= [] - end - - # Initialize parsers for the current class - def initialize_parsers - @active_parsers = @parsers.map(&:new) - @active_slicer = @slicer.new if @slicer - end - - # Add a slicer - # @param parser [Object] - def slicer=(parser) - unless parser < ::RBMark::Parsers::Slicer - raise StandardError, "#{x} is not a Slicer" - end - - @slicer = parser - end - - # Add a parser to the chain - # @param parser [Object] - def parser(parser) - unless [::RBMark::Parsers::InlineParser, - ::RBMark::Parsers::ChunkParser].any? { |x| parser < x } - raise StandardError, "#{x} is not an InlineParser or a ChunkParser" - end - - @parsers.append(parser) - end - - # Parse text from the given context - # @param text [String] - # @return [self] - def parse(text) - initialize_parsers - container = new - container.content = text - _parse(container) - container.content = "" unless container.is_a? 
::RBMark::DOM::Text - container - end - - private - - def _parse(instance) - return unless @active_slicer - - @active_slicer.chunk_parsers = @active_parsers - instance.children.append(*@active_slicer.parse(instance.content)) - end - end - - def initialize - @content = nil - @children = [] - @properties = {} - end - - # Set certain property in the properties hash - # @param properties [Hash] proeprties to update - def property(**properties) - @properties.update(**properties) - end - - # Add child to container - # @param child [DOMObject] - def append(*children) - unless children.all? { |x| x.is_a? DOMObject } - raise StandardError, "#{x} is not a DOMObject" - end - - @children.append(*children) - end - - # Insert a child into the container - # @param child [DOMObject] - # @param index [Integer] - def insert(index, child) - raise StandardError, "not a DOMObject" unless child.is_a? DOMObject - - @children.insert(index, child) - end - - # Delete a child from container - # @param index [Integer] - def delete_at(index) - @children.delete_at(index) - end - - # Get a child from the container - # @param key [Integer] - def [](key) - @children[key] - end - - # Set text content of a DOMObject - # @param text [String] - def content=(text) - raise StandardError, "not a String" unless text.is_a? 
String - - @content = text - end - - # Get text content of a DOMObject - # @return [String, nil] - attr_reader :content, :children, :properties - end - - # Document root - class Document < DOMObject - self.slicer = ::RBMark::Parsers::RootSlicer - parser ::RBMark::Parsers::IndentChunkParser - parser ::RBMark::Parsers::QuoteChunkParser - parser ::RBMark::Parsers::HeadingChunkParser - parser ::RBMark::Parsers::CodeChunkParser - parser ::RBMark::Parsers::UnorderedChunkParser - parser ::RBMark::Parsers::OrderedChunkParser - parser ::RBMark::Parsers::ParagraphChunkParser - end - - # Inline text - class Text < DOMObject - def self.parse(text) - instance = super(text) - instance.content = instance.content.gsub(/[\s\r\n]+/, " ") - instance - end - end - - # Inline preformatted text - class InlinePre < DOMObject - self.slicer = ::RBMark::Parsers::InlineSlicer - end - - # Infline formattable text - class InlineFormattable < DOMObject - self.slicer = ::RBMark::Parsers::InlineSlicer - parser ::RBMark::Parsers::BreakInlineParser - parser ::RBMark::Parsers::BoldInlineParser - parser ::RBMark::Parsers::ItalicsInlineParser - parser ::RBMark::Parsers::PreInlineParser - parser ::RBMark::Parsers::UnderInlineParser - parser ::RBMark::Parsers::StrikeInlineParser - parser ::RBMark::Parsers::LinkInlineParser - parser ::RBMark::Parsers::ImageInlineParser - end - - # Bold text - class InlineBold < InlineFormattable - end - - # Italics text - class InlineItalics < InlineFormattable - end - - # Underline text - class InlineUnder < InlineFormattable - end - - # Strikethrough text - class InlineStrike < InlineFormattable - end - - # Hyperreferenced text - class InlineLink < InlineFormattable - end - - # Image - class InlineImage < DOMObject - end - - # Linebreak - class InlineBreak < DOMObject - end - - # Heading level 1 - class Heading1 < InlineFormattable - end - - # Heading level 2 - class Heading2 < Heading1 - end - - # Heading level 3 - class Heading3 < Heading1 - end - - # Heading level 4 
- class Heading4 < Heading1 - end - - # Preformatted code block - class CodeBlock < DOMObject - end - - # Quote block - class QuoteBlock < Document - end - - # Table - class TableBlock < DOMObject - end - - # Unordered list - class ULBlock < DOMObject - self.slicer = ::RBMark::Parsers::UnorderedSlicer - end - - # Ordered list block - class OLBlock < DOMObject - self.slicer = ::RBMark::Parsers::OrderedSlicer - end - - # Indent block - class IndentBlock < Document - end - - # List element - class ListElement < Document - end - - # Horizontal rule - class HorizontalRule < DOMObject - end - - # Paragraph in a document (separated by 2 newlines) - class Paragraph < InlineFormattable - end - end -end diff --git a/lib/blankshell.rb b/lib/blankshell.rb index ddfc36c..ab2cd64 100644 --- a/lib/blankshell.rb +++ b/lib/blankshell.rb @@ -484,18 +484,24 @@ module PointBlank self.open(line) return [nil, true] unless continues?(line) - [line, true] + [normalize(line), true] end + attr_reader :preoff + private # Open block if it hasn't been opened yet def open(line) - marker, offset = line.match(/\A {0,3}([-+*])(\s+)/)&.captures - return unless marker + return if @open - @marker ||= ['+', '*'].include?(marker) ? "\\#{marker}" : marker - @offset = offset + preoff, mark, off = line.match(/\A( {0,3})([-+*])(\s+)/)&.captures + return unless mark + + @preoff = preoff + @marker ||= ['+', '*'].include?(mark) ? "\\#{mark}" : mark + @offset = off + @open = true end # Check if a line continues this ULParser block @@ -505,6 +511,11 @@ module PointBlank line.start_with?(/\A(?: {0,3}#{@marker}| )#{@offset}/) || line.strip.empty? 
end + + # Strip off pre-marker offset + def normalize(line) + line.delete_prefix(@preoff) + end end # Unorder list block (element) @@ -515,7 +526,8 @@ module PointBlank end # (see ::PointBlank::Parsing::NullParser#consume) - def consume(line, _parent = nil, **_hargs) + def consume(line, parent = nil, **_hargs) + @parent ||= parent return [nil, true] unless continues?(line) self.open(line) @@ -544,7 +556,12 @@ module PointBlank # Normalize the line def normalize(line) - line.gsub(/\A(?: {0,3}#{@marker}| )#{@offset}/, '') + if !@opening_stripped + @opening_stripped = true + line.gsub(/\A(?: {0,3}#{@marker}| )#{@offset}/, '') + else + line.gsub(/\A\s#{@offset}/, '') + end end end @@ -567,20 +584,23 @@ module PointBlank self.open(line) return [nil, true] unless continues?(line) - [line, true] + [normalize(line), true] end private # Open block if it hasn't been opened yet def open(line) - num, marker, offset = line.match(/\A {0,3}(\d+)([).])(\s+)/) - &.captures + return if @open + pre, num, marker, off = line.match(/\A( {0,3})(\d+)([).])(\s+)/) + &.captures return unless marker + @preoff = pre @num = " " * (num.length + 1) @mark ||= "\\#{marker}" - @offset = offset + @offset = off + @open = true end # Check if a line continues this ULParser block @@ -590,6 +610,11 @@ module PointBlank line.start_with?(/\A(?: {0,3}(\d+)#{@mark}|#{@num})#{@offset}/) || line.strip.empty? 
end + + # Strip off pre-marker offset + def normalize(line) + line.delete_prefix(@preoff) + end end # Unorder list block (element) @@ -602,7 +627,7 @@ module PointBlank # (see ::PointBlank::Parsing::NullParser#consume) def consume(line, _parent = nil, **_hargs) return [nil, true] unless continues?(line) - + self.open(line) [normalize(line), true] @@ -619,10 +644,12 @@ module PointBlank def open(line) return if @open - @num, @marker, @offset = line.match(/\A {0,3}(\d+)([).])(\s+)/) - &.captures + num, marker, off = line.match(/\A {0,3}(\d+)([).])(\s+)/) + &.captures + @num = num @numoffset = " " * (@num.length + 1) - @marker = "\\#{@marker}" + @marker = "\\#{marker}" + @offset = off @open = true end @@ -636,7 +663,12 @@ module PointBlank # Normalize the line def normalize(line) - line.gsub(/\A(?: {0,3}(\d+)#{@marker}|#{@numoffset})#{@offset}/, '') + if !@opening_stripped + @opening_stripped = true + line.gsub(/\A(?: {0,3}\d+#{@marker}|#{@numoffset})#{@offset}/, '') + else + line.gsub(/\A#{@numoffset}#{@offset}/, '') + end end end diff --git a/test.md b/test.md deleted file mode 100644 index a7a0bf7..0000000 --- a/test.md +++ /dev/null @@ -1,81 +0,0 @@ -# Header level sadga kjshdkj hasdkjs hakjdhakjshd kashd kjashd kjashdk asjhdkj ashdkj ahskj hdaskd haskj hdkjash dkjashd ksajdh askjd hak askjhdkasjhdaksjhd sakjd 1 - -> Block quote text -> -> Second block quote paragraph -> Block quote **bold** and *italics* test -> Block quote **bold *italics* mix** test - -## Header level 2 - -[link](http://example.com) - - -```plaintext -code *block* -eat my shit -``` - -paragraph with ``inline code block`` - -- Unordered list element 1 -- Unordered list element 2 - -1. Ordered list element 1 -2. 
Ordered list element 2 - -This is not a list -- because it continues the paragraph -- this is how it should be, like it or not - -- This is also not a list -because there is text on the next line - -- But this here is a list - because the spacing is made correctly - - more so than that, there are multiple paragraphs here! - - - AND even more lists in a list! - - how extra -- And this is just the next element in the list - -1. same thing but with ordered lists - ordered lists have a little extra special property to them - - the indentations are always symmetrical to the last space of the bullet's number -10. i.e., if you look at this here example - this will work - - obviously - - -1. But this -10. Won't - because the indentation doesn't match the start of the line. - -generally speaking this kind of insane syntax trickery won't be necessary, -but it's just better to have standards than to have none of them. - -an unfortunate side effect of this flexibility should also be noted, and -it's that markdown linters don't like this sort of stuff. -Yet another reason not to use a markdown linter. - -- And this is just the lame stupid old way to do this, as described by mardkownguide - - > just indent your stuff and it works - > really it's as simple as that. - > bruh - - there can be as many as infinite number of elements appended to the list that way. 
- - you can even start a sublist here if you want to - - - here's a new nested list - - could you imagine the potential - - and here's an image of nothing - -  - -- I may also need to merge lists for this to work properly diff --git a/test/test_atx_headers.rb b/test/test_atx_headers.rb deleted file mode 100644 index 58a038a..0000000 --- a/test/test_atx_headers.rb +++ /dev/null @@ -1,102 +0,0 @@ -# frozen_string_literal: true - -require 'minitest/autorun' -require_relative '../lib/rbmark' - -# Test ATX Heading parsing compliance with CommonMark v0.31.2 -class TestATXHeadings < Minitest::Test - def test_simple_heading1 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - # ATX Heading level 1 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - end - - def test_simple_heading2 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ## ATX Heading level 2 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - end - - def test_simple_heading3 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ### ATX Heading level 3 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading3, doc.children[0]) - end - - def test_simple_heading4 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - #### ATX Heading level 4 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading4, doc.children[0]) - end - - def test_simple_heading5 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ##### ATX Heading level 5 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading5, doc.children[0]) - end - - def test_simple_heading6 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ###### ATX Heading level 6 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading6, doc.children[0]) - end - - def test_simple_not_a_heading - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ####### NOT a heading - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - end - - def test_breaking_paragrpah - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Paragraph 1 
- # ATX Heading level 1 - Paragraph 2 - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2]) - end - - def test_heading_sans_space - doc = ::RBMark::DOM::Document.parse(<<~DOC) - #NOT an ATX heading - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - end - - def test_heading_escaped - doc = ::RBMark::DOM::Document.parse(<<~DOC) - \\# Escaped ATX heading - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - end - - def test_spaces - doc = ::RBMark::DOM::Document.parse(<<~DOC) - #### Heading level 4 - ### Heading level 3 - ## Heading level 2 - # Heading level 1 - # NOT a heading - DOC - assert_instance_of(::RBMark::DOM::Heading4, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading3, doc.children[1]) - assert_instance_of(::RBMark::DOM::Heading2, doc.children[2]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[3]) - refute_instance_of(::RBMark::DOM::Heading1, doc.children[4]) - end -end diff --git a/test/test_fenced_code_block.rb b/test/test_fenced_code_block.rb deleted file mode 100644 index 2d1878b..0000000 --- a/test/test_fenced_code_block.rb +++ /dev/null @@ -1,147 +0,0 @@ -# frozen_string_literal: true - -require 'minitest/autorun' -require_relative '../lib/rbmark' - -# Test Setext Heading parsing compliance with CommonMark v0.31.2 -class TestSetextHeadings < Minitest::Test - def test_simple_heading1 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo *bar* - ========= - - Foo *bar* - --------- - DOC - assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading2, doc.children[1]) - end - - def test_multiline_span - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo *bar - baz* - ==== - DOC - assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - assert_equal(1, doc.children.length) - end - - def test_span_inlining - 
doc = ::RBMark::DOM::Document.parse(<<~DOC) - start - - Foo *bar - baz - ==== - DOC - assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) - skip - end - - def test_line_length - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo - ------------------------------ - - Foo - = - DOC - assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) - end - - def test_content_indent - skip # TODO: implement this - end - - def test_marker_indent - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo - ------------------------------ - - Foo - = - - Foo - = - - Foo - = - DOC - refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[2]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[3]) - end - - def test_no_internal_spaces - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo - -- - - - - Foo - == = - DOC - refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - refute_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - end - - def test_block_level_priority - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ` Foo - ------ - ` - DOC - assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[1]) - end - - def test_paragraph_breaking_only - doc = ::RBMark::DOM::Document.parse(<<~DOC) - > text - ------ - DOC - skip # TODO: implement this - end - - def test_paragraph_breaking_only_lazy_continuation - doc = ::RBMark::DOM::Document.parse(<<~DOC) - > text - continuation line - ------ - DOC - skip # TODO: implement this - end - - def test_headings_back_to_back - doc = ::RBMark::DOM::Document.parse(<<~DOC) - heading1 - ------ - heading2 - ------ - heading3 - ====== - DOC - assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading2, doc.children[1]) - 
assert_instance_of(::RBMark::DOM::Heading1, doc.children[2]) - end - - def test_no_empty_headings - doc = ::RBMark::DOM::Document.parse(<<~DOC) - - ====== - DOC - refute_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - end - - def test_thematic_breaks - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ---- - ---- - DOC - refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - refute_instance_of(::RBMark::DOM::Heading2, doc.children[1]) - end -end diff --git a/test/test_fenced_code_blocks.rb b/test/test_fenced_code_blocks.rb deleted file mode 100644 index 58a038a..0000000 --- a/test/test_fenced_code_blocks.rb +++ /dev/null @@ -1,102 +0,0 @@ -# frozen_string_literal: true - -require 'minitest/autorun' -require_relative '../lib/rbmark' - -# Test ATX Heading parsing compliance with CommonMark v0.31.2 -class TestATXHeadings < Minitest::Test - def test_simple_heading1 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - # ATX Heading level 1 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - end - - def test_simple_heading2 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ## ATX Heading level 2 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - end - - def test_simple_heading3 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ### ATX Heading level 3 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading3, doc.children[0]) - end - - def test_simple_heading4 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - #### ATX Heading level 4 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading4, doc.children[0]) - end - - def test_simple_heading5 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ##### ATX Heading level 5 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading5, doc.children[0]) - end - - def test_simple_heading6 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ###### ATX Heading level 6 - Paragraph - DOC - assert_instance_of(::RBMark::DOM::Heading6, doc.children[0]) - end - - def 
test_simple_not_a_heading - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ####### NOT a heading - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - end - - def test_breaking_paragrpah - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Paragraph 1 - # ATX Heading level 1 - Paragraph 2 - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2]) - end - - def test_heading_sans_space - doc = ::RBMark::DOM::Document.parse(<<~DOC) - #NOT an ATX heading - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - end - - def test_heading_escaped - doc = ::RBMark::DOM::Document.parse(<<~DOC) - \\# Escaped ATX heading - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - end - - def test_spaces - doc = ::RBMark::DOM::Document.parse(<<~DOC) - #### Heading level 4 - ### Heading level 3 - ## Heading level 2 - # Heading level 1 - # NOT a heading - DOC - assert_instance_of(::RBMark::DOM::Heading4, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading3, doc.children[1]) - assert_instance_of(::RBMark::DOM::Heading2, doc.children[2]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[3]) - refute_instance_of(::RBMark::DOM::Heading1, doc.children[4]) - end -end diff --git a/test/test_indent_block.rb b/test/test_indent_block.rb deleted file mode 100644 index c81e0b4..0000000 --- a/test/test_indent_block.rb +++ /dev/null @@ -1,97 +0,0 @@ -# frozen_string_literal: true - -require 'minitest/autorun' -require_relative '../lib/rbmark' - -# Test Setext Heading parsing compliance with CommonMark v0.31.2 -class TestSetextHeadings < Minitest::Test - def test_simple_indent - doc = ::RBMark::DOM::Document.parse(<<~DOC) - text - - indented code block - without space mangling - - int main() { - printf("Hello world!\\n"); - } - DOC - assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1]) - end 
- - def test_list_item_precedence - skip # TODO: implement this - end - - def test_numbered_list_item_precednce - skip # TODO: implement this - end - - def test_check_indent_contents - skip # TODO: yet again please implement this at some point thanks - end - - def test_long_chunk - doc = ::RBMark::DOM::Document.parse(<<~DOC) - text - - indented code block - without space mangling - - int main() { - printf("Hello world!\\n"); - } - - - there are many space changes here and blank lines that - should *NOT* affect the way this is parsed - DOC - assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1]) - end - - def test_does_not_interrupt_paragraph - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Paragraph begins here - paragraph does the stupid wacky shit that somebody thinks is very funny - paragraph keeps doing that shit - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - assert_equal(1, doc.children.length) - end - - def test_begins_at_first_sight_of_four_spaces - doc = ::RBMark::DOM::Document.parse(<<~DOC) - text - - This is an indent block - This is a paragraph - DOC - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0]) - assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1]) - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2]) - end - - def test_interrupts_all_other_blocks - doc = ::RBMark::DOM::Document.parse(<<~DOC) - # Heading - foo - Heading - ------ - foo - ---- - DOC - assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1]) - assert_instance_of(::RBMark::DOM::Heading2, doc.children[2]) - assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[3]) - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[4]) - end - - def test_check_blank_lines_contents - skip # TODO: PLEASE I FUCKING BEG YOU IMPLEMENT THIS - end - - def test_check_contents_trailing_spaces - skip # TODO: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAa - end -end diff 
--git a/test/test_setext_headers.rb b/test/test_setext_headers.rb deleted file mode 100644 index 2d1878b..0000000 --- a/test/test_setext_headers.rb +++ /dev/null @@ -1,147 +0,0 @@ -# frozen_string_literal: true - -require 'minitest/autorun' -require_relative '../lib/rbmark' - -# Test Setext Heading parsing compliance with CommonMark v0.31.2 -class TestSetextHeadings < Minitest::Test - def test_simple_heading1 - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo *bar* - ========= - - Foo *bar* - --------- - DOC - assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading2, doc.children[1]) - end - - def test_multiline_span - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo *bar - baz* - ==== - DOC - assert_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - assert_equal(1, doc.children.length) - end - - def test_span_inlining - doc = ::RBMark::DOM::Document.parse(<<~DOC) - start - - Foo *bar - baz - ==== - DOC - assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) - skip - end - - def test_line_length - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo - ------------------------------ - - Foo - = - DOC - assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) - end - - def test_content_indent - skip # TODO: implement this - end - - def test_marker_indent - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo - ------------------------------ - - Foo - = - - Foo - = - - Foo - = - DOC - refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[1]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[2]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[3]) - end - - def test_no_internal_spaces - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo - -- - - - - Foo - == = - DOC - refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - 
refute_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - end - - def test_block_level_priority - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ` Foo - ------ - ` - DOC - assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - assert_instance_of(::RBMark::DOM::Paragraph, doc.children[1]) - end - - def test_paragraph_breaking_only - doc = ::RBMark::DOM::Document.parse(<<~DOC) - > text - ------ - DOC - skip # TODO: implement this - end - - def test_paragraph_breaking_only_lazy_continuation - doc = ::RBMark::DOM::Document.parse(<<~DOC) - > text - continuation line - ------ - DOC - skip # TODO: implement this - end - - def test_headings_back_to_back - doc = ::RBMark::DOM::Document.parse(<<~DOC) - heading1 - ------ - heading2 - ------ - heading3 - ====== - DOC - assert_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - assert_instance_of(::RBMark::DOM::Heading2, doc.children[1]) - assert_instance_of(::RBMark::DOM::Heading1, doc.children[2]) - end - - def test_no_empty_headings - doc = ::RBMark::DOM::Document.parse(<<~DOC) - - ====== - DOC - refute_instance_of(::RBMark::DOM::Heading1, doc.children[0]) - end - - def test_thematic_breaks - doc = ::RBMark::DOM::Document.parse(<<~DOC) - ---- - ---- - DOC - refute_instance_of(::RBMark::DOM::Heading2, doc.children[0]) - refute_instance_of(::RBMark::DOM::Heading2, doc.children[1]) - end -end diff --git a/test/test_thematic_breaks.rb b/test/test_thematic_breaks.rb deleted file mode 100644 index 1ace973..0000000 --- a/test/test_thematic_breaks.rb +++ /dev/null @@ -1,127 +0,0 @@ -# frozen_string_literal: true - -require 'minitest/autorun' -require_relative '../lib/rbmark' - -# Test thematic break parsing compliance with CommonMark v0.31.2 -class TestThematicBreaks < Minitest::Test - def test_simple - doc = ::RBMark::DOM::Document.parse(<<~DOC) - --- - *** - ___ - DOC - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) - 
assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) - end - - def test_simple_invalid - doc = ::RBMark::DOM::Document.parse(<<~DOC) - +++ - DOC - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) - doc = ::RBMark::DOM::Document.parse(<<~DOC) - === - DOC - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) - end - - def test_simple_less_characters - doc = ::RBMark::DOM::Document.parse(<<~DOC) - -- - - ** - - __ - DOC - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) - end - - def test_indentation - doc = ::RBMark::DOM::Document.parse(<<~DOC) - *** - *** - *** - *** - *** - DOC - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[3]) - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[4]) - end - - def test_indentation_mixed_classes - doc = ::RBMark::DOM::Document.parse(<<~DOC) - Foo - - *** - DOC - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children.last) - end - - def test_line_length - doc = ::RBMark::DOM::Document.parse(<<~DOC) - _________________________________ - DOC - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) - end - - def test_mixed_spaces - doc = ::RBMark::DOM::Document.parse(<<~DOC) - - - - - - ** * ** * ** * ** - - - - - - - - - - - - - DOC - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[3]) - end - - def test_mixed_characters - doc = ::RBMark::DOM::Document.parse(<<~DOC) 
- _ _ _ _ a - - a------ - - ---a--- - DOC - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[2]) - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[3]) - end - - def test_mixed_markets - doc = ::RBMark::DOM::Document.parse(<<~DOC) - *-* - DOC - refute_instance_of(::RBMark::DOM::HorizontalRule, doc.children[0]) - end - - def test_interrupt_list - doc = ::RBMark::DOM::Document.parse(<<~DOC) - - foo - *** - - bar - DOC - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) - end - - def test_interrupt_paragraph - doc = ::RBMark::DOM::Document.parse(<<~DOC) - foo - *** - bar - DOC - assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[1]) - end -end diff --git a/view_structure.rb b/view_structure.rb new file mode 100644 index 0000000..079dd2f --- /dev/null +++ b/view_structure.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +require_relative 'lib/blankshell.rb' + +structure = PointBlank::DOM::Document.parse(File.read(ARGV[0])) +def red(string) + "\033[31m#{string}\033[0m" +end +def yellow(string) + "\033[33m#{string}\033[0m" +end + +def prettyprint(doc, indent = 0) + closed = doc.properties[:closed] + puts "#{yellow(doc.class.name.gsub(/\w+::DOM::/,""))}#{red(closed ? "(c)" : "")}: #{doc.content.inspect}" + doc.children.each do |child| + print red("#{" " * indent} - ") + prettyprint(child, indent + 4) + end +end +prettyprint(structure)