diff --git a/document.rb b/document.rb index 747e0ae..c0418cc 100644 --- a/document.rb +++ b/document.rb @@ -1,511 +1,55 @@ # frozen_string_literal: true module RBMark - # Parser units - # Parsers are divided into three categories: - # - Slicers - these parsers read the whole text of an element and slice it into chunks digestible by other parsers - # - ChunkParsers - these parsers transform chunks of text into a single DOM unit - # - InlineParsers - these parsers are called directly by the slicer to check whether a certain element matches needed criteria - module Parsers - # Abstract slicer class - class Slicer - # @param parent [::RBMark::DOM::DOMObject] - def initialize - @chunk_parsers = [] - end - - attr_accessor :chunk_parsers - - private - - def parse_chunk(text) - @chunk_parsers.each do |parser| - unless parser.is_a? ChunkParser - raise StandardError, 'not a ChunkParser' - end - - next unless parser.match?(text) - - return parser.match(text) - end - nil - end + # Parser class + class Parser + def initialize(variants, default) + @default = default + @variants = variants + @markers = @variants.map { |x| [x.begin, x] }.to_h end - # Abstract inline parser class - class InlineParser - # Test if piece matches bold syntax - # @param text [String] - # @return [Boolean] - def match?(text) - text.match?(@match_exp) - end + # Parse text using the given variants + # @param text [String] + # @return [Array<::RBMark::DOM::DOMObject>] + def parse(text) + chunks = [] + until text.nil? or text.empty? + before, chunk, text = get_chunk(text) + chunks.append(@default.parse(before)) unless before.empty? + next unless chunk - # Construct a new object from text - # @param text [String] - # @return [Object] - def match(text) - @class.parse(text) + chunks.append(chunk) end - - attr_reader :class, :match_exp + chunks.flatten end - # Abstract chunk parser class - class ChunkParser - # Stub for match method - def match(text) - element = ::RBMark::DOM::Text.new - element.content = text - element - end + private - # Stub for match? method - def match?(_text) - true + def get_chunk(text) + element, match = get_element(text) + if element + chunk, after = finalize_element(text[match.offset(0)[0]..], element) + return [match.pre_match, chunk, after] if chunk + + return [match.pre_match, nil, match.post_match] end + [text, nil, nil] end - # Slices text into paragraphs and feeds slices to chunk parsers - class RootSlicer < Slicer - # Parse text into chunks and feed each to the chain - # @param text [String] - def parse(text) - output = text.split(/(?:\r\r|\n\n|\r\n\r\n|\Z)/) - .reject { |x| x.match(/\A\s*\Z/) } - .map do |block| - parse_chunk(block) - end - merge_list_indents(output) - end - - private - - def merge_list_indents(chunks) - last_list = nil - delete_deferred = [] - chunks.each_with_index do |chunk, index| - if !last_list and [::RBMark::DOM::ULBlock, - ::RBMark::DOM::OLBlock].include? chunk.class - last_list = chunk - elsif last_list and mergeable?(last_list, chunk) - merge(last_list, chunk) - delete_deferred.prepend(index) - else - last_list = nil - end - end - delete_deferred.each { |i| chunks.delete_at(i) } - chunks - end - - def mergeable?(last_list, chunk) - if chunk.is_a? ::RBMark::DOM::IndentBlock or - (chunk.is_a? ::RBMark::DOM::ULBlock and - last_list.is_a? ::RBMark::DOM::ULBlock) or - (chunk.is_a? ::RBMark::DOM::OLBlock and - last_list.is_a? ::RBMark::DOM::OLBlock and - last_list.properties["num"] > chunk.properties["num"]) - true - else - false - end - end - - def merge(last_list, chunk) - if chunk.is_a? ::RBMark::DOM::IndentBlock - last_list.children.last.children.append(*chunk.children) - else - last_list.children.append(*chunk.children) - end - end + def get_element(text) + @markers.filter_map do |marker| + [marker[1], text.match(marker[0])] if text.match(marker[0]) + end.min_by { |x| x[1].offset(0)[0] } end - # Inline text slicer (slices based on the start and end symbols) - class InlineSlicer < Slicer - # Parse slices - # @param text [String] - def parse(text) - parts = [] - index = prepare_markers - until text.empty? - before, part, text = slice(text) - parts.append(::RBMark::DOM::Text.parse(before)) unless before.empty? - next unless part + def finalize_element(text, element) + match = text.match(element.end) + return nil, nil unless match - element = index.fetch(part.regexp, - ::RBMark::Parsers::TextInlineParser.new) - .match(part[0]) - parts.append(element) - end - parts - end - - private - - # Prepare markers from chunk_parsers - # @return [Hash] - def prepare_markers - index = {} - @markers = @chunk_parsers.map do |parser| - index[parser.match_exp] = parser - parser.match_exp - end - index - end - - # Get the next slice of a text based on markers - # @param text [String] - # @return [Array<(String,MatchData,String)>] - def slice(text) - first_tag = @markers.map { |x| text.match(x) } - .reject(&:nil?) - .min_by { |x| x.offset(0)[0] } - return text, nil, "" unless first_tag - - [first_tag.pre_match, first_tag, first_tag.post_match] - end - end - - # Slicer for unordered lists - class UnorderedSlicer < Slicer - # Parse list elements - def parse(text) - output = [] - buffer = "" - text.lines.each do |line| - if line.start_with? "- " and !buffer.empty? - output.append(make_element(buffer)) - buffer = "" - end - buffer += line[2..] - end - output.append(make_element(buffer)) unless buffer.empty? - output - end - - private - - def make_element(text) - ::RBMark::DOM::ListElement.parse(text) - end - end - - # Slicer for unordered lists - class OrderedSlicer < Slicer - # rubocop:disable Metrics/AbcSize - - # Parse list elements - def parse(text) - output = [] - buffer = "" - indent = text.match(/\A\d+\. /)[0].length - num = text.match(/\A(\d+)\. /)[1] - text.lines.each do |line| - if line.start_with?(/\d+\. /) and !buffer.empty? - output.append(make_element(buffer, num)) - buffer = "" - indent = line.match(/\A\d+\. /)[0].length - num = line.match(/\A(\d+)\. /)[1] - end - buffer += line[indent..] - end - output.append(make_element(buffer, num)) unless buffer.empty? - output - end - - # rubocop:enable Metrics/AbcSize - private - - def make_element(text, num) - element = ::RBMark::DOM::ListElement.parse(text) - element.property num: num.to_i - element - end - end - - # Quote block parser - class QuoteChunkParser < ChunkParser - # Tests for chunk being a block quote - # @param text [String] - # @return [Boolean] - def match?(text) - text.lines.map do |x| - x.match?(/\A\s*>(?:\s[^\n\r]+|)\Z/m) - end.all?(true) - end - - # Transforms text chunk into a block quote - # @param text - # @return [::RBMark::DOM::QuoteBlock] - def match(text) - text = text.lines.map do |x| - x.match(/\A\s*>(\s[^\n\r]+|)\Z/m)[1].to_s[1..] - end.join("\n") - ::RBMark::DOM::QuoteBlock.parse(text) - end - end - - # Paragraph block - class ParagraphChunkParser < ChunkParser - # Acts as a fallback for the basic paragraph chunk - # @param text [String] - # @return [Boolean] - def match?(_text) - true - end - - # Creates a new paragraph with the given text - def match(text) - ::RBMark::DOM::Paragraph.parse(text) - end - end - - # Code block - class CodeChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - text.match?(/\A```\w+[\r\n]{1,2}.*[\r\n]{1,2}```\Z/m) - end - - # Create a new element - def match(text) - lang, code = text.match( - /\A```(\w+)[\r\n]{1,2}(.*)[\r\n]{1,2}```\Z/m - )[1, 2] - element = ::RBMark::DOM::CodeBlock.new - element.property language: lang - text = ::RBMark::DOM::Text.new - text.content = code - element.append(text) - element - end - end - - # Heading chunk parser - class HeadingChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - text.match?(/\A\#{1,4}\s/) - end - - # Create a new element - def match(text) - case text.match(/\A\#{1,4}\s/)[0] - when "# " then ::RBMark::DOM::Heading1.parse(text[2..]) - when "## " then ::RBMark::DOM::Heading2.parse(text[3..]) - when "### " then ::RBMark::DOM::Heading3.parse(text[4..]) - when "#### " then ::RBMark::DOM::Heading4.parse(text[5..]) - end - end - end - - # Unordered list parser (chunk) - class UnorderedChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - return false unless text.start_with? "- " - - text.lines.map do |line| - line.match?(/\A(?:- .*| .*| )\Z/) - end.all?(true) - end - - # Create a new element - def match(text) - ::RBMark::DOM::ULBlock.parse(text) - end - end - - # Ordered list parser (chunk) - class OrderedChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - return false unless text.start_with?(/\d+\. /) - - indent = 0 - text.lines.each do |line| - if line.start_with?(/\d+\. /) - indent = line.match(/\A\d+\. /)[0].length - elsif line.start_with?(/\s+/) - return false if line.match(/\A\s+/)[0].length < indent - else - return false - end - end - true - end - - # Create a new element - def match(text) - ::RBMark::DOM::OLBlock.parse(text) - end - end - - # Indented block parser - class IndentChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - text.lines.map do |x| - x.start_with? " " or x.start_with? "\t" - end.all?(true) - end - - # Create a new element - def match(text) - text = text.lines.map { |x| x.match(/\A(?: {4}|\t)(.*)\Z/)[1] } - .join("\n") - ::RBMark::DOM::IndentBlock.parse(text) - end - end - - # Horizontal Rule block parser - class HRChunkParser < ChunkParser - # Check if a block matches the given parser rule - # @param text [String] - # @return [Boolean] - def match?(text) - text.match?(/\A-{3,}\Z/) - end - - # Create a new element - def match(text) - element = ::RBMark::DOM::HorizontalRule.new() - element.content = "" - element - end - end - - # Stub text parser - class TextInlineParser < InlineParser - # Stub method for creating new Text object - def match(text) - instance = ::RBMark::DOM::Text.new - instance.content = text - instance - end - end - - # Bold text - class BoldInlineParser < InlineParser - def initialize - super - @match_exp = /(?] + def self.parse(text) + text.split("\n\n").map do |chunk| + super(chunk) + end + end end # Heading level 1 class Heading1 < InlineFormattable + self.begin = /^# / + self.end = /#?$/ + + # (see ::RBMark::DOM::DOMObject.parse) + def self.parse(text) + super(text.gsub(self.begin, '').gsub(self.end, '')) + end end # Heading level 2 class Heading2 < Heading1 + self.begin = /^## / + self.end = /(?:##)?$/ end # Heading level 3 class Heading3 < Heading1 + self.begin = /^### / + self.end = /(?:###)?$/ end # Heading level 4 class Heading4 < Heading1 + self.begin = /^#### / + self.end = /(?:####)?$/ + end + + # Heading level 5 + class Heading5 < Heading1 + self.begin = /^##### / + self.end = /(?:#####)?$/ + end + + # Heading level 6 + class Heading6 < Heading1 + self.begin = /^###### / + self.end = /(?:######)?$/ + end + + # Alternative heading 1 + class AltHeading1 < InlineFormattable + self.begin = /^[^\n]+\n={3,}$/m + self.end = /={3,}$/ + self.alt_for = ::RBMark::DOM::Heading1 + + # (see ::RBMark::DOM::DOMObject.parse) + def self.parse(text) + super(text.match(/\A[^\n]+$/)[0].strip) + end + end + + # Alternative heading 2 + class AltHeading2 < InlineFormattable + self.begin = /^[^\n]+\n-{3,}$/m + self.end = /-{3,}$/ + self.alt_for = ::RBMark::DOM::Heading2 + + # (see ::RBMark::DOM::DOMObject.parse) + def self.parse(text) + super(text.match(/\A[^\n]+$/)[0].strip) + end end # Preformatted code block class CodeBlock < DOMObject + self.begin = /^```[^\n]*$/ + self.end = /^```[^\n]*\n.*?\n```$/m + + # Stub parser for block text element + # @param text [String] + # @return [self] + def self.parse(text) + instance = new + language = text.match(/\A```([^\n]*)/)[1].strip + element = ::RBMark::DOM::Text.new + element.content = text.lines[1..-2].join('').rstrip + instance.append(element) + instance.property language: language + instance + end end # Quote block class QuoteBlock < Document + self.begin = /^> \S/ + self.end = /(?:^(?!>)|\Z)/ + + # stub + def self.parse(text) + super(text.lines.map { |x| x[2..] }.join('')) + end end # Table class TableBlock < DOMObject end - # Unordered list - class ULBlock < DOMObject - self.slicer = ::RBMark::Parsers::UnorderedSlicer - end - - # Ordered list block - class OLBlock < DOMObject - self.slicer = ::RBMark::Parsers::OrderedSlicer - end - - # Indent block - class IndentBlock < Document - end - # List element class ListElement < Document end - # Horizontal rule - class HorizontalRule < DOMObject + # Unordered list + class ULBlock < DOMObject + self.begin = /^- +\S+/ + self.end = /(?:^(?!- +\S+| )|\Z)/ + + # (see RBMark::DOM::DOMObject.parse) + def self.parse(text) + block = [] + instance = new + text.lines.each do |line| + if line.start_with?("- ") + unless block.empty? + instance.append(::RBMark::DOM::ListElement.parse(block.join(''))) + end + block = [line[2..]] + else + block.append(line[2..]) + end + end + instance.append(::RBMark::DOM::ListElement.parse(block.join(''))) + instance + end end - # Paragraph in a document (separated by 2 newlines) - class Paragraph < InlineFormattable + # Ordered list block + class OLBlock < DOMObject + self.begin = /^\d+\. +\S+/ + self.end = /(?:^(?!\d+\. +\S+| {4})|\Z)/ + + # (see RBMark::DOM::DOMObject.parse) + def self.parse(text) + block = [] + instance = new + counter = nil + text.lines.each do |line| + if line.start_with?(/^\d+\. /) + unless block.empty? + instance.append(element(block.join(''), counter)) + end + counter = line.match(/^(\d+)\. /)[1] + block = [line.gsub(/^(?:\d+\. | {4})/, '')] + else + block.append(line.gsub(/^(?:\d+\. | {4})/, '')) + end + end + instance.append(element(block.join(''), counter)) + instance + end + + # Construct a new ListElement + def self.element(text, counter) + puts text + instance = ::RBMark::DOM::ListElement.parse(text) + instance.property number: counter + instance + end + end + + # Indent block + class IndentBlock < Document + self.begin = /^ {4}/ + self.end = /(?:^(?! {4})|\Z)/ + + # (see RBMark::DOM::DOMObject.parse) + def self.parse(text) + super(text.lines.map { |x| x[4..] }.join('')) + end + end + + # Horizontal rule + class HorizontalRule < DOMObject + self.begin = /^-{3,}$/ + self.end = /$/ + + # stub for HR + def self.parse(_text) + new + end + + # Stub for HR length + # @return [Integer] + def length + 1 + end + end + + InlineFormattable.class_exec do + default ::RBMark::DOM::Text + variant ::RBMark::DOM::InlineBold + variant ::RBMark::DOM::InlineItalics + variant ::RBMark::DOM::InlineUnder + variant ::RBMark::DOM::InlineImage + variant ::RBMark::DOM::InlineLink + variant ::RBMark::DOM::InlinePre + variant ::RBMark::DOM::InlineStrike + variant ::RBMark::DOM::InlineBreak + end + + Document.class_exec do + default ::RBMark::DOM::Paragraph + variant ::RBMark::DOM::Heading1 + variant ::RBMark::DOM::Heading2 + variant ::RBMark::DOM::Heading3 + variant ::RBMark::DOM::Heading4 + variant ::RBMark::DOM::Heading5 + variant ::RBMark::DOM::Heading6 + variant ::RBMark::DOM::AltHeading1 + variant ::RBMark::DOM::AltHeading2 + variant ::RBMark::DOM::QuoteBlock + variant ::RBMark::DOM::CodeBlock + variant ::RBMark::DOM::ULBlock + variant ::RBMark::DOM::OLBlock + variant ::RBMark::DOM::IndentBlock + variant ::RBMark::DOM::HorizontalRule end end end diff --git a/mdpp.rb b/mdpp.rb index 6b789b2..93d88e1 100644 --- a/mdpp.rb +++ b/mdpp.rb @@ -89,7 +89,7 @@ module MDPP line = word next end - line = [line, word].join(' ') + line = [line, word].join(line.end_with?("\n") ? '' : ' ') end output.append(line.lstrip) output.join("\n") diff --git a/test.md b/test.md index 5a53f4f..efb7c14 100644 --- a/test.md +++ b/test.md @@ -6,16 +6,48 @@ > Block quote **bold** and *italics* test > Block quote **bold *italics* mix** test +> Nested block quote test +> > with a very stupid secondary quote syntax +this should end the block quote btw + ## Header level 2 -[link](http://example.com) +also header level 2 +--- + +also header level 1 +=== + +not a header level2 +---asdlkashdlkasjd + +not a header level1 +===asdajlsdkjlsd + + not a header level2 + --- + + not a header level1 + === +[link](http://example.com) ![image alt text](http://example.com) -```plaintext +``` plaintext code *block* eat my shit ``` +> It is also of importance to note that this code is a hello world program, +> written entirely in C++. +> +> ```cpp +> int main() { +> cout << "Hello **world!**"; +> } +> ``` + +> theoretical failure test case +> **bold** **with space** paragraph with ``inline code block`` - Unordered list element 1 @@ -24,6 +56,11 @@ paragraph with ``inline code block`` 1. Ordered list element 1 2. Ordered list element 2 +1. [Link](https://ass.com) + 2. [Lv2](https://ass.com) + 3. [Lv2](https://anus.com) +4. shit + This is not a list - because it continues the paragraph - this is how it should be, like it or not