diff --git a/classes b/classes new file mode 100644 index 0000000..9153047 --- /dev/null +++ b/classes @@ -0,0 +1,13 @@ +Bold [x} +Italics [x] +Underline [x] +Strikethrough [x] +CodeInline [x] +Link [x] +Image [x] +Headings [x] +CodeBlock [x] +QuoteBlock [x] +ULBlock [x] +OLBLock [x] +TableBlock [] diff --git a/document.rb b/document.rb new file mode 100644 index 0000000..747e0ae --- /dev/null +++ b/document.rb @@ -0,0 +1,756 @@ +# frozen_string_literal: true + +module RBMark + # Parser units + # Parsers are divided into three categories: + # - Slicers - these parsers read the whole text of an element and slice it into chunks digestible by other parsers + # - ChunkParsers - these parsers transform chunks of text into a single DOM unit + # - InlineParsers - these parsers are called directly by the slicer to check whether a certain element matches needed criteria + module Parsers + # Abstract slicer class + class Slicer + # @param parent [::RBMark::DOM::DOMObject] + def initialize + @chunk_parsers = [] + end + + attr_accessor :chunk_parsers + + private + + def parse_chunk(text) + @chunk_parsers.each do |parser| + unless parser.is_a? ChunkParser + raise StandardError, 'not a ChunkParser' + end + + next unless parser.match?(text) + + return parser.match(text) + end + nil + end + end + + # Abstract inline parser class + class InlineParser + # Test if piece matches bold syntax + # @param text [String] + # @return [Boolean] + def match?(text) + text.match?(@match_exp) + end + + # Construct a new object from text + # @param text [String] + # @return [Object] + def match(text) + @class.parse(text) + end + + attr_reader :class, :match_exp + end + + # Abstract chunk parser class + class ChunkParser + # Stub for match method + def match(text) + element = ::RBMark::DOM::Text.new + element.content = text + element + end + + # Stub for match? method + def match?(_text) + true + end + end + + # Slices text into paragraphs and feeds slices to chunk parsers + class RootSlicer < Slicer + # Parse text into chunks and feed each to the chain + # @param text [String] + def parse(text) + output = text.split(/(?:\r\r|\n\n|\r\n\r\n|\Z)/) + .reject { |x| x.match(/\A\s*\Z/) } + .map do |block| + parse_chunk(block) + end + merge_list_indents(output) + end + + private + + def merge_list_indents(chunks) + last_list = nil + delete_deferred = [] + chunks.each_with_index do |chunk, index| + if !last_list and [::RBMark::DOM::ULBlock, + ::RBMark::DOM::OLBlock].include? chunk.class + last_list = chunk + elsif last_list and mergeable?(last_list, chunk) + merge(last_list, chunk) + delete_deferred.prepend(index) + else + last_list = nil + end + end + delete_deferred.each { |i| chunks.delete_at(i) } + chunks + end + + def mergeable?(last_list, chunk) + if chunk.is_a? ::RBMark::DOM::IndentBlock or + (chunk.is_a? ::RBMark::DOM::ULBlock and + last_list.is_a? ::RBMark::DOM::ULBlock) or + (chunk.is_a? ::RBMark::DOM::OLBlock and + last_list.is_a? ::RBMark::DOM::OLBlock and + last_list.properties["num"] > chunk.properties["num"]) + true + else + false + end + end + + def merge(last_list, chunk) + if chunk.is_a? ::RBMark::DOM::IndentBlock + last_list.children.last.children.append(*chunk.children) + else + last_list.children.append(*chunk.children) + end + end + end + + # Inline text slicer (slices based on the start and end symbols) + class InlineSlicer < Slicer + # Parse slices + # @param text [String] + def parse(text) + parts = [] + index = prepare_markers + until text.empty? + before, part, text = slice(text) + parts.append(::RBMark::DOM::Text.parse(before)) unless before.empty? + next unless part + + element = index.fetch(part.regexp, + ::RBMark::Parsers::TextInlineParser.new) + .match(part[0]) + parts.append(element) + end + parts + end + + private + + # Prepare markers from chunk_parsers + # @return [Hash] + def prepare_markers + index = {} + @markers = @chunk_parsers.map do |parser| + index[parser.match_exp] = parser + parser.match_exp + end + index + end + + # Get the next slice of a text based on markers + # @param text [String] + # @return [Array<(String,MatchData,String)>] + def slice(text) + first_tag = @markers.map { |x| text.match(x) } + .reject(&:nil?) + .min_by { |x| x.offset(0)[0] } + return text, nil, "" unless first_tag + + [first_tag.pre_match, first_tag, first_tag.post_match] + end + end + + # Slicer for unordered lists + class UnorderedSlicer < Slicer + # Parse list elements + def parse(text) + output = [] + buffer = "" + text.lines.each do |line| + if line.start_with? "- " and !buffer.empty? + output.append(make_element(buffer)) + buffer = "" + end + buffer += line[2..] + end + output.append(make_element(buffer)) unless buffer.empty? + output + end + + private + + def make_element(text) + ::RBMark::DOM::ListElement.parse(text) + end + end + + # Slicer for unordered lists + class OrderedSlicer < Slicer + # rubocop:disable Metrics/AbcSize + + # Parse list elements + def parse(text) + output = [] + buffer = "" + indent = text.match(/\A\d+\. /)[0].length + num = text.match(/\A(\d+)\. /)[1] + text.lines.each do |line| + if line.start_with?(/\d+\. /) and !buffer.empty? + output.append(make_element(buffer, num)) + buffer = "" + indent = line.match(/\A\d+\. /)[0].length + num = line.match(/\A(\d+)\. /)[1] + end + buffer += line[indent..] + end + output.append(make_element(buffer, num)) unless buffer.empty? + output + end + + # rubocop:enable Metrics/AbcSize + private + + def make_element(text, num) + element = ::RBMark::DOM::ListElement.parse(text) + element.property num: num.to_i + element + end + end + + # Quote block parser + class QuoteChunkParser < ChunkParser + # Tests for chunk being a block quote + # @param text [String] + # @return [Boolean] + def match?(text) + text.lines.map do |x| + x.match?(/\A\s*>(?:\s[^\n\r]+|)\Z/m) + end.all?(true) + end + + # Transforms text chunk into a block quote + # @param text + # @return [::RBMark::DOM::QuoteBlock] + def match(text) + text = text.lines.map do |x| + x.match(/\A\s*>(\s[^\n\r]+|)\Z/m)[1].to_s[1..] + end.join("\n") + ::RBMark::DOM::QuoteBlock.parse(text) + end + end + + # Paragraph block + class ParagraphChunkParser < ChunkParser + # Acts as a fallback for the basic paragraph chunk + # @param text [String] + # @return [Boolean] + def match?(_text) + true + end + + # Creates a new paragraph with the given text + def match(text) + ::RBMark::DOM::Paragraph.parse(text) + end + end + + # Code block + class CodeChunkParser < ChunkParser + # Check if a block matches the given parser rule + # @param text [String] + # @return [Boolean] + def match?(text) + text.match?(/\A```\w+[\r\n]{1,2}.*[\r\n]{1,2}```\Z/m) + end + + # Create a new element + def match(text) + lang, code = text.match( + /\A```(\w+)[\r\n]{1,2}(.*)[\r\n]{1,2}```\Z/m + )[1, 2] + element = ::RBMark::DOM::CodeBlock.new + element.property language: lang + text = ::RBMark::DOM::Text.new + text.content = code + element.append(text) + element + end + end + + # Heading chunk parser + class HeadingChunkParser < ChunkParser + # Check if a block matches the given parser rule + # @param text [String] + # @return [Boolean] + def match?(text) + text.match?(/\A\#{1,4}\s/) + end + + # Create a new element + def match(text) + case text.match(/\A\#{1,4}\s/)[0] + when "# " then ::RBMark::DOM::Heading1.parse(text[2..]) + when "## " then ::RBMark::DOM::Heading2.parse(text[3..]) + when "### " then ::RBMark::DOM::Heading3.parse(text[4..]) + when "#### " then ::RBMark::DOM::Heading4.parse(text[5..]) + end + end + end + + # Unordered list parser (chunk) + class UnorderedChunkParser < ChunkParser + # Check if a block matches the given parser rule + # @param text [String] + # @return [Boolean] + def match?(text) + return false unless text.start_with? "- " + + text.lines.map do |line| + line.match?(/\A(?:- .*| .*| )\Z/) + end.all?(true) + end + + # Create a new element + def match(text) + ::RBMark::DOM::ULBlock.parse(text) + end + end + + # Ordered list parser (chunk) + class OrderedChunkParser < ChunkParser + # Check if a block matches the given parser rule + # @param text [String] + # @return [Boolean] + def match?(text) + return false unless text.start_with?(/\d+\. /) + + indent = 0 + text.lines.each do |line| + if line.start_with?(/\d+\. /) + indent = line.match(/\A\d+\. /)[0].length + elsif line.start_with?(/\s+/) + return false if line.match(/\A\s+/)[0].length < indent + else + return false + end + end + true + end + + # Create a new element + def match(text) + ::RBMark::DOM::OLBlock.parse(text) + end + end + + # Indented block parser + class IndentChunkParser < ChunkParser + # Check if a block matches the given parser rule + # @param text [String] + # @return [Boolean] + def match?(text) + text.lines.map do |x| + x.start_with? " " or x.start_with? "\t" + end.all?(true) + end + + # Create a new element + def match(text) + text = text.lines.map { |x| x.match(/\A(?: {4}|\t)(.*)\Z/)[1] } + .join("\n") + ::RBMark::DOM::IndentBlock.parse(text) + end + end + + # Horizontal Rule block parser + class HRChunkParser < ChunkParser + # Check if a block matches the given parser rule + # @param text [String] + # @return [Boolean] + def match?(text) + text.match?(/\A-{3,}\Z/) + end + + # Create a new element + def match(text) + element = ::RBMark::DOM::HorizontalRule.new() + element.content = "" + element + end + end + + # Stub text parser + class TextInlineParser < InlineParser + # Stub method for creating new Text object + def match(text) + instance = ::RBMark::DOM::Text.new + instance.content = text + instance + end + end + + # Bold text + class BoldInlineParser < InlineParser + def initialize + super + @match_exp = /(?") - # Inline code (discord style) - .gsub(/(?#{code.gsub /[*`~_!\[]/,"\\\\\\0"}" - } - # Inline code (Markdown style) - .gsub(/(?#{code.gsub /[*`~_!\[]/,"\\\\\\0"}" - } - # Bold-italics - .gsub(/(?\\1") - # Bold - .gsub(/(?\\1") - # Italics - .gsub(/(?\\1") - # Strikethrough - .gsub(/(?\\1") - # Underline - .gsub(/(?\\1") - # Image - .gsub(/(?") - # Link - .gsub(/(?\\1") - super - end - end - ## Translator for linear leftmost tags. - # Leftmost linear tags open on the leftmost end of the string, and close once the line ends. These tags do not need to be explicitly closed. - class LeftmostTagTranslator < AbstractTranslator - def initialize(text) - @input = text - @output = text - super() - end - def to_html - # Headers - @output = @input.split("\n").map do |x| - x.gsub(/^(?"+content+"" - }.gsub(/^\-{3,}/,"
#{code.gsub /[|#*`~_!\[]/,"\\\\\\0"}
"
- }
- super()
- end
- end
- ## Translator for quotes in Markdown.
- # These deserve their own place in hell. As if the "yaml with triangle brackets instead of spaces" syntax wasn't horrible enough, each quote is its own markdown context.
- class QuoteTranslator < AbstractTranslator
- def initialize(text)
- if text.is_a? Array then
- @lines = text
- elsif text.is_a? String then
- @lines = text.split("\n")
- end
- @output = text
- super()
- end
- def input= (v)
- @lines = v.split("\n")
- @output = v
- end
- def input
- @lines.join("\n")
- end
- def to_html
- stack = []
- range = []
- @lines.each_with_index { |x,index|
- if x.match /^\s*> ?/ then
- range[0] = index if not range[0]
- range[1] = index
- else
- stack.append(range[0]..range[1]) if range[0] and range[1]
- range = []
- end
- }
- stack.append(range[0]..range[1]) if range[0] and range[1]
- stack.reverse.each { |r|
- @lines[r.begin] = "\n"+@lines[r.begin] - @lines[r.end] = @lines[r.end]+"\n" - @lines[r] = @lines[r].map { |line| - line.sub /^(\s*)> ?/,"\\1 " - } - @lines[r] = QuoteTranslator.new(@lines[r]).to_html - } - @output = @lines.join("\n") - super - end - end - - ## Table parser - # translates tables from a format in markdown to an html table - class TableTranslator < AbstractTranslator - def initialize(text) - @input = text - @output = text - super() - end - def to_html - lines = @output.split("\n") - table_testline = -1 - table_start = -1 - table_column_count = 0 - tables = [] - cur_table = [] - lines.each_with_index { |line,index| - if (table_start != -1) and (line.match /^\s*\|([^\|]*\|){#{table_column_count-1}}$/) then - if (table_testline == -1) then - if (line.match /^\s*\|(\-*\|){#{table_column_count-1}}$/) then - table_testline = 1 - else - table_start = -1 - cur_table = [] - end - else - cur_table.push (line.split("|").filter_map { |x| x.strip if x.match /\S+/ }) - end - elsif (table_start != -1) then - obj = {table: cur_table, start: table_start, end: index} - tables.push(obj) - table_start = -1 - cur_table = [] - table_testline = -1 - table_column_count = 0 - end - if (table_start == -1) and (line.start_with? /\s*\|/ ) and (line.match /^\s*\|.*\|/) then - table_start = index - table_column_count = line.count "|" - cur_table.push (line.split("|").filter_map { |x| x.strip if x.match /\S+/ }) - end - } - if cur_table != [] then - obj = {table: cur_table, start:table_start, end: lines.count-1} - tables.push(obj) - end - tables.reverse.each { |x| - lines[x[:start]..x[:end]] = (x[:table].map do |a2d| - (a2d.map { |x| (x.start_with? "#") ? "
Quote begins
->
-> yea
-> # header btw
-> > nextlevel quote
-> > more quote
-> > those are quotes
-> > yes
-> > > third level quote
-> > > yes
-> > second level again
-> > > third level again
-> > second level oioioi
-> >
-> > > third
-> > >
-> > >
-> > >
->
->
->
-> fin
-CODE
- ).to_html
-
-puts Markdown::CodeBlockTranslator.new(< Here's a bunch of shit i guess lmao idk
-```markdown
-test
-test
-test
-|1|2|3|
-|-|-|-|
-|a|b|c|
-
-| uneven rows | test | yes |
-|-|-|-|
-| sosiska | dinozavri | suda pihaem |
-| sosiska 2 | vitalya 2 | brat 2 |
-*** test ***
-piss
-cock
-__cock__
-# hi
-```
-> ok
-> here i go pissing
-> ***time to take a piss***
-> > pissing
-> > "what the hell are you doing"
-> > i'm taking a pieeees
-> > "why areyou not jomping at me thats what yourshupposed to do
-> > I might do it focking later
-> > ok
-> # bug
-> __cum__
-__mashup__
-
-| # sosiska | sosiska | suda pihaem |
-|-|-|-|
-| # 2 | chuvak ya ukral tvayu sardelku ))0)))0))))))) | __blya ((9((9((9)__ |
-| # azazaz lalka sasI | test | test |
-TEXT
- )+Markdown::QuoteTranslator+Markdown::LeftmostTagTranslator+Markdown::LinearTagTranslator+Markdown::TableTranslator+Markdown::BackslashTranslator)
- .to_html
-write = File.new("/tmp/test.html","w")
-write.write(test)
-write.close