Compare commits

..

No commits in common. "a022377f08680f09fd53cc10685038b8a17abe97" and "master" have entirely different histories.

15 changed files with 2000 additions and 357 deletions

View File

@ -1,22 +0,0 @@
---
include:
- "**/*.rb"
exclude:
- spec/**/*
- test/**/*
- vendor/**/*
- ".bundle/**/*"
require: ["minitest"]
domains: []
reporters:
- rubocop
- require_not_found
formatter:
rubocop:
cops: safe
except: []
only: []
extra_args: []
require_paths: []
plugins: []
max_files: 5000

3
README.md Normal file
View File

@ -0,0 +1,3 @@
# rubymark
Minimalistic modular markdown parser in Ruby

479
bin/mdpp Executable file
View File

@ -0,0 +1,479 @@
#!/usr/bin/ruby
# frozen_string_literal: true
require 'optparse'
require 'rbmark'
require 'io/console'
require 'io/console/size'
module MDPP
# Module for managing terminal output
module TextManager
# Wrap text in an ANSI SGR background color sequence.
# The color is read from properties['bg'] and may be either a 256-color
# palette index (Integer) or a "#RRGGBB" string. Any other value produces a
# warning on stderr and the text is returned unstyled.
# @param text [String]
# @param properties [Hash] style properties; only 'bg' is read
# @return [String]
def bg(text, properties)
  color = properties['bg']
  if color.is_a?(Integer)
    "\e[48;5;#{color}m#{text}\e[49m"
  elsif color.is_a?(String) && color.match?(/\A#[A-Fa-f0-9]{6}\Z/)
    red, green, blue = color.scan(/[A-Fa-f0-9]{2}/).map { |x| x.to_i(16) }
    # The truecolor sequence has to be closed with "m" and must wrap the text
    "\e[48;2;#{red};#{green};#{blue}m#{text}\e[49m"
  else
    Kernel.warn "WARNING: Invalid color - #{color}"
    text
  end
end
# Wrap text in an ANSI SGR foreground color sequence.
# The color is read from properties['fg'] and may be either a 256-color
# palette index (Integer) or a "#RRGGBB" string. Any other value produces a
# warning on stderr and the text is returned unstyled.
# @param text [String]
# @param properties [Hash] style properties; only 'fg' is read
# @return [String]
def fg(text, properties)
  color = properties['fg']
  if color.is_a?(Integer)
    "\e[38;5;#{color}m#{text}\e[39m"
  elsif color.is_a?(String) && color.match?(/\A#[A-Fa-f0-9]{6}\Z/)
    red, green, blue = color.scan(/[A-Fa-f0-9]{2}/).map { |x| x.to_i(16) }
    # The truecolor sequence has to be closed with "m" and must wrap the text
    "\e[38;2;#{red};#{green};#{blue}m#{text}\e[39m"
  else
    Kernel.warn "WARNING: Invalid color - #{color}"
    text
  end
end
# Surround text with the ANSI SGR codes that switch bold on (1) and off (22)
# @param text [String]
# @return [String]
def bold(text)
  opening = "\e[1m"
  closing = "\e[22m"
  opening + text.to_s + closing
end
# Surround text with the ANSI SGR codes that switch italics on (3) and off (23)
# @param text [String]
# @return [String]
def italics(text)
  opening = "\e[3m"
  closing = "\e[23m"
  opening + text.to_s + closing
end
# Surround text with the ANSI SGR codes that switch underline on (4) and off (24)
# @param text [String]
# @return [String]
def underline(text)
  opening = "\e[4m"
  closing = "\e[24m"
  opening + text.to_s + closing
end
# Surround text with the ANSI SGR codes that switch strikethrough on (9) and off (29)
# @param text [String]
# @return [String]
def strikethrough(text)
  opening = "\e[9m"
  closing = "\e[29m"
  opening + text.to_s + closing
end
# Greedy word wrapping.
# Text is split on runs of spaces; words longer than the width are cut into
# width-sized pieces. Leading whitespace of every produced line is removed.
# @param text [String]
# @param width [Integer] maximum line length; values below 1 are treated as 1
# @return [String] wrapped lines joined with "\n"
def wordwrap(text, width)
  # A non-positive width can never consume an over-long word and would loop forever
  width = 1 if width < 1
  words = text.split(/ +/)
  output = []
  line = ""
  until words.empty?
    word = words.shift
    if word.length > width
      # Push the remainder back so it is wrapped on the following iterations
      words.prepend(word[width..])
      word = word[...width]
    end
    if line.empty?
      # Nothing on the line yet: no separator is needed and no empty line
      # must be emitted even if the word fills the whole width
      line = word
    elsif line.length + word.length + 1 > width
      output.append(line.lstrip)
      line = word
    else
      # Words may carry embedded newlines; do not add a space right after one
      line = [line, word].join(line.end_with?("\n") ? '' : ' ')
    end
  end
  output.append(line.lstrip)
  output.join("\n")
end
# Draw a screen-width box around text.
# The text is wrapped to 80% of the inner width and every line is padded to
# the full inner width.
# @param text [String]
# @return [String]
def box(text)
  # IO.console_size falls back to a default size when there is no
  # controlling terminal, where IO.console would be nil
  size = IO.console_size[1] - 2
  body = wordwrap(text, (size * 0.8).floor).lines.filter_map do |line|
    "│#{line.strip.ljust(size)}│" unless line.empty?
  end.join("\n")
  <<~TEXT
    ╭#{'─' * size}╮
    #{body}
    ╰#{'─' * size}╯
  TEXT
end
# Draw text right-justified against the terminal width.
# The text is wrapped to 80% of the width first.
# @param text [String]
# @return [String]
def rjust(text)
  # IO.console_size falls back to a default size when there is no
  # controlling terminal, where IO.console would be nil
  size = IO.console_size[1]
  wordwrap(text, (size * 0.8).floor).lines.filter_map do |line|
    line.strip.rjust(size) unless line.empty?
  end.join("\n")
end
# Draw text centered within the terminal width.
# The text is wrapped to 80% of the width first.
# @param text [String]
# @return [String]
def center(text)
  # IO.console_size falls back to a default size when there is no
  # controlling terminal, where IO.console would be nil
  size = IO.console_size[1]
  wordwrap(text, (size * 0.8).floor).lines.filter_map do |line|
    line.strip.center(size) unless line.empty?
  end.join("\n")
end
# Underline the last line of the text piece.
# The underlined span covers the columns occupied by the content of the
# widest line of the block, so that the rule matches the block width.
# @param text [String]
# @return [String]
def underline_block(text)
  textlines = text.lines
  # Nothing to underline (e.g. an empty heading)
  return text if textlines.empty?

  widest = "".match(/()()()/)
  textlines.each do |row|
    current = row.match(/\A(\s*)(.+?)(\s*)\Z/)
    # Blank lines have no content group and cannot be the widest line
    next unless current

    widest = current if current[2].length > widest[2].length
  end
  first, last = widest.offset(2)
  ctxt = textlines.last.slice(first..last - 1)
  textlines[-1] = [widest[1], underline(ctxt), widest[3]].join
  textlines.join
end
# Add extra newlines around the text.
# A terminal-wide line of spaces is placed before the text, and a newline
# followed by another such line is placed after it.
# @param text [String]
# @return [String]
def extra_newlines(text)
  # IO.console_size falls back to a default size when there is no
  # controlling terminal, where IO.console would be nil
  blank = ' ' * IO.console_size[1]
  ["#{blank}\n", *text.lines, "\n#{blank}\n"].join
end
# Underline the whole last line of the text, edge to edge
# @param text [String]
# @return [String]
def underline_full_block(text)
  text.lines.tap { |rows| rows[-1] = underline(rows.last) }.join
end
# Indent all lines of the text (style entry point; forwards the nesting level)
# @param text [String]
# @param properties [Hash] style properties; 'level' is forwarded to _indent
# @return [String]
def indent(text, properties)
  depth = properties['level']
  _indent(text, level: depth)
end
# Indent all lines (inner): prefixes every line with the indentation string.
# Keyword arguments are accepted for call compatibility and ignored.
# @param text [String]
# @return [String]
def _indent(text, **_useless)
  text.each_line.map { |row| " #{row}" }.join
end
# Bulletpoints: indent the block and put "-" in place of the first character
# of the indentation
# @param text [String]
# @param _number [Integer] position of the item (unused)
# @param properties [Hash] style properties; 'level' is forwarded to _indent
# @return [String]
def bullet(text, _number, properties)
  indented = _indent(text, level: properties['level'])
  "-#{indented[1..]}"
end
# Numbers: indent the block and put "<number>." in place of the leading
# characters of the indented text
# @param text [String]
# @param number [Integer] position of the item
# @param properties [Hash] style properties; 'level' is forwarded to _indent
# @return [String]
def numbered(text, number, properties)
  marker = "#{number}."
  indented = _indent(text, level: properties['level'])
  "#{marker}#{indented[marker.length..]}"
end
# Sideline for quotes: prefixes every line with a vertical bar
# @param text [String]
# @return [String]
def sideline(text)
  text.each_line.map { |row| "│ #{row}" }.join
end
# Long bracket for code blocks.
# The header shows the language of the block. The parser's fenced code
# blocks store it under :infostring, so that key is used when :language is
# not set.
# @param text [String]
# @param properties [Hash] style properties; 'element' holds the DOM element properties
# @return [String]
def longbracket(text, properties)
  element = properties['element']
  language = element[:language] || element[:infostring]
  rows = text.lines.map { |line| "│ #{line}" }
  ["┌ (#{language})\n", *rows, "\n└\n"].join
end
# Add text to bibliography.
# When the 'nb' option is set the link is printed inline instead. Otherwise
# the (text, link) pair is recorded and a 1-based reference number matching
# its position in the rendered bibliography is appended to the text.
# @param text [String]
# @param properties [Hash] style properties; 'element' holds the DOM element properties
# @return [String]
def bibliography(text, properties)
  link = properties['element'][:link]
  return "#{text}[#{link}]" if @options['nb']

  @bibliography.append([text, link])
  # The entry has just been appended, so the length is already its 1-based index
  "#{text}[#{@bibliography.length}]"
end
end
# Default style table.
# Maps the name of a DOM class to the style flags used when rendering nodes
# of that class. Apart from "inline" (which controls how child blocks are
# joined and whether the nesting level grows), the keys are names of
# TextManager methods listed in STYLE_PRIO0 / STYLE_PRIO1.
# Only the outer hash is frozen; Renderer works on a copy of it.
DEFAULT_STYLE = {
"RBMark::DOM::Paragraph" => {
"inline" => true,
"indent" => true
},
"RBMark::DOM::Text" => {
"inline" => true
},
"RBMark::DOM::Heading1" => {
"inline" => true,
"center" => true,
"bold" => true,
"extra_newlines" => true,
"underline_full_block" => true
},
"RBMark::DOM::Heading2" => {
"inline" => true,
"center" => true,
"underline_block" => true
},
"RBMark::DOM::Heading3" => {
"inline" => true,
"underline" => true,
"bold" => true,
"indent" => true
},
"RBMark::DOM::Heading4" => {
"inline" => true,
"underline" => true,
"indent" => true
},
"RBMark::DOM::InlineImage" => {
"bibliography" => true,
"inline" => true
},
"RBMark::DOM::InlineLink" => {
"bibliography" => true,
"inline" => true
},
"RBMark::DOM::InlinePre" => {
"inline" => true
},
"RBMark::DOM::InlineStrike" => {
"inline" => true,
"strikethrough" => true
},
"RBMark::DOM::InlineUnder" => {
"inline" => true,
"underline" => true
},
"RBMark::DOM::InlineItalics" => {
"inline" => true,
"italics" => true
},
"RBMark::DOM::InlineBold" => {
"inline" => true,
"bold" => true
},
"RBMark::DOM::QuoteBlock" => {
"sideline" => true
},
"RBMark::DOM::CodeBlock" => {
"longbracket" => true
},
"RBMark::DOM::ULBlock" => {
"bullet" => true
},
"RBMark::DOM::OLBlock" => {
"numbered" => true
},
"RBMark::DOM::HorizontalRule" => {
"extra_newlines" => true
},
"RBMark::DOM::IndentBlock" => {
"indent" => true
}
}.freeze
# Styles applied to every child block separately, before the blocks are
# joined (see Renderer#prio0). Each entry is [method name, takes_properties]:
# the method is called with (block, 1-based index) and, when the flag is
# true, the properties hash as a third argument.
STYLE_PRIO0 = [
["numbered", true],
["bullet", true]
].freeze
# Styles applied to the joined text of an element, in this order (see
# Renderer#prio1). Each entry is [method name, takes_properties]: the method
# is called with the text and, when the flag is true, the properties hash as
# a second argument.
STYLE_PRIO1 = [
["center", false],
["rjust", false],
["box", false],
["indent", true],
["underline", false],
["bold", false],
["italics", false],
["strikethrough", false],
["bg", true],
["fg", true],
["bibliography", true],
["extra_newlines", false],
["sideline", false],
["longbracket", true],
["underline_block", false],
["underline_full_block", false]
].freeze
# Primary document renderer
class Renderer
include ::MDPP::TextManager
# Parse the input and prepare the style table.
# Entries of options['style'] are merged into the default style of the DOM
# class with the same name.
# @param input [String] Markdown source
# @param options [Hash] renderer options ('style', 'nb')
def initialize(input, options)
  @doc = RBMark::DOM::Document.parse(input)
  @bibliography = []
  @options = options
  @style = ::MDPP::DEFAULT_STYLE.dup
  overrides = options['style']
  return unless overrides

  @style = @style.to_h do |name, rules|
    extra = overrides[name]
    [name, extra ? rules.merge(**extra) : rules]
  end
end
# Render the document, followed by the bibliography when there are entries
# and the 'nb' option is not set
# @return [String]
def render
  body = _render(@doc.children, @doc.properties)
  return body if @bibliography.empty? || @options['nb']

  body + _render_bibliography
end
private
# Render the collected bibliography: a terminal-wide rule followed by one
# numbered "- [n] text: link" entry per recorded reference
# @return [String]
def _render_bibliography
  # IO.console_size falls back to a default size when there is no
  # controlling terminal, where IO.console would be nil
  size = IO.console_size[1]
  entries = @bibliography.map.with_index(1) do |element, number|
    "- [#{number}] #{wordwrap(element.join(': '), size - 15)}"
  end
  "\n#{'─' * size}\n#{entries.join("\n")}"
end
# Render a list of sibling DOM nodes and apply the parent's styles to them.
# @param children [Array<RBMark::DOM::DOMObject>]
# @param props [Hash] resolved style properties of the parent element
# @return [String]
def _render(children, props)
  blocks = children.map do |child|
    case child
    when ::RBMark::DOM::Text then child.content
    when ::RBMark::DOM::InlineBreak then "\n"
    when ::RBMark::DOM::HorizontalRule
      # IO.console_size falls back to a default size without a terminal
      "─" * IO.console_size[1]
    else
      child_props = get_props(child, props)
      calc_wordwrap(_render_child(child, child_props), props, child_props)
    end
  end
  apply_props(blocks, props)
end

# Render the inside of a single element with its own styles applied.
# Elements that carry only text content and no children are rendered from
# that content instead of producing an empty string.
# @param child [RBMark::DOM::DOMObject]
# @param child_props [Hash] resolved style properties of the element
# @return [String]
def _render_child(child, child_props)
  if child.children.empty? && child.content
    apply_props([child.content], child_props)
  else
    _render(child.children, child_props)
  end
end
# Wrap the rendered text of an element when it is an inline element placed
# directly inside a non-inline parent. Centered and right-justified elements
# are returned untouched because those styles wrap the text themselves.
# @param obj [String] rendered text of the element
# @param props [Hash] style properties of the parent
# @param obj_props [Hash] style properties of the element
# @return [String]
def calc_wordwrap(obj, props, obj_props)
  return obj if obj_props['center'] || obj_props['rjust']
  return obj if props['inline'] || !obj_props['inline']

  # The terminal is only queried when wrapping is actually needed;
  # IO.console_size falls back to a default size without a terminal
  wordwrap(obj, IO.console_size[1] - 2 * (props['level'].to_i + 1))
end
# Resolve the style properties of an element.
# Starts from a copy of the style registered for the element's class, sets
# "level" (2 when the parent has none, otherwise the parent's level, plus one
# for non-inline elements) and stores the element's own properties under
# "element".
# @param obj [RBMark::DOM::DOMObject]
# @param props [Hash] style properties of the parent
# @return [Hash]
def get_props(obj, props)
  derived = @style[obj.class.to_s].dup || {}
  parent_level = props["level"]
  derived["level"] =
    if parent_level
      derived["inline"] ? parent_level : parent_level + 1
    else
      2
    end
  derived["element"] = obj.properties
  derived
end
# Apply styles to a list of rendered blocks: per-block styles first, then the
# blocks are joined (directly for inline elements, with a blank line
# otherwise), runs of newlines are collapsed to one blank line and the
# whole-text styles are applied.
# @param blockarray [Array<String>]
# @param properties [Hash]
# @return [String]
def apply_props(blockarray, properties)
  separator = properties['inline'] ? "" : "\n\n"
  joined = prio0(blockarray, properties).join(separator)
  prio1(joined.gsub(/\n{2,}/, "\n\n"), properties)
end
# Apply the per-block styles from STYLE_PRIO0 to every block.
# A style is applied only when its property has a truthy value, so a style
# can be switched off by setting it to false in a user configuration.
# @param blocks [Array<String>]
# @param props [Hash]
# @return [Array<String>]
def prio0(blocks, props)
  ::MDPP::STYLE_PRIO0.each do |name, takes_props|
    next unless props[name]

    blocks = blocks.map.with_index(1) do |block, number|
      if takes_props
        send(name, block, number, props)
      else
        send(name, block, number)
      end
    end
  end
  blocks
end
# Apply the whole-text styles from STYLE_PRIO1, in table order.
# A style is applied only when its property has a truthy value, so a style
# can be switched off by setting it to false in a user configuration.
# @param block [String]
# @param props [Hash]
# @return [String]
def prio1(block, props)
  ::MDPP::STYLE_PRIO1.each do |name, takes_props|
    next unless props[name]

    block = takes_props ? send(name, block, props) : send(name, block)
  end
  block
end
end
end
options = {}
# The per-user config is loaded first so that everything given explicitly on
# the command line (-c, -b) takes precedence over it instead of being
# overwritten by it afterwards.
# NOTE(review): config files are evaluated as Ruby code and must evaluate to
# a Hash; only trusted files should ever be loaded this way.
# rubocop:disable Security/Eval
default_config = "#{ENV['HOME']}/.config/mdpp.rb"
options.merge!(eval(File.read(default_config))) if File.exist?(default_config)
# rubocop:enable Security/Eval
OptionParser.new do |opts|
  opts.banner = <<~TEXT
    MDPP - Markdown PrettyPrint based on RBMark parser
    Usage: mdpp [options] <file | ->
  TEXT
  opts.on("-h", "--help", "Prints this help message") do
    puts opts
    exit 0
  end
  opts.on("-e", "--extension EXTENSION",
          "require EXTENSION before parsing") do |libname|
    require libname
  end
  opts.on(
    "-c",
    "--config CONFIG",
    "try to load CONFIG (~/.config/mdpp.rb is loaded by default)"
  ) do |config|
    # rubocop:disable Security/Eval
    options.merge!(eval(File.read(config))) if File.exist?(config)
    # rubocop:enable Security/Eval
  end
  opts.on(
    "-b",
    "--no-bibliography",
    "Do not print bibliography (links, references, etc.) at the bottom"
  ) do
    options["nb"] = true
  end
end.parse!
# Read the document from the given file, or from standard input when no file
# (or "-") is given
text = if ARGV[0].nil? || ARGV[0] == "-"
         $stdin.read
       else
         File.read(ARGV[0])
       end
renderer = MDPP::Renderer.new(text, options)
puts renderer.render

634
lib/rbmark.rb Normal file
View File

@ -0,0 +1,634 @@
# frozen_string_literal: true
module RBMark
# Module for representing parsing-related constructs
module Parsing
# Abstract scanner interface.
# Holds the list of variants a concrete scanner chooses from.
class Scanner
  # @return [Array] registered variants
  attr_accessor :variants

  def initialize
    @variants = []
  end

  # Scan text (must be implemented by subclasses)
  # @param text [String]
  # @return [Array<RBMark::DOM::DOMObject>]
  # @raise [StandardError] always, in this abstract class
  def scan(_text)
    raise(StandardError, "Abstract method called")
  end
end
# Line-level scanner for blocks.
# Feeds the text line by line to the registered block variants: the first
# variant whose begin? accepts a line becomes the current mode, lines are
# accumulated in a buffer until the mode's end? returns true, and the buffer
# is then flushed into DOM objects.
class LineScanner < Scanner
  # (see ::RBMark::Parsing::Scanner#scan)
  def scan(text, buffer: "", blocks: [], mode: nil)
    prepare
    lines = text.lines
    lines.each_with_index do |line, index|
      # Lines that no variant claimed (blank lines between blocks) must not
      # leak into the buffer of the next block: e.g. a fenced code block
      # would otherwise see more than one buffered line on its opening line
      # and close immediately.
      buffer = "" if mode.nil? && !buffer.match?(/\S/)
      buffer += line
      ahead = lines[index + 1]
      blocks, buffer, mode = try_begin(line, blocks, buffer, mode,
                                       lookahead: ahead)
      if mode&.end?(line, lookahead: ahead, blocks: blocks, buffer: buffer)
        blocks, buffer, mode = flush(blocks, buffer, mode)
      end
    end
    # Flush whatever is left when the text ends inside a block
    flush(blocks, buffer, mode)[0]
  end

  # Predict mode for given line
  # @param line [String]
  # @param message [Hash] extra context passed to BlockVariant#begin?
  # @return [Object, nil] first variant accepting the line, if any
  def select_mode(line, **message)
    @variants.find do |variant|
      variant[0].begin?(line, **message)
    end&.at(0)
  end

  private

  # Attempt to open a new mode and, if possible, call :begin to prepare the block.
  # Does nothing while a mode is already open.
  def try_begin(line, blocks, buffer, mode, lookahead: nil)
    return blocks, buffer, mode if mode

    mode = select_mode(line, lookahead: lookahead,
                             blocks: blocks,
                             buffer: buffer)
    blocks.append(mode.begin(line)) if mode.respond_to?(:begin)
    [blocks, buffer, mode]
  end

  # Assign self as parent to all variants and order them by priority
  # @return [void]
  # @raise [StandardError] if a registered variant is not a BlockVariant
  def prepare
    @variants.each do |variant|
      unless variant[0].is_a? ::RBMark::Parsing::BlockVariant
        raise StandardError, "#{variant} is not a BlockVariant"
      end

      variant[0].parent = self
    end
    @variants.sort_by!(&:last)
  end

  # Flush the buffer using given mode
  # @param blocks [Array<RBMark::DOM::DOMObject>]
  # @param buffer [String]
  # @param mode [Object]
  # @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
  def flush(blocks, buffer, mode)
    return [blocks, "", nil] if buffer == ""

    mode.end(blocks.last, buffer) if mode.respond_to?(:end)
    blocks.append(mode.flush(buffer)) if mode.respond_to?(:flush)
    if mode.respond_to?(:restructure)
      # The variant decides what stays in the buffer and which mode continues
      blocks, buffer, mode = mode.restructure(blocks, buffer, mode)
    else
      buffer = ""
      mode = nil
    end
    [blocks, buffer, mode]
  end
end
# Abstract variant interface.
# Common ancestor of all variants; DOMObject.variant only accepts classes
# that are subclasses of this one.
class Variant
end
# Abstract block-level variant.
#
# Subclasses must implement begin? and end?. The line scanner additionally
# calls the following methods when a subclass defines them:
#
# @!method begin(buffer)
# Open a block to be later filled in by BlockVariant#end
# @param buffer [String]
# @return [::RBMark::DOM::DOMObject]
# @!method end(block, buffer)
# Finalize a block opened by begin
# @param block [::RBMark::DOM::DOMObject]
# @param buffer [String]
# @return [void]
# @!method flush(buffer)
# Flush buffer and create a new DOM object
# @param buffer [String]
# @return [::RBMark::DOM::DOMObject]
# @!method restructure(blocks, buffer, mode)
# Restructure current set of blocks
# @param blocks [Array<::RBMark::DOM::DOMObject>]
# @param buffer [String]
# @param mode [::RBMark::Parsing::Variant]
# @return [Array(Array<RBMark::DOM::DOMObject>, String, ::RBMark::Parsing::Variant)]
class BlockVariant < Variant
  # @return [Object] scanner this variant is registered in
  attr_accessor :parent

  # Check if a block begins on this line
  # @param line [String]
  # @param opts [Hash] options hash
  # @option opts [String, nil] :lookahead next line over
  # @option opts [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option opts [String] :buffer current state of buffer
  # @return [Boolean]
  def begin?(_line, **_opts)
    raise(StandardError, "Abstract method called")
  end

  # Check if a block ends on this line
  # @param line [String]
  # @param opts [Hash] options hash
  # @option opts [String, nil] :lookahead next line over
  # @option opts [Array<::RBMark::DOM::DOMObject>] :blocks current list of blocks
  # @option opts [String] :buffer current state of buffer
  # @return [Boolean]
  def end?(_line, **_opts)
    raise(StandardError, "Abstract method called")
  end
end
# Paragraph breaking variant.
# A breaker can terminate a paragraph and decide which DOM class the
# paragraph buffer turns into.
#
# @!method preprocess(buffer)
# Preprocess buffer before it is parsed (optional)
# @param buffer [String]
# @return [String]
class BreakerVariant < BlockVariant
  # Check that a paragraph matches the breaker
  # @param buffer [String]
  # @return [Class, nil]
  def match(_buffer)
    raise(StandardError, "Abstract method called")
  end
end
# Paragraph variant: claims any line with visible content
class ParagraphVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/\S/)
  end

  # (see BlockVariant#end?)
  # The paragraph ends on a breaker line or at the end of input. It continues
  # when the next line is a breaker (so the breaker is consumed by the
  # paragraph), is indented by four spaces, or would itself be a paragraph.
  def end?(line, lookahead: nil, **_opts)
    return true if check_paragraph_breakers(line)
    return true unless lookahead
    return false if check_paragraph_breakers(lookahead)
    return false if lookahead.match?(/^ {4}/)

    !parent.select_mode(lookahead).is_a?(self.class)
  end

  # (see BlockVariant#flush)
  # The first breaker whose match accepts the buffer selects the DOM class
  # and may preprocess the buffer; otherwise a Paragraph is produced.
  def flush(buffer)
    dom_class = nil
    entry = parent.variants.find do |variant, _prio|
      variant.is_a?(::RBMark::Parsing::BreakerVariant) &&
        (dom_class = variant.match(buffer))
    end
    breaker = entry&.first
    buffer = breaker.preprocess(buffer) if breaker.respond_to?(:preprocess)
    (dom_class || ::RBMark::DOM::Paragraph).parse(buffer.strip)
  end

  private

  # Check whether any registered breaker starts on the given line
  def check_paragraph_breakers(line)
    parent.variants.any? do |variant, _prio|
      variant.is_a?(::RBMark::Parsing::BreakerVariant) &&
        variant.begin?(line, breaks_paragraph: true)
    end
  end
end
# Thematic break variant: a line made of at least three "-", "_" or "*"
# characters, optionally separated by spaces
class ThematicBreakVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    return false unless line.match?(/^(?:[- ]{3,}|[_ ]{3,}|[* ]{3,})$/)
    return false unless line.match?(/^ {0,3}[-_*]/)

    %w[- _ *].any? { |mark| line.count(mark) >= 3 }
  end

  # (see BlockVariant#end?)
  # A thematic break always occupies exactly one line
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  def flush(_buffer)
    ::RBMark::DOM::HorizontalRule.new
  end
end
# ATX Heading variant: one to six "#" characters followed by the heading text
class ATXHeadingVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^ {0,3}\#{1,6}(?: .*|)$/)
  end

  # (see BlockVariant#end?)
  # An ATX heading always occupies exactly one line
  def end?(_line, **_opts)
    true
  end

  # (see BlockVariant#flush)
  # The closing sequence of "#" characters, if any, is removed from the text
  def flush(buffer)
    marker, content = buffer.match(/^ {0,3}(\#{1,6})( .*|)$/).captures
    title = content.gsub(/( #+|)$/, "").strip
    heading(marker).parse(title)
  end

  private

  # Map the opening "#" sequence to the heading class of that level
  def heading(lvl)
    {
      1 => ::RBMark::DOM::Heading1,
      2 => ::RBMark::DOM::Heading2,
      3 => ::RBMark::DOM::Heading3,
      4 => ::RBMark::DOM::Heading4,
      5 => ::RBMark::DOM::Heading5,
      6 => ::RBMark::DOM::Heading6
    }[lvl.length]
  end
end
# Paragraph closing variant: a blank line ends the current paragraph
class BlankSeparator < BreakerVariant
  # (see BlockVariant#begin?)
  # Only answers when asked as a paragraph breaker
  def begin?(line, breaks_paragraph: nil, **_opts)
    return breaks_paragraph unless breaks_paragraph

    line.match?(/^ {0,3}$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  # A blank line never changes the class of the paragraph
  def match(_buffer)
    nil
  end
end
# Setext heading variant: a paragraph underlined with "=" (level 1) or "-" (level 2)
class SetextHeadingVariant < BreakerVariant
  # (see BlockVariant#begin?)
  # Only answers when asked as a paragraph breaker
  def begin?(line, breaks_paragraph: nil, **_opts)
    return breaks_paragraph unless breaks_paragraph

    line.match?(/^ {0,3}(?:-+|=+) *$/)
  end

  # (see BlockVariant#end?)
  def end?(_line, **_opts)
    true
  end

  # (see BreakerVariant#match)
  # Matches only when there is heading text above the underline
  def match(buffer)
    heading(buffer.lines.last) if preprocess(buffer).match?(/\S/)
  end

  # (see BreakerVariant#preprocess)
  # Drops the last line (the underline) from the buffer
  def preprocess(buffer)
    buffer.lines[..-2].join
  end

  private

  # Select the heading class from the underline
  def heading(buffer)
    case buffer
    when /^ {0,3}-+ *$/ then ::RBMark::DOM::Heading2
    when /^ {0,3}=+ *$/ then ::RBMark::DOM::Heading1
    end
  end
end
# Indented literal block variant: lines indented by four spaces or a tab
class IndentedBlockVariant < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?: {4}|\t)/)
  end

  # (see BlockVariant#end?)
  # The block continues while the next line is indented (by four spaces or
  # by a tab, matching begin?) or blank
  def end?(_line, lookahead: nil, **_opts)
    !lookahead&.match?(/^(?: {4}.*|\t.*|\s*)$/)
  end

  # (see BlockVariant#flush)
  # Removes one level of indentation from every line: a single tab or up to
  # four spaces (blank lines may be shorter than four characters)
  def flush(buffer)
    text = buffer.lines.map do |x|
      "#{x.chomp.sub(/\A(?:\t| {1,4})/, '')}\n"
    end.join
    block = ::RBMark::DOM::IndentBlock.new
    block.content = text # TODO: replace this with inline text
    block
  end
end
# Fenced code block: text between a pair of matching "```" or "~~~" fences
class FencedCodeBlock < BlockVariant
  # (see BlockVariant#begin?)
  def begin?(line, **_opts)
    line.match?(/^(?:`{3,}[^`]*|~{3,}.*)$/)
  end

  # (see BlockVariant#end?)
  # The block ends on a line equal to the opening fence. Blank lines that
  # precede the opening fence in the buffer are not counted, so the opening
  # line itself can never be taken for the closing one.
  def end?(line, blocks: nil, buffer: nil, **_opts)
    buffer.lstrip.lines.length > 1 &&
      line.strip == blocks.last.properties[:expected_closer]
  end

  # (see BlockVariant#begin)
  # Remembers the fence that has to close the block and the info string
  def begin(buffer)
    fence, info = buffer.match(/^(`{3,}|~{3,})(.*)$/).captures
    block = ::RBMark::DOM::CodeBlock.new
    block.properties[:expected_closer] = fence
    block.properties[:infostring] = info.strip
    block
  end

  # (see BlockVariant#end)
  def end(block, buffer)
    # TODO: replace this with inline text
    closer = block.properties.delete(:expected_closer)
    body = buffer.lstrip.lines.drop(1)
    # The closing fence is absent when the input ends inside the block;
    # in that case the last line is code and must be kept
    body.pop if body.last&.strip == closer
    block.content = body.join
  end
end
end
# Module for representing abstract object hierarchy
module DOM
# Abstract container.
# Class-level state (variants, scanner class, atomic flag) describes how the
# text of an element is parsed and is copied to subclasses when they are
# defined. Instances hold text content, child elements and a properties hash.
class DOMObject
  class << self
    attr_accessor :variants, :scanner_class, :alt_for, :atomic_mode

    # Hook for initializing variables: a new subclass starts with a copy of
    # the variants, the scanner class and the atomic flag of its parent
    # @param subclass [Class]
    def inherited(subclass)
      super
      @subclasses ||= []
      @subclasses.append(subclass)
      subclass.variants = @variants.dup || []
      subclass.atomic_mode = @atomic_mode
      subclass.scanner_class = @scanner_class
    end

    # Add potential sub-element variant (also added to all existing subclasses)
    # @param cls [Class] Variant subclass
    # @param prio [Integer] priority; variants with lower values are tried first
    # @raise [StandardError] if cls is not a Variant subclass
    def variant(cls, prio: 1)
      unless cls < ::RBMark::Parsing::Variant
        raise StandardError, "#{cls} is not a Variant subclass"
      end

      (@variants ||= []).append([cls, prio])
      @subclasses&.each do |subclass|
        # prio is a keyword argument and has to be forwarded as one
        subclass.variant(cls, prio: prio)
      end
    end

    # Set scanner class (also set on all existing subclasses)
    # @param cls [Class] Scanner subclass
    # @raise [StandardError] if cls is not a Scanner subclass
    def scanner(cls)
      unless cls < ::RBMark::Parsing::Scanner
        raise StandardError, "#{cls} is not a Scanner subclass"
      end

      @scanner_class = cls
      @subclasses&.each do |subclass|
        subclass.scanner(cls)
      end
    end

    # Prepare scanner and variants.
    # The scanner is rebuilt on every call so that variants registered after
    # an earlier parse are always taken into account.
    # @return [void]
    # @raise [StandardError] if no scanner class has been set
    def prepare
      raise StandardError, "#{self} has no scanner class" unless @scanner_class

      @scanner = @scanner_class.new
      @scanner.variants = @variants.map { |x| [x[0].new, x[1]] }
    end

    # Parse text from the given context.
    # Atomic classes store the text as content; other classes scan it into
    # child elements.
    # @param text [String]
    # @return [self]
    def parse(text)
      instance = new
      if @atomic_mode
        instance.content = text
      else
        prepare
        instance.append(*@scanner.scan(text))
      end
      instance
    end

    # Create a new instance of class or referenced class
    # @return [self, Class]
    def create
      if @alt_for
        @alt_for.new
      else
        new
      end
    end

    # Set the atomic flag
    # @return [void]
    def atomic
      @atomic_mode = true
    end
  end

  # @return [String, nil] text content
  # @return [Array<DOMObject>] child elements
  # @return [Hash] element properties
  attr_reader :content, :children, :properties

  def initialize
    @content = nil
    @children = []
    @properties = {}
  end

  # Set certain property in the properties hash
  # @param properties [Hash] properties to update
  def property(**properties)
    @properties.update(properties)
  end

  # Add children to container
  # @param children [Array<DOMObject>]
  # @raise [StandardError] if any argument is not a DOMObject
  def append(*children)
    unless children.all? { |x| x.is_a? DOMObject }
      raise StandardError, "one of #{children.inspect} is not a DOMObject"
    end

    @children.append(*children)
  end

  # Insert a child into the container
  # @param index [Integer]
  # @param child [DOMObject]
  # @raise [StandardError] if child is not a DOMObject
  def insert(index, child)
    raise StandardError, "not a DOMObject" unless child.is_a? DOMObject

    @children.insert(index, child)
  end

  # Delete a child from container
  # @param index [Integer]
  def delete_at(index)
    @children.delete_at(index)
  end

  # Get a child from the container
  # @param key [Integer]
  def [](key)
    @children[key]
  end

  # Set text content of a DOMObject
  # @param text [String]
  # @raise [StandardError] if text is not a String
  def content=(text)
    raise StandardError, "not a String" unless text.is_a? String

    @content = text
  end
end
# Inline text
class Text < DOMObject
end
# Inline preformatted text
class InlinePre < DOMObject
end
# Inline formattable text.
# Marked atomic: parsing stores the text as content instead of scanning it.
# Subclasses defined below inherit the flag.
class InlineFormattable < DOMObject
atomic
end
# Bold text
class InlineBold < InlineFormattable
end
# Italics text
class InlineItalics < InlineFormattable
end
# Inline italics text (alternative)
class InlineAltItalics < InlineFormattable
end
# Underline text
class InlineUnder < InlineFormattable
end
# Strikethrough text
class InlineStrike < InlineFormattable
end
# Hyperreferenced text
class InlineLink < InlineFormattable
end
# Image
class InlineImage < InlinePre
end
# Linebreak
class InlineBreak < DOMObject
end
# Document root.
# Scanned line by line; the variants below are tried in priority order, with
# the blank separator and the paragraph registered last as fallbacks.
class Document < DOMObject
scanner ::RBMark::Parsing::LineScanner
variant ::RBMark::Parsing::ATXHeadingVariant
variant ::RBMark::Parsing::ThematicBreakVariant
variant ::RBMark::Parsing::SetextHeadingVariant
variant ::RBMark::Parsing::IndentedBlockVariant
variant ::RBMark::Parsing::FencedCodeBlock
variant ::RBMark::Parsing::BlankSeparator, prio: 9998
variant ::RBMark::Parsing::ParagraphVariant, prio: 9999
end
# Paragraph in a document (separated by 2 newlines).
# The atomic flag is already inherited from InlineFormattable and is
# restated here explicitly.
class Paragraph < InlineFormattable
atomic
end
# Heading level 1
class Heading1 < InlineFormattable
end
# Heading level 2
class Heading2 < Heading1
end
# Heading level 3
class Heading3 < Heading1
end
# Heading level 4
class Heading4 < Heading1
end
# Heading level 5
class Heading5 < Heading1
end
# Heading level 6
class Heading6 < Heading1
end
# Preformatted code block
class CodeBlock < DOMObject
end
# Quote block (inherits the scanner and variants of Document)
class QuoteBlock < Document
end
# Table
class TableBlock < DOMObject
end
# List element (inherits the scanner and variants of Document)
class ListElement < Document
end
# Unordered list
class ULBlock < DOMObject
end
# Ordered list block
class OLBlock < DOMObject
end
# Indent block
class IndentBlock < DOMObject
end
# Horizontal rule
class HorizontalRule < DOMObject
atomic
end
end
end

9
lib/rbmark/renderers.rb Normal file
View File

@ -0,0 +1,9 @@
# frozen_string_literal: true
module RBMark
# Renderers from Markdown to expected output format.
# The namespace is declared here; concrete renderers live in separate files
# that are loaded below.
module Renderers
end
end
# Load the bundled HTML renderer
require_relative 'renderers/html'

View File

@ -0,0 +1,132 @@
# frozen_string_literal: true
require 'rbmark'
module RBMark
  module Renderers
    # HTML Renderer
    class HTML
      # Maps the name of a DOM class to the description of the HTML element
      # it is rendered as. Text has no element of its own.
      ELEMENT_MAP = {
        "RBMark::DOM::InlinePre" => {
          tag: "code",
          inline: true
        },
        "RBMark::DOM::InlineBreak" => {
          tag: "br",
          inline: true
        },
        "RBMark::DOM::InlineBold" => {
          tag: "strong",
          inline: true
        },
        "RBMark::DOM::InlineItalics" => {
          tag: "em",
          inline: true
        },
        "RBMark::DOM::InlineUnder" => {
          tag: "span",
          inline: true,
          style: "text-decoration: underline;"
        },
        "RBMark::DOM::InlineStrike" => {
          tag: "s",
          inline: true
        },
        # Hyperlinks are anchor elements; <link> is a metadata element
        "RBMark::DOM::InlineLink" => {
          tag: "a",
          href: true,
          inline: true
        },
        "RBMark::DOM::InlineImage" => {
          tag: "img",
          src: true,
          inline: true
        },
        "RBMark::DOM::ULBlock" => {
          tag: "ul"
        },
        "RBMark::DOM::OLBlock" => {
          tag: "ol"
        },
        "RBMark::DOM::IndentBlock" => {
          tag: "pre"
        },
        "RBMark::DOM::ListElement" => {
          tag: "li"
        },
        "RBMark::DOM::Paragraph" => {
          tag: "p"
        },
        "RBMark::DOM::Heading1" => {
          tag: "h1"
        },
        "RBMark::DOM::Heading2" => {
          tag: "h2"
        },
        "RBMark::DOM::Heading3" => {
          tag: "h3"
        },
        "RBMark::DOM::Heading4" => {
          tag: "h4"
        },
        "RBMark::DOM::Heading5" => {
          tag: "h5"
        },
        "RBMark::DOM::Heading6" => {
          tag: "h6"
        },
        "RBMark::DOM::Document" => {
          tag: "main"
        },
        "RBMark::DOM::CodeBlock" => {
          tag: "pre",
          outer: {
            tag: "code"
          }
        },
        "RBMark::DOM::QuoteBlock" => {
          tag: "blockquote"
        },
        "RBMark::DOM::HorizontalRule" => {
          tag: "hr"
        },
        "RBMark::DOM::Text" => nil
      }.freeze

      # @param dom [RBMark::DOM::Document] parsed document
      # @param options [Hash] 'preambule' / 'postambule': a String to emit
      #   verbatim, or any other truthy value to emit the default text
      def initialize(dom, options)
        @document = dom
        @options = options
      end

      # Render document to HTML
      # @return [String] optional preambule, document body, optional postambule
      def render
        output = +""
        output << preambule if @options['preambule']
        output << _render(@document, 2)
        output << postambule if @options['postambule']
        output
      end

      private

      # Render a single element.
      # Element rendering has not been written yet; the method is closed
      # properly so that the file can be loaded, and fails explicitly.
      # @param _element [RBMark::DOM::DOMObject]
      # @param _indent [Integer]
      # @return [String]
      def _render(_element, _indent = 0)
        raise StandardError, "HTML element rendering is not implemented yet"
      end

      # Text emitted before the document body
      # @return [String]
      def preambule
        custom = @options['preambule']
        return custom if custom.is_a?(String)

        # NOTE(review): the head content is taken from the :head property of
        # the document (DOMObject#[] only indexes children) - confirm that
        # this is where it is meant to be stored
        <<~TEXT
          <!DOCTYPE HTML>
          <html>
          <head>
          #{@document.properties[:head]}
          </head>
          <body>
        TEXT
      end

      # Text emitted after the document body
      # @return [String]
      def postambule
        custom = @options['postambule']
        return custom if custom.is_a?(String)

        <<~TEXT
          </body>
          </html>
        TEXT
      end
    end
  end
end

View File

@ -1,217 +0,0 @@
## Filter-based Markdown translator.
# Every translator turns its input into HTML for one group of Markdown
# constructs; translators are chained with + and run in order by to_html.
module Markdown
  ## Superclass that defines behaviour of all translators
  # @abstract Don't use directly - it only defines the ability to chain translators
  class AbstractTranslator
    attr_accessor :input, :output

    def initialize
      @chain = []
    end

    # Append a translator (class or instance) to the chain
    # @return [self]
    def +(next_translator)
      @chain.append next_translator
      self
    end

    # Run the chained translators over this translator's output.
    # Classes in the chain are instantiated with the output of the previous step.
    # @return [String]
    def to_html
      @chain.reduce(@output) do |text, translator|
        translator = translator.new(text) if translator.is_a?(Class)
        translator.to_html
        translator.output
      end
    end
  end

  module_function

  # Optional syntax highlighter: a callable taking (language, code)
  def html_highlighter; @html_highlighter end
  def html_highlighter= v; @html_highlighter = v end

  ## Translator for linear tags in Markdown.
  # A linear tag is any tag that starts anywhere on the line, and closes on the same exact line.
  class LinearTagTranslator < AbstractTranslator
    def initialize(text)
      @input = text
      @output = text
      super()
    end

    def to_html
      @output = @input
                # Newline: every pair of trailing blanks, not just the first one
                .gsub(/[ \t]{2}[\n\r]/, "<br/>")
                # Inline code (discord style)
                .gsub(/(?<!\\)``(.*?[^\\])``/) do
                  "<code>#{Regexp.last_match(1).gsub(/[*`~_!\[]/, "\\\\\\0")}</code>"
                end
                # Inline code (Markdown style)
                .gsub(/(?<!\\)`(.*?[^\\])`/) do
                  "<code>#{Regexp.last_match(1).gsub(/[*`~_!\[]/, "\\\\\\0")}</code>"
                end
                # Bold-italics
                .gsub(/(?<!\\)\*\*\*(.*?[^\\])\*\*\*/, "<i><b>\\1</b></i>")
                # Bold
                .gsub(/(?<!\\)\*\*(.*?[^\\])\*\*/, "<b>\\1</b>")
                # Italics
                .gsub(/(?<!\\)\*(.*?[^\\])\*/, "<i>\\1</i>")
                # Strikethrough
                .gsub(/(?<!\\)~~(.*?[^\\])~~/, "<s>\\1</s>")
                # Underline
                .gsub(/(?<!\\)__(.*?[^\\])__/, "<span style=\"text-decoration: underline\">\\1</span>")
                # Image (non-greedy, so several images on a line stay separate)
                .gsub(/(?<!\\)!\[(.*?)\]\((.*?)\)/, "<img src=\"\\2\" alt=\"\\1\" />")
                # Link (non-greedy, so several links on a line stay separate)
                .gsub(/(?<!\\)\[(.*?)\]\((.*?)\)/, "<a href=\"\\2\">\\1</a>")
      super
    end
  end

  ## Translator for linear leftmost tags.
  # Leftmost linear tags open on the leftmost end of the string, and close once the line ends. These tags do not need to be explicitly closed.
  class LeftmostTagTranslator < AbstractTranslator
    def initialize(text)
      @input = text
      @output = text
      super()
    end

    def to_html
      @output = @input.split("\n").map do |row|
        # Headers: one to four "#"; a longer run is not a header at all
        row.gsub(/^(\#{1,4})(?!\#)([^\n\r]*)/) do
          level, content = Regexp.last_match[1..2]
          "<h#{level.length}>#{content}</h#{level.length}>"
        end.gsub(/^\-{3,}/, "<hr>")
      end.join("\n")
      super
    end
  end

  ## Translator for code blocks in markdown
  # Code blocks can have syntax highlighting. This class implements an attribute for providing a syntax highlighter, one handler per requested output.
  class CodeBlockTranslator < AbstractTranslator
    def initialize(text)
      @input = text
      @output = text
      super()
    end

    def to_html
      @output = @input.gsub(/(?:\n|^)```([\w_-]*)([\s\S]+?)```/) do
        language, code = Regexp.last_match[1..2]
        code = Markdown.html_highlighter.call(language, code) if Markdown.html_highlighter
        # Escape Markdown control characters so later translators leave the code alone
        "<pre><code>#{code.gsub(/[|#*`~_!\[]/, "\\\\\\0")}</code></pre>"
      end
      super()
    end
  end

  ## Translator for quotes in Markdown.
  # Each quote is its own Markdown context, so quoted ranges are translated recursively.
  class QuoteTranslator < AbstractTranslator
    def initialize(text)
      if text.is_a? Array
        @lines = text
      elsif text.is_a? String
        @lines = text.split("\n")
      end
      @output = text
      super()
    end

    def input=(v)
      @lines = v.split("\n")
      @output = v
    end

    def input
      @lines.join("\n")
    end

    def to_html
      stack = []
      range = []
      # Collect the ranges of consecutive quoted lines
      @lines.each_with_index do |row, index|
        if row.match(/^\s*> ?/)
          range[0] ||= index
          range[1] = index
        else
          stack.append(range[0]..range[1]) if range[0] && range[1]
          range = []
        end
      end
      stack.append(range[0]..range[1]) if range[0] && range[1]
      # Replace from the bottom up so earlier ranges keep their indices
      stack.reverse.each do |r|
        @lines[r.begin] = "<blockquote>\n" + @lines[r.begin]
        @lines[r.end] = @lines[r.end] + "\n</blockquote>"
        @lines[r] = @lines[r].map do |line|
          line.sub(/^(\s*)> ?/, "\\1 ")
        end
        @lines[r] = QuoteTranslator.new(@lines[r]).to_html
      end
      @output = @lines.join("\n")
      super
    end
  end

  ## Table parser
  # translates tables from a format in markdown to an html table
  class TableTranslator < AbstractTranslator
    def initialize(text)
      @input = text
      @output = text
      super()
    end

    def to_html
      lines = @output.split("\n")
      table_testline = -1
      table_start = -1
      table_column_count = 0
      tables = []
      cur_table = []
      lines.each_with_index do |line, index|
        if (table_start != -1) && line.match(/^\s*\|([^\|]*\|){#{table_column_count - 1}}$/)
          if table_testline == -1
            # The second row of a table has to be the |---|---| separator
            if line.match(/^\s*\|(\-*\|){#{table_column_count - 1}}$/)
              table_testline = 1
            else
              table_start = -1
              cur_table = []
            end
          else
            cur_table.push(line.split("|").filter_map { |x| x.strip if x.match(/\S+/) })
          end
        elsif table_start != -1
          # The current line is the first one after the table and must not
          # be part of the replaced range
          tables.push({ table: cur_table, start: table_start, end: index - 1 })
          table_start = -1
          cur_table = []
          table_testline = -1
          table_column_count = 0
        end
        if (table_start == -1) && line.start_with?(/\s*\|/) && line.match(/^\s*\|.*\|/)
          table_start = index
          table_column_count = line.count "|"
          cur_table.push(line.split("|").filter_map { |x| x.strip if x.match(/\S+/) })
        end
      end
      if cur_table != []
        tables.push({ table: cur_table, start: table_start, end: lines.count - 1 })
      end
      # Replace from the bottom up so earlier tables keep their indices
      tables.reverse.each do |entry|
        rows = entry[:table].map do |cells|
          cells.map do |cell|
            if cell.start_with?("#")
              " <th>" + cell.sub(/^#\s+/, "") + "</th>"
            else
              " <td>" + cell + "</td>"
            end
          end.prepend(" <tr>").append(" </tr>")
        end
        lines[entry[:start]..entry[:end]] = rows.flatten.prepend("<table>").append("</table>")
      end
      @output = lines.join("\n")
      super()
    end
  end

  # Backslash cleaner
  # Cleans excessive backslashes after the translation
  class BackslashTranslator < AbstractTranslator
    def initialize(text)
      @input = text
      @output = text
      # Initializes the chain so that this translator can be chained as well
      super()
    end

    def to_html
      @output = @input.gsub(/\\(.)/, "\\1")
      super
    end
  end
end

21
rbmark.gemspec Normal file
View File

@ -0,0 +1,21 @@
# frozen_string_literal: true
# Gem specification for rbmark: the parser library plus the mdpp executable
Gem::Specification.new do |spec|
  # Identity
  spec.name = 'rbmark'
  spec.version = '0.5'
  spec.summary = <<~SUMMARY
    Modular, extensible, HTML-agnostic Markdown parser
  SUMMARY
  spec.description = <<~TEXT
    RBMark is a Markdown parser that represents Markdown in a DOM-like
    object structure, allowing for other interfaces to produce more
    complex translators from Markdown to any given format.
  TEXT
  # Authorship and licensing
  spec.authors = ['yessiest']
  spec.email = 'yessiest@text.512mb.org'
  spec.license = 'Apache-2.0'
  spec.homepage = 'https://adastra7.net/git/Yessiest/rubymark'
  # Packaged files: every library source plus everything in bin/
  spec.files = Dir['lib/**/*.rb'] + Dir['bin/*']
  spec.required_ruby_version = '>= 3.0.0'
  spec.executables = ['mdpp']
end

118
test.rb
View File

@ -1,118 +0,0 @@
require_relative "markdown"
# Manual check of LinearTagTranslator: prints the HTML it produces for a
# sample of inline markup (emphasis, underline, strikethrough, inline code
# and links). The output is only printed, nothing is asserted.
puts Markdown::LinearTagTranslator.new(<<CODE
*Italics*
**Bold**
***Bolitalics***
__underline__
__underline plus ***bolitalics***__
___invalid underline___
~~strikethrough ~~
`code that ignores ***all*** __Markdown__ [tags](https://nevergonnagiveyouup)`
me: google en passant
them: [holy hell!](https://google.com/q?=en+passant)
CODE
).to_html
# Manual check of LeftmostTagTranslator: prints the HTML it produces for
# lines that start with one to five "#" markers, plus lines where the
# markers do not lead the line. The output is only printed, not asserted.
puts Markdown::LeftmostTagTranslator.new(<<CODE
# Header v1
## Header v2
### Header v3
#### Header v4
##### Invalid header
#### Not a header
*** Also #### Not a header ***
CODE
).to_html
# Manual check of QuoteTranslator: prints the HTML it produces for quotes
# nested up to three levels deep, including empty quote lines and moves
# back to a shallower level. The output is only printed, not asserted.
puts Markdown::QuoteTranslator.new(<<CODE
> Quote begins
>
> yea
> # header btw
> > nextlevel quote
> > more quote
> > those are quotes
> > yes
> > > third level quote
> > > yes
> > second level again
> > > third level again
> > second level oioioi
> >
> > > third
> > >
> > >
> > >
>
>
>
> fin
CODE
).to_html
# Manual check of CodeBlockTranslator: prints the HTML it produces for one
# fenced block whose body contains other Markdown markers. The output is
# only printed, not asserted.
puts Markdown::CodeBlockTranslator.new(<<CODE
```markdown
shmarkshmark
# pee pee
# piss
**ass**
__cock__
cock__
piss__
`shmark shmark`
```
CODE
).to_html
# Full pipeline check: one document mixing headers, links, an image, quotes,
# a fenced block and tables is handed to CodeBlockTranslator, which is joined
# with the remaining translator classes through "+" (presumably building a
# translation chain - confirm against AbstractTranslator). The value returned
# by to_html is kept in `test`.
test = (Markdown::CodeBlockTranslator.new(<<TEXT
# Markdown garbage gallery
## Header level 2
### Header level 3
#### Header level 4
__[Underlined Link](https://google.com)__
__**unreal shitworks**__
split
---
![Fucking image idk](https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Ftse3.explicit.bing.net%2Fth%3Fid%3DOIP.qX1HmpFNHyaTfXv-SLnAJgHaDD%26pid%3DApi&f=1&ipt=dc0e92fdd701395eda76714338060dcf91c7ff9e228f108d8af6e1ba3decd1c2&ipo=images)
> Here's a bunch of shit i guess lmao idk
```markdown
test
test
test
|1|2|3|
|-|-|-|
|a|b|c|
| uneven rows | test | yes |
|-|-|-|
| sosiska | dinozavri | suda pihaem |
| sosiska 2 | vitalya 2 | brat 2 |
*** test ***
piss
cock
__cock__
# hi
```
> ok
> here i go pissing
> ***time to take a piss***
> > pissing
> > "what the hell are you doing"
> > i'm taking a pieeees
> > "why areyou not jomping at me thats what yourshupposed to do
> > I might do it focking later
> > ok
> # bug
> __cum__
__mashup__
| # sosiska | sosiska | suda pihaem |
|-|-|-|
| # 2 | chuvak ya ukral tvayu sardelku ))0)))0))))))) | __blya ((9((9((9)__ |
| # azazaz lalka sasI | test | test |
TEXT
)+Markdown::QuoteTranslator+Markdown::LeftmostTagTranslator+Markdown::LinearTagTranslator+Markdown::TableTranslator+Markdown::BackslashTranslator)
.to_html
# Dump the rendered HTML to /tmp/test.html for manual inspection.
# File.write opens, writes and closes the file in a single call, so the
# handle is released even when writing raises.
File.write("/tmp/test.html", test)

102
test/test_atx_headers.rb Normal file
View File

@ -0,0 +1,102 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# CommonMark v0.31.2 conformance checks for ATX ("#"-prefixed) headings
class TestATXHeadings < Minitest::Test
  # One "#" followed by a space is expected to produce a Heading1 block
  def test_simple_heading1
    tree = parse_markdown(<<~DOC)
      # ATX Heading level 1
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[0])
  end

  # Two "#" markers are expected to produce a Heading2 block
  def test_simple_heading2
    tree = parse_markdown(<<~DOC)
      ## ATX Heading level 2
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading2, tree.children[0])
  end

  # Three "#" markers are expected to produce a Heading3 block
  def test_simple_heading3
    tree = parse_markdown(<<~DOC)
      ### ATX Heading level 3
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading3, tree.children[0])
  end

  # Four "#" markers are expected to produce a Heading4 block
  def test_simple_heading4
    tree = parse_markdown(<<~DOC)
      #### ATX Heading level 4
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading4, tree.children[0])
  end

  # Five "#" markers are expected to produce a Heading5 block
  def test_simple_heading5
    tree = parse_markdown(<<~DOC)
      ##### ATX Heading level 5
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading5, tree.children[0])
  end

  # Six "#" markers are expected to produce a Heading6 block
  def test_simple_heading6
    tree = parse_markdown(<<~DOC)
      ###### ATX Heading level 6
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading6, tree.children[0])
  end

  # Seven "#" markers are expected to be parsed as a Paragraph
  def test_simple_not_a_heading
    tree = parse_markdown(<<~DOC)
      ####### NOT a heading
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, tree.children[0])
  end

  # A heading line between two text lines is expected to yield
  # paragraph, heading, paragraph
  def test_breaking_paragrpah
    tree = parse_markdown(<<~DOC)
      Paragraph 1
      # ATX Heading level 1
      Paragraph 2
    DOC
    expected = [::RBMark::DOM::Paragraph, ::RBMark::DOM::Heading1, ::RBMark::DOM::Paragraph]
    expected.each_with_index do |klass, index|
      assert_instance_of(klass, tree.children[index])
    end
  end

  # "#" directly followed by text (no space) is expected to stay a Paragraph
  def test_heading_sans_space
    tree = parse_markdown(<<~DOC)
      #NOT an ATX heading
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, tree.children[0])
  end

  # A backslash-escaped "#" is expected to stay a Paragraph
  def test_heading_escaped
    tree = parse_markdown(<<~DOC)
      \\# Escaped ATX heading
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, tree.children[0])
  end

  # Four heading lines are expected to map to Heading4..Heading1 in order,
  # while the fifth line must not become a Heading1
  # NOTE(review): the fixture lines carry no leading spaces as written here
  # although the test name suggests indentation is exercised - confirm
  def test_spaces
    tree = parse_markdown(<<~DOC)
      #### Heading level 4
      ### Heading level 3
      ## Heading level 2
      # Heading level 1
      # NOT a heading
    DOC
    headings = [::RBMark::DOM::Heading4, ::RBMark::DOM::Heading3,
                ::RBMark::DOM::Heading2, ::RBMark::DOM::Heading1]
    headings.each_with_index do |klass, index|
      assert_instance_of(klass, tree.children[index])
    end
    refute_instance_of(::RBMark::DOM::Heading1, tree.children[4])
  end

  private

  # Parses a Markdown string into an RBMark document
  def parse_markdown(markdown)
    ::RBMark::DOM::Document.parse(markdown)
  end
end

View File

@ -0,0 +1,147 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# CommonMark v0.31.2 conformance checks for Setext (underlined) headings
class TestSetextHeadings < Minitest::Test
  # "=" underline is expected to give Heading1, "-" underline Heading2
  def test_simple_heading1
    tree = parse_markdown(<<~DOC)
      Foo *bar*
      =========
      Foo *bar*
      ---------
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[0])
    assert_instance_of(::RBMark::DOM::Heading2, tree.children[1])
  end

  # Two text lines above the underline are expected to form a single heading
  def test_multiline_span
    tree = parse_markdown(<<~DOC)
      Foo *bar
      baz*
      ====
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[0])
    assert_equal(1, tree.children.length)
  end

  # Expects the second block to be a Heading1; the test is then marked
  # as skipped
  def test_span_inlining
    tree = parse_markdown(<<~DOC)
      start
      Foo *bar
      baz
      ====
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[1])
    skip
  end

  # Underlines of any length (thirty "-" or a single "=") are expected to work
  def test_line_length
    tree = parse_markdown(<<~DOC)
      Foo
      ------------------------------
      Foo
      =
    DOC
    assert_instance_of(::RBMark::DOM::Heading2, tree.children[0])
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[1])
  end

  def test_content_indent
    skip # TODO: implement this
  end

  # Expects the first block not to be a Heading2 and the following three
  # blocks to be Heading1
  def test_marker_indent
    tree = parse_markdown(<<~DOC)
      Foo
      ------------------------------
      Foo
      =
      Foo
      =
      Foo
      =
    DOC
    refute_instance_of(::RBMark::DOM::Heading2, tree.children[0])
    (1..3).each do |index|
      assert_instance_of(::RBMark::DOM::Heading1, tree.children[index])
    end
  end

  # Underlines containing spaces are expected not to create a heading
  # NOTE(review): both refutations look at the first block only - confirm
  def test_no_internal_spaces
    tree = parse_markdown(<<~DOC)
      Foo
      -- - -
      Foo
      == =
    DOC
    [::RBMark::DOM::Heading2, ::RBMark::DOM::Heading1].each do |klass|
      refute_instance_of(klass, tree.children[0])
    end
  end

  # The underline is expected to win over the inline code span that is
  # opened on the line above it
  def test_block_level_priority
    tree = parse_markdown(<<~DOC)
      ` Foo
      ------
      `
    DOC
    assert_instance_of(::RBMark::DOM::Heading2, tree.children[0])
    assert_instance_of(::RBMark::DOM::Paragraph, tree.children[1])
  end

  # Only parses the fixture for now; no assertions are made yet
  def test_paragraph_breaking_only
    parse_markdown(<<~DOC)
      > text
      ------
    DOC
    skip # TODO: implement this
  end

  # Only parses the fixture for now; no assertions are made yet
  def test_paragraph_breaking_only_lazy_continuation
    parse_markdown(<<~DOC)
      > text
      continuation line
      ------
    DOC
    skip # TODO: implement this
  end

  # Consecutive text/underline pairs are expected to give three headings
  def test_headings_back_to_back
    tree = parse_markdown(<<~DOC)
      heading1
      ------
      heading2
      ------
      heading3
      ======
    DOC
    expected = [::RBMark::DOM::Heading2, ::RBMark::DOM::Heading2, ::RBMark::DOM::Heading1]
    expected.each_with_index do |klass, index|
      assert_instance_of(klass, tree.children[index])
    end
  end

  # A lone "=" underline is expected not to create a Heading1
  def test_no_empty_headings
    tree = parse_markdown(<<~DOC)
      ======
    DOC
    refute_instance_of(::RBMark::DOM::Heading1, tree.children[0])
  end

  # Two "-" lines in a row are expected not to create any Heading2
  def test_thematic_breaks
    tree = parse_markdown(<<~DOC)
      ----
      ----
    DOC
    2.times do |index|
      refute_instance_of(::RBMark::DOM::Heading2, tree.children[index])
    end
  end

  private

  # Parses a Markdown string into an RBMark document
  def parse_markdown(markdown)
    ::RBMark::DOM::Document.parse(markdown)
  end
end

View File

@ -0,0 +1,102 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# CommonMark v0.31.2 conformance checks for ATX ("#"-prefixed) headings
class TestATXHeadings < Minitest::Test
  # One "#" followed by a space is expected to produce a Heading1 block
  def test_simple_heading1
    tree = parse_markdown(<<~DOC)
      # ATX Heading level 1
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[0])
  end

  # Two "#" markers are expected to produce a Heading2 block
  def test_simple_heading2
    tree = parse_markdown(<<~DOC)
      ## ATX Heading level 2
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading2, tree.children[0])
  end

  # Three "#" markers are expected to produce a Heading3 block
  def test_simple_heading3
    tree = parse_markdown(<<~DOC)
      ### ATX Heading level 3
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading3, tree.children[0])
  end

  # Four "#" markers are expected to produce a Heading4 block
  def test_simple_heading4
    tree = parse_markdown(<<~DOC)
      #### ATX Heading level 4
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading4, tree.children[0])
  end

  # Five "#" markers are expected to produce a Heading5 block
  def test_simple_heading5
    tree = parse_markdown(<<~DOC)
      ##### ATX Heading level 5
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading5, tree.children[0])
  end

  # Six "#" markers are expected to produce a Heading6 block
  def test_simple_heading6
    tree = parse_markdown(<<~DOC)
      ###### ATX Heading level 6
      Paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Heading6, tree.children[0])
  end

  # Seven "#" markers are expected to be parsed as a Paragraph
  def test_simple_not_a_heading
    tree = parse_markdown(<<~DOC)
      ####### NOT a heading
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, tree.children[0])
  end

  # A heading line between two text lines is expected to yield
  # paragraph, heading, paragraph
  def test_breaking_paragrpah
    tree = parse_markdown(<<~DOC)
      Paragraph 1
      # ATX Heading level 1
      Paragraph 2
    DOC
    expected = [::RBMark::DOM::Paragraph, ::RBMark::DOM::Heading1, ::RBMark::DOM::Paragraph]
    expected.each_with_index do |klass, index|
      assert_instance_of(klass, tree.children[index])
    end
  end

  # "#" directly followed by text (no space) is expected to stay a Paragraph
  def test_heading_sans_space
    tree = parse_markdown(<<~DOC)
      #NOT an ATX heading
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, tree.children[0])
  end

  # A backslash-escaped "#" is expected to stay a Paragraph
  def test_heading_escaped
    tree = parse_markdown(<<~DOC)
      \\# Escaped ATX heading
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, tree.children[0])
  end

  # Four heading lines are expected to map to Heading4..Heading1 in order,
  # while the fifth line must not become a Heading1
  # NOTE(review): the fixture lines carry no leading spaces as written here
  # although the test name suggests indentation is exercised - confirm
  def test_spaces
    tree = parse_markdown(<<~DOC)
      #### Heading level 4
      ### Heading level 3
      ## Heading level 2
      # Heading level 1
      # NOT a heading
    DOC
    headings = [::RBMark::DOM::Heading4, ::RBMark::DOM::Heading3,
                ::RBMark::DOM::Heading2, ::RBMark::DOM::Heading1]
    headings.each_with_index do |klass, index|
      assert_instance_of(klass, tree.children[index])
    end
    refute_instance_of(::RBMark::DOM::Heading1, tree.children[4])
  end

  private

  # Parses a Markdown string into an RBMark document
  def parse_markdown(markdown)
    ::RBMark::DOM::Document.parse(markdown)
  end
end

97
test/test_indent_block.rb Normal file
View File

@ -0,0 +1,97 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# Test indented code block parsing compliance with CommonMark v0.31.2
# The class used to be called TestSetextHeadings (copied from the Setext
# suite), which made this file reopen that suite instead of defining its own
class TestIndentBlock < Minitest::Test
  # Expects the block that follows the leading text to be an IndentBlock
  def test_simple_indent
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      text
      indented code block
      without space mangling
      int main() {
      printf("Hello world!\\n");
      }
    DOC
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1])
  end

  def test_list_item_precedence
    skip # TODO: implement this
  end

  def test_numbered_list_item_precedence
    skip # TODO: implement this
  end

  def test_check_indent_contents
    skip # TODO: verify the text captured inside the indent block
  end

  # Same as test_simple_indent with extra lines appended to the fixture
  def test_long_chunk
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      text
      indented code block
      without space mangling
      int main() {
      printf("Hello world!\\n");
      }
      there are many space changes here and blank lines that
      should *NOT* affect the way this is parsed
    DOC
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1])
  end

  # Expects the whole fixture to stay one single Paragraph
  def test_does_not_interrupt_paragraph
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      Paragraph begins here
      paragraph does the stupid wacky shit that somebody thinks is very funny
      paragraph keeps doing that shit
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
    assert_equal(1, doc.children.length)
  end

  # Expects paragraph, indent block, paragraph in that order
  def test_begins_at_first_sight_of_four_spaces
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      text
      This is an indent block
      This is a paragraph
    DOC
    assert_instance_of(::RBMark::DOM::Paragraph, doc.children[0])
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1])
    assert_instance_of(::RBMark::DOM::Paragraph, doc.children[2])
  end

  # Expects indent blocks to appear between headings and before a
  # horizontal rule
  def test_interrupts_all_other_blocks
    doc = ::RBMark::DOM::Document.parse(<<~DOC)
      # Heading
      foo
      Heading
      ------
      foo
      ----
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, doc.children[0])
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[1])
    assert_instance_of(::RBMark::DOM::Heading2, doc.children[2])
    assert_instance_of(::RBMark::DOM::IndentBlock, doc.children[3])
    assert_instance_of(::RBMark::DOM::HorizontalRule, doc.children[4])
  end

  def test_check_blank_lines_contents
    skip # TODO: verify how blank lines inside the block are kept
  end

  def test_check_contents_trailing_spaces
    skip # TODO: verify how trailing spaces inside the block are kept
  end
end

147
test/test_setext_headers.rb Normal file
View File

@ -0,0 +1,147 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# CommonMark v0.31.2 conformance checks for Setext (underlined) headings
class TestSetextHeadings < Minitest::Test
  # "=" underline is expected to give Heading1, "-" underline Heading2
  def test_simple_heading1
    tree = parse_markdown(<<~DOC)
      Foo *bar*
      =========
      Foo *bar*
      ---------
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[0])
    assert_instance_of(::RBMark::DOM::Heading2, tree.children[1])
  end

  # Two text lines above the underline are expected to form a single heading
  def test_multiline_span
    tree = parse_markdown(<<~DOC)
      Foo *bar
      baz*
      ====
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[0])
    assert_equal(1, tree.children.length)
  end

  # Expects the second block to be a Heading1; the test is then marked
  # as skipped
  def test_span_inlining
    tree = parse_markdown(<<~DOC)
      start
      Foo *bar
      baz
      ====
    DOC
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[1])
    skip
  end

  # Underlines of any length (thirty "-" or a single "=") are expected to work
  def test_line_length
    tree = parse_markdown(<<~DOC)
      Foo
      ------------------------------
      Foo
      =
    DOC
    assert_instance_of(::RBMark::DOM::Heading2, tree.children[0])
    assert_instance_of(::RBMark::DOM::Heading1, tree.children[1])
  end

  def test_content_indent
    skip # TODO: implement this
  end

  # Expects the first block not to be a Heading2 and the following three
  # blocks to be Heading1
  def test_marker_indent
    tree = parse_markdown(<<~DOC)
      Foo
      ------------------------------
      Foo
      =
      Foo
      =
      Foo
      =
    DOC
    refute_instance_of(::RBMark::DOM::Heading2, tree.children[0])
    (1..3).each do |index|
      assert_instance_of(::RBMark::DOM::Heading1, tree.children[index])
    end
  end

  # Underlines containing spaces are expected not to create a heading
  # NOTE(review): both refutations look at the first block only - confirm
  def test_no_internal_spaces
    tree = parse_markdown(<<~DOC)
      Foo
      -- - -
      Foo
      == =
    DOC
    [::RBMark::DOM::Heading2, ::RBMark::DOM::Heading1].each do |klass|
      refute_instance_of(klass, tree.children[0])
    end
  end

  # The underline is expected to win over the inline code span that is
  # opened on the line above it
  def test_block_level_priority
    tree = parse_markdown(<<~DOC)
      ` Foo
      ------
      `
    DOC
    assert_instance_of(::RBMark::DOM::Heading2, tree.children[0])
    assert_instance_of(::RBMark::DOM::Paragraph, tree.children[1])
  end

  # Only parses the fixture for now; no assertions are made yet
  def test_paragraph_breaking_only
    parse_markdown(<<~DOC)
      > text
      ------
    DOC
    skip # TODO: implement this
  end

  # Only parses the fixture for now; no assertions are made yet
  def test_paragraph_breaking_only_lazy_continuation
    parse_markdown(<<~DOC)
      > text
      continuation line
      ------
    DOC
    skip # TODO: implement this
  end

  # Consecutive text/underline pairs are expected to give three headings
  def test_headings_back_to_back
    tree = parse_markdown(<<~DOC)
      heading1
      ------
      heading2
      ------
      heading3
      ======
    DOC
    expected = [::RBMark::DOM::Heading2, ::RBMark::DOM::Heading2, ::RBMark::DOM::Heading1]
    expected.each_with_index do |klass, index|
      assert_instance_of(klass, tree.children[index])
    end
  end

  # A lone "=" underline is expected not to create a Heading1
  def test_no_empty_headings
    tree = parse_markdown(<<~DOC)
      ======
    DOC
    refute_instance_of(::RBMark::DOM::Heading1, tree.children[0])
  end

  # Two "-" lines in a row are expected not to create any Heading2
  def test_thematic_breaks
    tree = parse_markdown(<<~DOC)
      ----
      ----
    DOC
    2.times do |index|
      refute_instance_of(::RBMark::DOM::Heading2, tree.children[index])
    end
  end

  private

  # Parses a Markdown string into an RBMark document
  def parse_markdown(markdown)
    ::RBMark::DOM::Document.parse(markdown)
  end
end

View File

@ -0,0 +1,127 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../lib/rbmark'
# CommonMark v0.31.2 conformance checks for thematic breaks (horizontal rules)
class TestThematicBreaks < Minitest::Test
  # Three "-", "*" or "_" characters are each expected to give a rule
  def test_simple
    tree = parse_markdown(<<~DOC)
      ---
      ***
      ___
    DOC
    3.times do |index|
      assert_instance_of(::RBMark::DOM::HorizontalRule, tree.children[index])
    end
  end

  # "+" and "=" are expected not to be accepted as rule characters
  def test_simple_invalid
    plus_tree = parse_markdown(<<~DOC)
      +++
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, plus_tree.children[0])
    equals_tree = parse_markdown(<<~DOC)
      ===
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, equals_tree.children[0])
  end

  # Two characters are expected to be too few to form a rule
  def test_simple_less_characters
    tree = parse_markdown(<<~DOC)
      --
      **
      __
    DOC
    3.times do |index|
      refute_instance_of(::RBMark::DOM::HorizontalRule, tree.children[index])
    end
  end

  # Expects the first four lines to be rules and the fifth not to be one
  def test_indentation
    tree = parse_markdown(<<~DOC)
      ***
      ***
      ***
      ***
      ***
    DOC
    4.times do |index|
      assert_instance_of(::RBMark::DOM::HorizontalRule, tree.children[index])
    end
    refute_instance_of(::RBMark::DOM::HorizontalRule, tree.children[4])
  end

  # Expects the last block after a text line not to be a rule
  def test_indentation_mixed_classes
    tree = parse_markdown(<<~DOC)
      Foo
      ***
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, tree.children.last)
  end

  # A long run of "_" is expected to still be one rule
  def test_line_length
    tree = parse_markdown(<<~DOC)
      _________________________________
    DOC
    assert_instance_of(::RBMark::DOM::HorizontalRule, tree.children[0])
  end

  # Spaces between the rule characters are expected to be allowed
  def test_mixed_spaces
    tree = parse_markdown(<<~DOC)
      - - -
      ** * ** * ** * **
      - - - -
      - - - -
    DOC
    4.times do |index|
      assert_instance_of(::RBMark::DOM::HorizontalRule, tree.children[index])
    end
  end

  # Lines containing any other character are expected not to be rules
  # NOTE(review): the fixture has three lines but indices 0, 2 and 3 are
  # checked - confirm that index 1 was not the intended one
  def test_mixed_characters
    tree = parse_markdown(<<~DOC)
      _ _ _ _ a
      a------
      ---a---
    DOC
    [0, 2, 3].each do |index|
      refute_instance_of(::RBMark::DOM::HorizontalRule, tree.children[index])
    end
  end

  # Different rule characters mixed on one line are expected not to be a rule
  def test_mixed_markets
    tree = parse_markdown(<<~DOC)
      *-*
    DOC
    refute_instance_of(::RBMark::DOM::HorizontalRule, tree.children[0])
  end

  # A rule between two list items is expected to be the second block
  def test_interrupt_list
    tree = parse_markdown(<<~DOC)
      - foo
      ***
      - bar
    DOC
    assert_instance_of(::RBMark::DOM::HorizontalRule, tree.children[1])
  end

  # A rule between two text lines is expected to be the second block
  def test_interrupt_paragraph
    tree = parse_markdown(<<~DOC)
      foo
      ***
      bar
    DOC
    assert_instance_of(::RBMark::DOM::HorizontalRule, tree.children[1])
  end

  private

  # Parses a Markdown string into an RBMark document
  def parse_markdown(markdown)
    ::RBMark::DOM::Document.parse(markdown)
  end
end