HTML renderer fixes, additional compliance

This commit is contained in:
Yessiest 2025-03-07 21:29:24 +00:00
parent 06e861ffcd
commit 1fb5f15ead
7 changed files with 188 additions and 40 deletions

14
lib/mmmd.rb Normal file
View File

@ -0,0 +1,14 @@
# frozen_string_literal: true
require_relative 'mmmd/blankshell'
require_relative 'mmmd/renderers'
# Extensible, multi-format markdown processor
module MMMD
  class << self
    # Turn Markdown source text into its DOM form by delegating to the
    # PointBlank document parser.
    # @param doc [String]
    # @return [::PointBlank::DOM::Document]
    def parse(doc)
      ::PointBlank::DOM::Document.parse(doc)
    end
  end
end

View File

@ -1,6 +1,7 @@
# frozen_string_literal: true # frozen_string_literal: true
require_relative 'util' require_relative 'util'
require 'uri'
# Modular, extensible approach to parsing markdown as close as # Modular, extensible approach to parsing markdown as close as
# it gets to CommonMark spec (as of version 0.31.2). # it gets to CommonMark spec (as of version 0.31.2).
@ -75,13 +76,13 @@ module PointBlank
def read_destination(text) def read_destination(text)
if (result = text.match(/\A<.*?(?<![^\\]\\)>/m)) && if (result = text.match(/\A<.*?(?<![^\\]\\)>/m)) &&
!result[0][1..].match?(/(?<![^\\]\\)</) !result[0][1..].match?(/(?<![^\\]\\)</)
[result[0].gsub(/\\(?=[><])/, '')[1..-2], [process_destination(result[0].gsub(/\\(?=[><])/, '')[1..-2]),
text.delete_prefix(result[0]).lstrip] text.delete_prefix(result[0]).lstrip]
elsif (result = text.match(/\A\S+/)) && elsif (result = text.match(/\A\S+/)) &&
!result[0].start_with?('<') && !result[0].start_with?('<') &&
result && result &&
balanced?(result[0]) balanced?(result[0])
[result[0], [process_destination(result[0]),
text.delete_prefix(result[0]).lstrip] text.delete_prefix(result[0]).lstrip]
else else
[nil, text] [nil, text]
@ -95,15 +96,15 @@ module PointBlank
def read_title(text) def read_title(text)
if text.start_with?("'") && if text.start_with?("'") &&
(result = text.match(/\A'.*?(?<!\\)'/m)) (result = text.match(/\A'.*?(?<!\\)'/m))
[result[0][1..-2], [process_title(result[0][1..-2]),
text.delete_prefix(result[0]).lstrip] text.delete_prefix(result[0]).lstrip]
elsif text.start_with?('"') && elsif text.start_with?('"') &&
(result = text.match(/\A".*?(?<!\\)"/m)) (result = text.match(/\A".*?(?<!\\)"/m))
[result[0][1..-2], [process_title(result[0][1..-2]),
text.delete_prefix(result[0]).lstrip] text.delete_prefix(result[0]).lstrip]
elsif text.start_with?('(') && elsif text.start_with?('(') &&
(result = find_balanced_end(text)) (result = find_balanced_end(text))
[text[1..(result - 1)], [process_title(text[1..(result - 1)]),
text.delete_prefix(text[..result]).lstrip] text.delete_prefix(text[..result]).lstrip]
else else
[nil, text] [nil, text]
@ -125,7 +126,7 @@ module PointBlank
return [nil, text] unless destination return [nil, text] unless destination
title, remaining = read_title(remaining) title, remaining = read_title(remaining)
properties[:destination] = destination properties[:uri] = destination
properties[:title] = title properties[:title] = title
close_bracket = true close_bracket = true
end end
@ -169,6 +170,28 @@ module PointBlank
end end
nil nil
end end
# Process destination string.
#
# Drops the backslash in front of escaped ASCII punctuation, replaces
# newlines with spaces, passes the result through
# MMMD::EntityUtils.decode_entities and finally percent-encodes every
# character that is not safe inside a URI.
#
# Characters that carry meaning in a URI ("/", ":", "?", "#", "&", "%"
# and similar) are deliberately left untouched: escaping them wholesale
# would turn "/url" into "%2Furl", break absolute URLs and double-encode
# percent-escapes that are already present in the destination.
# @param string [String]
# @return [String]
def process_destination(string)
  string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
                       '\\1')
  string = string.gsub("\n", " ")
  MMMD::EntityUtils.decode_entities(string)
                   .gsub(/[^A-Za-z0-9\-_.+!*'(),%@?=;:\/&$~#]/) do |char|
    # Multi-byte characters are escaped one UTF-8 byte at a time
    char.bytes.map { |byte| format('%%%02X', byte) }.join
  end
end
# Normalize a link title: drop the backslash in front of escaped ASCII
# punctuation, turn newlines into spaces, then hand the result to
# MMMD::EntityUtils.decode_entities.
# @param string [String]
# @return [String]
def process_title(string)
  unescaped = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
                          '\\1')
  MMMD::EntityUtils.decode_entities(unescaped.tr("\n", " "))
end
end end
class LineScanner class LineScanner

View File

@ -1,9 +1,11 @@
# frozen_string_literal: true # frozen_string_literal: true
module RBMark $LOAD_PATH.append(__dir__)
module MMMD
# Renderers from Markdown to expected output format # Renderers from Markdown to expected output format
module Renderers module Renderers
autoload :HTML, 'renderers/html'
autoload :PlainTerm, 'renderers/plainterm'
end end
end end
require_relative 'renderers/html'

View File

@ -7,7 +7,8 @@ module MMMD
module HTMLConstants module HTMLConstants
ELEMENT_MAP = { ELEMENT_MAP = {
"PointBlank::DOM::InlinePre" => { "PointBlank::DOM::InlinePre" => {
tag: "pre" tag: "code",
style: "white-space: pre;"
}, },
"PointBlank::DOM::InlineBreak" => { "PointBlank::DOM::InlineBreak" => {
tag: "br" tag: "br"
@ -26,12 +27,14 @@ module MMMD
tag: "s" tag: "s"
}, },
"PointBlank::DOM::InlineLink" => { "PointBlank::DOM::InlineLink" => {
tag: "link", tag: "a",
href: true href: true
}, },
"PointBlank::DOM::InlineImage" => { "PointBlank::DOM::InlineImage" => {
tag: "img", tag: "img",
src: true src: true,
inline: true,
alt: true
}, },
"PointBlank::DOM::ULBlock" => { "PointBlank::DOM::ULBlock" => {
tag: "ul" tag: "ul"
@ -88,7 +91,8 @@ module MMMD
tag: "blockquote" tag: "blockquote"
}, },
"PointBlank::DOM::HorizontalRule" => { "PointBlank::DOM::HorizontalRule" => {
tag: "hr" tag: "hr",
inline: true
}, },
"PointBlank::DOM::Text" => { "PointBlank::DOM::Text" => {
sanitize: true sanitize: true
@ -128,41 +132,80 @@ module MMMD
def initialize(dom, options) def initialize(dom, options)
@document = dom @document = dom
@options = options @options = options
@options["linewrap"] ||= 80
@options["init_level"] ||= 2 @options["init_level"] ||= 2
@options["indent"] ||= 2 @options["indent"] ||= 2
mapmanager = HTMLConstants::MapManager.new(options) mapmanager = HTMLConstants::MapManager.new(options)
@mapping = mapmanager.mapping @mapping = mapmanager.mapping
return unless @options["nowrap"]
@options["init_level"] = 0
@mapping.delete("PointBlank::DOM::Document")
end end
# Render document to HTML # Render document to HTML
def render def render
text = _render(@document, @options, level: @options["init_level"]) text = _render(@document, @options, level: @options["init_level"])
@options["init_level"].times { text = indent(text) } @options["init_level"].times { text = indent(text) }
[ if @options["nowrap"]
preambule, text
text, else
postambule [
].join("\n") preambule,
remove_pre_spaces(text),
postambule
].join("\n")
end
end end
private private
# Find and remove extra spaces in between preformatted text.
#
# Lines following a line that contains "<pre>" are held back until a
# line containing "</pre>" shows up; the held lines are then passed
# through strip_leading_spaces_in_buffer and emitted. Lines held back
# when the input ends without a closing tag are emitted unchanged.
# @param string [String]
# @return [String]
def remove_pre_spaces(string)
  result = []
  pending = []
  inside_pre = nil
  string.each_line do |line|
    if line.match?(/<\/pre>/)
      # Closing line: flush the held lines, then emit the line itself
      result.concat(strip_leading_spaces_in_buffer(pending))
      pending = []
      inside_pre = false
      result << line
    else
      (inside_pre ? pending : result) << line
      inside_pre = true if line.match?(/<pre>/)
    end
  end
  result.concat(pending)
  result.join('')
end
# Remove the shortest run of leading spaces found among the lines
# from the front of every line.
# @param buffer [Array<String>]
# @return [Array<String>]
def strip_leading_spaces_in_buffer(buffer)
  shared_indent = buffer.map { |entry| entry[/^ */] }.min_by(&:length)
  buffer.map { |entry| entry.delete_prefix(shared_indent) }
end
# Word wrapping algorithm # Word wrapping algorithm
# @param text [String] # @param text [String]
# @param width [Integer] # @param width [Integer]
# @return [String] # @return [String]
def wordwrap(text, width) def wordwrap(text, width)
words = text.split(/( +)/) words = text.split(/( +|<[^>]+>)/)
output = [] output = []
line = "" line = ""
length = 0 length = 0
until words.empty? until words.empty?
word = words.shift word = words.shift
wordlength = word.length wordlength = word.length
if wordlength > width
words.prepend(word[width..])
word = word[..width - 1]
end
if length + wordlength + 1 > width if length + wordlength + 1 > width
output.append(line.lstrip) output.append(line.lstrip)
line = word line = word
@ -176,7 +219,7 @@ module MMMD
output.join("\n") output.join("\n")
end end
def _render(element, options, inline: false, level: 0) def _render(element, options, inline: false, level: 0, literaltext: false)
modeswitch = element.is_a?(::PointBlank::DOM::LeafBlock) || modeswitch = element.is_a?(::PointBlank::DOM::LeafBlock) ||
element.is_a?(::PointBlank::DOM::Paragraph) element.is_a?(::PointBlank::DOM::Paragraph)
inline ||= modeswitch inline ||= modeswitch
@ -184,24 +227,39 @@ module MMMD
text = if element.children.empty? text = if element.children.empty?
element.content element.content
else else
literal = @mapping[element.class.name][:inline] || literaltext
element.children.map do |child| element.children.map do |child|
_render(child, options, inline: inline, _render(child, options, inline: inline,
level: level) level: level,
literaltext: literal)
end.join(inline ? '' : "\n") end.join(inline ? '' : "\n")
end end
run_filters(text, element, level: level, run_filters(text, element, level: level,
inline: inline, inline: inline,
modeswitch: modeswitch) modeswitch: modeswitch,
literaltext: literaltext)
end end
def run_filters(text, element, level:, inline:, modeswitch:) def run_filters(text, element, level:, inline:, modeswitch:,
literaltext:)
element_style = @mapping[element.class.name] element_style = @mapping[element.class.name]
hsize = 80 - (level * @options["indent"]) return text unless element_style
return text if literaltext
hsize = @options["linewrap"] - (level * @options["indent"])
text = wordwrap(text, hsize) if modeswitch text = wordwrap(text, hsize) if modeswitch
if element_style[:sanitize] if element_style[:sanitize]
text = MMMD::EntityUtils.encode_entities(text) text = MMMD::EntityUtils.encode_entities(text)
end end
opentag, closetag = construct_tags(element_style) if element_style[:inline]
innerclose(element, element_style, text)
else
openclose(text, element, element_style, inline)
end
end
def openclose(text, element, element_style, inline)
opentag, closetag = construct_tags(element_style, element)
if inline if inline
opentag + text + closetag opentag + text + closetag
else else
@ -211,21 +269,54 @@ module MMMD
end end
end end
def construct_tags(style) def innerclose(element, style, text)
props = element.properties
tag = "<#{style[:tag]}"
tag += " style=#{style[:style].inspect}" if style[:style]
tag += " href=#{read_link(element)}" if style[:href]
tag += " alt=#{text.inspect}" if style[:alt]
tag += " src=#{read_link(element)}" if style[:src]
tag += " title=#{read_title(element)}" if style[:title] && props[:title]
tag += ">"
if style[:outer]
outeropen, outerclose = construct_tags(style[:outer], element)
tag = outeropen + tag + outerclose
end
tag
end
def construct_tags(style, element)
return ["", ""] unless style && style[:tag] return ["", ""] unless style && style[:tag]
props = element.properties
opentag = "<#{style[:tag]}" opentag = "<#{style[:tag]}"
closetag = "</#{style[:tag]}>" closetag = "</#{style[:tag]}>"
opentag += " style=#{style[:style].dump}" if style["style"] opentag += " style=#{style[:style].inspect}" if style[:style]
opentag += " href=#{read_link(element)}" if style[:href]
opentag += " src=#{read_link(element)}" if style[:src]
opentag += " title=#{read_title(element)}" if style[:title] &&
props[:title]
opentag += ">" opentag += ">"
if style[:outer] if style[:outer]
outeropen, outerclose = construct_tags(style[:outer]) outeropen, outerclose = construct_tags(style[:outer], element)
opentag = outeropen + opentag opentag = outeropen + opentag
closetag += outerclose closetag += outerclose
end end
[opentag, closetag] [opentag, closetag]
end end
# Build the quoted, entity-encoded value for an element's title
# attribute.
#
# The value is wrapped in plain double quotes instead of String#dump:
# dump emits Ruby escape sequences (non-ASCII characters as \u escapes,
# doubled backslashes, "\#" before interpolation-like sequences), all of
# which HTML would display literally.
# NOTE(review): relies on encode_entities turning '"' into "&quot;" so
# the value cannot terminate the attribute - confirm this stays true.
# @param element [Object] element whose properties hold :title
# @return [String] attribute value including surrounding quotes
def read_title(element)
  title = MMMD::EntityUtils.encode_entities(element.properties[:title])
  "\"#{title}\""
end
# Build the quoted, entity-encoded value for an element's link
# attribute (href/src), read from the element's :uri property.
#
# The value is wrapped in plain double quotes instead of String#dump:
# dump emits Ruby escape sequences (doubled backslashes, "\#" before
# "$", "@" or "{", \u escapes for non-ASCII), which would corrupt the
# URL once it is placed inside an HTML attribute.
# NOTE(review): relies on encode_entities turning '"' into "&quot;" so
# the value cannot terminate the attribute - confirm this stays true.
# @param element [Object] element whose properties hold :uri
# @return [String] attribute value including surrounding quotes
def read_link(element)
  link = MMMD::EntityUtils.encode_entities(element.properties[:uri])
  "\"#{link}\""
end
def indent(text) def indent(text)
text.lines.map do |line| text.lines.map do |line|
"#{' ' * @options["indent"]}#{line}" "#{' ' * @options["indent"]}#{line}"
@ -233,13 +324,17 @@ module MMMD
end end
def preambule def preambule
head = @options['head']
headinfo = "#{indent(<<~HEAD.rstrip)}\n " if head
<head>
#{head.is_a?(Array) ? head.join("\n") : head}
</head>
HEAD
headinfo ||= " "
@options['preambule'] or <<~TEXT.rstrip @options['preambule'] or <<~TEXT.rstrip
<!DOCTYPE HTML> <!DOCTYPE HTML>
<html> <html>
<head> #{headinfo}<body>
#{@options['head']}
</head>
<body>
TEXT TEXT
end end

View File

@ -312,6 +312,9 @@ module MMMD
"PointBlank::DOM::QuoteBlock" => { "PointBlank::DOM::QuoteBlock" => {
leftline: true, leftline: true,
increase_level: true increase_level: true
},
"PointBlank::DOM::HorizontalRule" => {
underline_full_block: true
} }
}.freeze }.freeze

View File

@ -22,6 +22,21 @@ module MMMD
end end
end end
# Encode unsafe html entities in string (ASCII-compatible)
#
# "&", "<", ">", '"' and "'" become named/numeric entities, and every
# non-ASCII character becomes a hexadecimal numeric character reference.
# The codepoint is written in base 16 because the "&#x" prefix marks a
# hexadecimal reference; a decimal number there names another character.
# @param string [String]
# @return [String]
# @sg-ignore
def self.encode_entities_ascii(string)
  string.gsub("&", "&amp;")
        .gsub("<", "&lt;")
        .gsub(">", "&gt;")
        .gsub('"', "&quot;")
        .gsub("'", "&#39;")
        .gsub(/[^\x00-\x7F]/) { |char| "&#x#{char.ord.to_s(16).upcase};" }
end
# Encode unsafe html entities in string # Encode unsafe html entities in string
# @param string [String] # @param string [String]
# @return [String] # @return [String]
@ -32,9 +47,6 @@ module MMMD
.gsub(">", "&gt;") .gsub(">", "&gt;")
.gsub('"', "&quot;") .gsub('"', "&quot;")
.gsub("'", "&#39;") .gsub("'", "&#39;")
.gsub(/[^\x00-\x7F]/) do |match|
"&#x#{match.codepoints[0]};"
end
end end
end end
end end

View File

@ -1 +0,0 @@
mmmd