HTML entity encoding implemented, HTML renderer implemented

This commit is contained in:
Yessiest 2025-03-07 18:35:56 +00:00
parent 65471b5a1b
commit 06e861ffcd
4 changed files with 2476 additions and 73 deletions

View File

@ -1,9 +1,12 @@
# frozen_string_literal: true # frozen_string_literal: true
require_relative 'util'
# Modular, extensible approach to parsing markdown as close as # Modular, extensible approach to parsing markdown as close as
# it gets to CommonMark spec (as of version 0.31.2). # it gets to CommonMark spec (as of version 0.31.2).
module PointBlank module PointBlank
module Parsing module Parsing
# Shared methods for parsing links
module LinkSharedMethods module LinkSharedMethods
# Normalize a label # Normalize a label
# @param string [String] # @param string [String]
@ -592,6 +595,7 @@ module PointBlank
# Open block if it hasn't been opened yet # Open block if it hasn't been opened yet
def open(line) def open(line)
return if @open return if @open
pre, num, marker, off = line.match(/\A( {0,3})(\d+)([).])(\s+)/) pre, num, marker, off = line.match(/\A( {0,3})(\d+)([).])(\s+)/)
&.captures &.captures
return unless marker return unless marker
@ -919,6 +923,7 @@ module PointBlank
string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/, string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
'\\1') '\\1')
string = string.gsub("\n", " ") string = string.gsub("\n", " ")
string = MMMD::EntityUtils.decode_entities(string)
obj.content = string obj.content = string
obj obj
end end
@ -1082,6 +1087,7 @@ module PointBlank
string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/, string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
'\\1') '\\1')
string = string.gsub("\n", " ") string = string.gsub("\n", " ")
string = MMMD::EntityUtils.decode_entities(string)
obj.content = string obj.content = string
obj obj
end end

2233
lib/mmmd/entities.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,119 +1,243 @@
# frozen_string_literal: true # frozen_string_literal: true
require_relative "../util"
module MMMD module MMMD
module Renderers module Renderers
# HTML Renderer module HTMLConstants
class HTML
ELEMENT_MAP = { ELEMENT_MAP = {
"RBMark::DOM::InlinePre" => { "PointBlank::DOM::InlinePre" => {
tag: "code",
inline: true
},
"RBMark::DOM::InlineBreak" => {
tag: "br",
inline: true
},
"RBMark::DOM::InlineBold" => {
tag: "strong",
inline: true
},
"RBMark::DOM::InlineItalics" => {
tag: "em",
inline: true
},
"RBMark::DOM::InlineUnder" => {
tag: "span",
inline: true,
style: "text-decoration: underline;"
},
"RBMark::DOM::InlineStrike" => {
tag: "s",
inline: true
},
"RBMark::DOM::InlineLink" => {
tag: "link",
href: true,
inline: true
},
"RBMark::DOM::InlineImage" => {
tag: "img",
src: true,
inline: true
},
"RBMark::DOM::ULBlock" => {
tag: "ul"
},
"RBMark::DOM::OLBlock" => {
tag: "ol"
},
"RBMark::DOM::IndentBlock" => {
tag: "pre" tag: "pre"
}, },
"RBMark::DOM::ListElement" => { "PointBlank::DOM::InlineBreak" => {
tag: "br"
},
"PointBlank::DOM::InlineStrong" => {
tag: "strong"
},
"PointBlank::DOM::InlineEmphasis" => {
tag: "em"
},
"PointBlank::DOM::InlineUnder" => {
tag: "span",
style: "text-decoration: underline;"
},
"PointBlank::DOM::InlineStrike" => {
tag: "s"
},
"PointBlank::DOM::InlineLink" => {
tag: "link",
href: true
},
"PointBlank::DOM::InlineImage" => {
tag: "img",
src: true
},
"PointBlank::DOM::ULBlock" => {
tag: "ul"
},
"PointBlank::DOM::OLBlock" => {
tag: "ol"
},
"PointBlank::DOM::IndentBlock" => {
tag: "pre"
},
"PointBlank::DOM::ULListElement" => {
tag: "li" tag: "li"
}, },
"RBMark::DOM::Paragraph" => { "PointBlank::DOM::OLListElement" => {
tag: "li"
},
"PointBlank::DOM::Paragraph" => {
tag: "p" tag: "p"
}, },
"RBMark::DOM::Heading1" => { "PointBlank::DOM::SetextHeading1" => {
tag: "h1" tag: "h1"
}, },
"RBMark::DOM::Heading2" => { "PointBlank::DOM::SetextHeading2" => {
tag: "h2" tag: "h2"
}, },
"RBMark::DOM::Heading3" => { "PointBlank::DOM::ATXHeading1" => {
tag: "h1"
},
"PointBlank::DOM::ATXHeading2" => {
tag: "h2"
},
"PointBlank::DOM::ATXHeading3" => {
tag: "h3" tag: "h3"
}, },
"RBMark::DOM::Heading4" => { "PointBlank::DOM::ATXHeading4" => {
tag: "h4" tag: "h4"
}, },
"RBMark::DOM::Heading5" => { "PointBlank::DOM::ATXHeading5" => {
tag: "h5" tag: "h5"
}, },
"RBMark::DOM::Heading6" => { "PointBlank::DOM::ATXHeading6" => {
tag: "h6" tag: "h6"
}, },
"RBMark::DOM::Document" => { "PointBlank::DOM::Document" => {
tag: "main" tag: "main"
}, },
"RBMark::DOM::CodeBlock" => { "PointBlank::DOM::CodeBlock" => {
tag: "pre", tag: "pre",
outer: { outer: {
tag: "code" tag: "code"
} }
}, },
"RBMark::DOM::QuoteBlock" => { "PointBlank::DOM::QuoteBlock" => {
tag: "blockquote" tag: "blockquote"
}, },
"RBMark::DOM::HorizontalRule" => { "PointBlank::DOM::HorizontalRule" => {
tag: "hr" tag: "hr"
}, },
"RBMark::DOM::Text" => nil "PointBlank::DOM::Text" => {
sanitize: true
}
}.freeze }.freeze
# Holds the default class-name => style mapping and layers
# per-renderer overrides on top of it
class MapManager
  # Register (or replace) the default mapping of a class
  # @param key [String] class name
  # @param mapping [Hash] mapping
  # @return [void]
  def self.define_mapping(key, mapping)
    self.mapping[key] = mapping
  end

  # Default mapping; starts out as a copy of ELEMENT_MAP and is
  # created on first access
  # @return [Hash]
  def self.mapping
    @mapping ||= ELEMENT_MAP.dup
  end

  # @return [Hash] defaults combined with this instance's overrides
  attr_reader :mapping

  # @param overrides [Hash] options; the "mapping" key, when present,
  #   is merged over the defaults
  def initialize(overrides)
    defaults = self.class.mapping
    custom = overrides["mapping"]
    @mapping = custom ? defaults.merge(custom) : defaults
  end
end
end
# HTML Renderer
class HTML
def initialize(dom, options) def initialize(dom, options)
@document = dom @document = dom
@options = options @options = options
@options["init_level"] ||= 2
@options["indent"] ||= 2
mapmanager = HTMLConstants::MapManager.new(options)
@mapping = mapmanager.mapping
end end
# Render document to HTML # Render document to HTML
def render def render
preambule if @options['preambule'] text = _render(@document, @options, level: @options["init_level"])
_render(@document, indent = 2) @options["init_level"].times { text = indent(text) }
postambule if @options['postambule'] [
preambule,
text,
postambule
].join("\n")
end end
private private
def _render(element, indent = 0) # Word wrapping algorithm
# Greedy wrapping: the text is split on runs of spaces (the runs are
# kept as tokens) and tokens are appended to the current line until the
# next one would overflow; tokens longer than the width are hard-split.
# Lines that consist of nothing but spaces are dropped, so the result
# never contains blank lines.
# @param text [String]
# @param width [Integer] maximum line width; values below 1 are
#   treated as 1
# @return [String] wrapped lines joined with "\n"
def wordwrap(text, width)
  # A non-positive width would re-queue the same token forever.
  width = [width, 1].max
  words = text.split(/( +)/)
  output = []
  line = ""
  length = 0
  until words.empty?
    word = words.shift
    if word.length > width
      # Hard-split the token and push the remainder back on the queue.
      words.prepend(word[width..])
      word = word[..width - 1]
    end
    # Measured after splitting so it reflects what is actually placed.
    wordlength = word.length
    if length + wordlength + 1 > width
      finished = line.lstrip
      output.append(finished) unless finished.empty?
      line = word
      length = wordlength
      next
    end
    length += wordlength
    line += word
  end
  finished = line.lstrip
  output.append(finished) unless finished.empty?
  output.join("\n")
end
# Recursively render an element and its children to text.
# Leaf blocks and paragraphs switch rendering to inline mode for their
# whole subtree; the nesting level only grows outside of inline mode.
# @param element [Object] DOM node responding to children and content
# @param options [Hash] passed through unchanged to child calls
# @param inline [Boolean] whether an ancestor already switched to inline
# @param level [Integer] current nesting level
# @return [String]
def _render(element, options, inline: false, level: 0)
  modeswitch = element.is_a?(::PointBlank::DOM::LeafBlock) ||
               element.is_a?(::PointBlank::DOM::Paragraph)
  inline = modeswitch unless inline
  level += 1 unless inline

  text =
    if element.children.empty?
      element.content
    else
      separator = inline ? '' : "\n"
      rendered = element.children.map do |node|
        _render(node, options, inline: inline, level: level)
      end
      rendered.join(separator)
    end

  run_filters(text, element,
              level: level, inline: inline, modeswitch: modeswitch)
end
# Post-process the rendered text of one element: optional word
# wrapping, optional entity encoding, then wrapping in the tags of the
# style mapped to the element's class name.
# @param text [String] rendered content of the element
# @param element [Object] DOM node; its class name selects the style
# @param level [Integer] nesting level, narrows the wrapping width
# @param inline [Boolean] emit tags and text on one line when true
# @param modeswitch [Boolean] word-wrap the text when true
# @return [String]
def run_filters(text, element, level:, inline:, modeswitch:)
  # A class without a mapping entry (or mapped to nil) is rendered as
  # bare text instead of failing on a nil style.
  element_style = @mapping[element.class.name] || {}
  # Deep nesting must not produce a zero or negative wrapping width.
  hsize = [80 - (level * @options["indent"]), 1].max
  text = wordwrap(text, hsize) if modeswitch
  text = MMMD::EntityUtils.encode_entities(text) if element_style[:sanitize]
  opentag, closetag = construct_tags(element_style)
  return opentag + text + closetag if inline

  [opentag, indent(text), closetag].join("\n")
end
# Build the opening and closing tag strings for a style entry.
# A :style value becomes a style attribute; an :outer entry is built
# recursively and placed around the element's own tags.
# @param style [Hash, nil] mapping entry (symbol keys :tag, :style, :outer)
# @return [Array(String, String)] opening and closing tag; two empty
#   strings when there is no style or it has no :tag
def construct_tags(style)
  return ["", ""] unless style && style[:tag]

  opentag = "<#{style[:tag]}"
  closetag = "</#{style[:tag]}>"
  # Mapping entries use symbol keys, so the attribute must be looked up
  # under :style. NOTE(review): String#dump quotes with Ruby escaping,
  # not HTML attribute escaping; fine for plain ASCII CSS values.
  opentag += " style=#{style[:style].dump}" if style[:style]
  opentag += ">"
  if style[:outer]
    outeropen, outerclose = construct_tags(style[:outer])
    opentag = outeropen + opentag
    closetag += outerclose
  end
  [opentag, closetag]
end
# Prefix every line of the text with the configured number of spaces
# (the "indent" option).
# @param text [String]
# @return [String]
def indent(text)
  text.each_line.map { |row| (' ' * @options["indent"]) + row }.join
end
def preambule def preambule
@options['preambule'] or <<~TEXT @options['preambule'] or <<~TEXT.rstrip
<!DOCTYPE HTML> <!DOCTYPE HTML>
<html> <html>
<head> <head>
#{@document['head']} #{@options['head']}
</head> </head>
<body> <body>
TEXT TEXT

40
lib/mmmd/util.rb Normal file
View File

@ -0,0 +1,40 @@
# frozen_string_literal: true
require 'json'
module MMMD
# Utils for working with entities in strings
module EntityUtils
ENTITY_DATA = JSON.parse(File.read("#{__dir__}/entities.json"))
# Decode html entities in string.
# Handles decimal (&#65;), hexadecimal (&#x41;) and named (&amp;)
# references in a single pass, so text produced by one reference is
# never decoded a second time. Unknown named references are left as
# they are. Code point 0 and code points that cannot be encoded in
# UTF-8 become U+FFFD, as the CommonMark spec prescribes.
# @param string [String]
# @return [String]
def self.decode_entities(string)
  string.gsub(/&(?:#(\d{1,7})|#[xX]([\dA-Fa-f]{1,6})|\w+);/) do |match|
    decimal = Regexp.last_match(1)
    hex = Regexp.last_match(2)
    if decimal || hex
      codepoint = decimal ? decimal.to_i : hex.to_i(16)
      begin
        codepoint.zero? ? "\uFFFD" : codepoint.chr("UTF-8")
      rescue RangeError
        # Surrogates and values past U+10FFFF are not valid characters.
        "\uFFFD"
      end
    else
      entity = ENTITY_DATA[match]
      entity ? entity["characters"] : match
    end
  end
end
# Encode unsafe html entities in string.
# The characters & < > " ' are replaced with their entity references
# and every non-ASCII character with a hexadecimal numeric reference.
# @param string [String]
# @return [String]
# @sg-ignore
def self.encode_entities(string)
  # Hash replacement handles all five characters in one pass, so the
  # ampersands of freshly inserted entities are never re-escaped.
  string.gsub(/[&<>"']/,
              "&" => "&amp;",
              "<" => "&lt;",
              ">" => "&gt;",
              '"' => "&quot;",
              "'" => "&#39;")
        .gsub(/[^\x00-\x7F]/) do |char|
          # The "&#x" prefix announces hexadecimal digits.
          format("&#x%X;", char.ord)
        end
end
end
end