HTML entity encoding implemented, HTML renderer implemented
This commit is contained in:
parent
65471b5a1b
commit
06e861ffcd
|
@ -1,9 +1,12 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require_relative 'util'
|
||||
|
||||
# Modular, extensible approach to parsing markdown as close as
|
||||
# it gets to CommonMark spec (as of version 0.31.2).
|
||||
module PointBlank
|
||||
module Parsing
|
||||
# Shared methods for parsing links
|
||||
module LinkSharedMethods
|
||||
# Normalize a label
|
||||
# @param string [String]
|
||||
|
@ -592,6 +595,7 @@ module PointBlank
|
|||
# Open block if it hasn't been opened yet
|
||||
def open(line)
|
||||
return if @open
|
||||
|
||||
pre, num, marker, off = line.match(/\A( {0,3})(\d+)([).])(\s+)/)
|
||||
&.captures
|
||||
return unless marker
|
||||
|
@ -919,6 +923,7 @@ module PointBlank
|
|||
string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
|
||||
'\\1')
|
||||
string = string.gsub("\n", " ")
|
||||
string = MMMD::EntityUtils.decode_entities(string)
|
||||
obj.content = string
|
||||
obj
|
||||
end
|
||||
|
@ -1082,6 +1087,7 @@ module PointBlank
|
|||
string = string.gsub(/\\([!"\#$%&'()*+,\-.\/:;<=>?@\[\\\]\^_`{|}~])/,
|
||||
'\\1')
|
||||
string = string.gsub("\n", " ")
|
||||
string = MMMD::EntityUtils.decode_entities(string)
|
||||
obj.content = string
|
||||
obj
|
||||
end
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,128 +1,252 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require_relative "../util"
|
||||
|
||||
module MMMD
|
||||
module Renderers
|
||||
# HTML Renderer
|
||||
class HTML
|
||||
module HTMLConstants
|
||||
ELEMENT_MAP = {
|
||||
"RBMark::DOM::InlinePre" => {
|
||||
tag: "code",
|
||||
inline: true
|
||||
},
|
||||
"RBMark::DOM::InlineBreak" => {
|
||||
tag: "br",
|
||||
inline: true
|
||||
},
|
||||
"RBMark::DOM::InlineBold" => {
|
||||
tag: "strong",
|
||||
inline: true
|
||||
},
|
||||
"RBMark::DOM::InlineItalics" => {
|
||||
tag: "em",
|
||||
inline: true
|
||||
},
|
||||
"RBMark::DOM::InlineUnder" => {
|
||||
tag: "span",
|
||||
inline: true,
|
||||
style: "text-decoration: underline;"
|
||||
},
|
||||
"RBMark::DOM::InlineStrike" => {
|
||||
tag: "s",
|
||||
inline: true
|
||||
},
|
||||
"RBMark::DOM::InlineLink" => {
|
||||
tag: "link",
|
||||
href: true,
|
||||
inline: true
|
||||
},
|
||||
"RBMark::DOM::InlineImage" => {
|
||||
tag: "img",
|
||||
src: true,
|
||||
inline: true
|
||||
},
|
||||
"RBMark::DOM::ULBlock" => {
|
||||
tag: "ul"
|
||||
},
|
||||
"RBMark::DOM::OLBlock" => {
|
||||
tag: "ol"
|
||||
},
|
||||
"RBMark::DOM::IndentBlock" => {
|
||||
"PointBlank::DOM::InlinePre" => {
|
||||
tag: "pre"
|
||||
},
|
||||
"RBMark::DOM::ListElement" => {
|
||||
"PointBlank::DOM::InlineBreak" => {
|
||||
tag: "br"
|
||||
},
|
||||
"PointBlank::DOM::InlineStrong" => {
|
||||
tag: "strong"
|
||||
},
|
||||
"PointBlank::DOM::InlineEmphasis" => {
|
||||
tag: "em"
|
||||
},
|
||||
"PointBlank::DOM::InlineUnder" => {
|
||||
tag: "span",
|
||||
style: "text-decoration: underline;"
|
||||
},
|
||||
"PointBlank::DOM::InlineStrike" => {
|
||||
tag: "s"
|
||||
},
|
||||
"PointBlank::DOM::InlineLink" => {
|
||||
tag: "link",
|
||||
href: true
|
||||
},
|
||||
"PointBlank::DOM::InlineImage" => {
|
||||
tag: "img",
|
||||
src: true
|
||||
},
|
||||
"PointBlank::DOM::ULBlock" => {
|
||||
tag: "ul"
|
||||
},
|
||||
"PointBlank::DOM::OLBlock" => {
|
||||
tag: "ol"
|
||||
},
|
||||
"PointBlank::DOM::IndentBlock" => {
|
||||
tag: "pre"
|
||||
},
|
||||
"PointBlank::DOM::ULListElement" => {
|
||||
tag: "li"
|
||||
},
|
||||
"RBMark::DOM::Paragraph" => {
|
||||
"PointBlank::DOM::OLListElement" => {
|
||||
tag: "li"
|
||||
},
|
||||
"PointBlank::DOM::Paragraph" => {
|
||||
tag: "p"
|
||||
},
|
||||
"RBMark::DOM::Heading1" => {
|
||||
"PointBlank::DOM::SetextHeading1" => {
|
||||
tag: "h1"
|
||||
},
|
||||
"RBMark::DOM::Heading2" => {
|
||||
"PointBlank::DOM::SetextHeading2" => {
|
||||
tag: "h2"
|
||||
},
|
||||
"RBMark::DOM::Heading3" => {
|
||||
"PointBlank::DOM::ATXHeading1" => {
|
||||
tag: "h1"
|
||||
},
|
||||
"PointBlank::DOM::ATXHeading2" => {
|
||||
tag: "h2"
|
||||
},
|
||||
"PointBlank::DOM::ATXHeading3" => {
|
||||
tag: "h3"
|
||||
},
|
||||
"RBMark::DOM::Heading4" => {
|
||||
"PointBlank::DOM::ATXHeading4" => {
|
||||
tag: "h4"
|
||||
},
|
||||
"RBMark::DOM::Heading5" => {
|
||||
"PointBlank::DOM::ATXHeading5" => {
|
||||
tag: "h5"
|
||||
},
|
||||
"RBMark::DOM::Heading6" => {
|
||||
"PointBlank::DOM::ATXHeading6" => {
|
||||
tag: "h6"
|
||||
},
|
||||
"RBMark::DOM::Document" => {
|
||||
"PointBlank::DOM::Document" => {
|
||||
tag: "main"
|
||||
},
|
||||
"RBMark::DOM::CodeBlock" => {
|
||||
"PointBlank::DOM::CodeBlock" => {
|
||||
tag: "pre",
|
||||
outer: {
|
||||
tag: "code"
|
||||
}
|
||||
},
|
||||
"RBMark::DOM::QuoteBlock" => {
|
||||
"PointBlank::DOM::QuoteBlock" => {
|
||||
tag: "blockquote"
|
||||
},
|
||||
"RBMark::DOM::HorizontalRule" => {
|
||||
"PointBlank::DOM::HorizontalRule" => {
|
||||
tag: "hr"
|
||||
},
|
||||
"RBMark::DOM::Text" => nil
|
||||
"PointBlank::DOM::Text" => {
|
||||
sanitize: true
|
||||
}
|
||||
}.freeze
|
||||
|
||||
# Keeps the default element-to-tag mapping and applies per-renderer
# overrides on top of it.
class MapManager
  # Register (or replace) the default mapping for a class name
  # @param key [String] class name
  # @param mapping [Hash] mapping
  # @return [void]
  def self.define_mapping(key, mapping)
    self.mapping[key] = mapping
  end

  # Default mapping, lazily seeded from a shallow copy of ELEMENT_MAP
  # @return [Hash]
  def self.mapping
    @mapping ||= ELEMENT_MAP.dup
  end

  # @return [Hash] mapping in effect for this instance
  attr_reader :mapping

  # @param overrides [Hash] options; the "mapping" key, when present,
  #   is merged over the defaults
  def initialize(overrides)
    defaults = self.class.mapping
    custom = overrides["mapping"]
    # Without overrides the instance shares the class-level hash.
    @mapping = custom ? defaults.merge(custom) : defaults
  end
end
|
||||
end
|
||||
|
||||
# HTML Renderer
|
||||
class HTML
|
||||
# Set up a renderer for the given document.
# @param dom [Object] document root to render
# @param options [Hash] renderer options; "init_level" and "indent"
#   default to 2, "mapping" is forwarded to the map manager
def initialize(dom, options)
  @document = dom
  # Work on a copy so defaults are not written into the caller's hash
  # (which would also fail outright on a frozen hash).
  @options = options.dup
  @options["init_level"] ||= 2
  @options["indent"] ||= 2
  @mapping = HTMLConstants::MapManager.new(@options).mapping
end
|
||||
|
||||
# Render document to HTML
|
||||
def render
|
||||
preambule if @options['preambule']
|
||||
_render(@document, indent = 2)
|
||||
postambule if @options['postambule']
|
||||
text = _render(@document, @options, level: @options["init_level"])
|
||||
@options["init_level"].times { text = indent(text) }
|
||||
[
|
||||
preambule,
|
||||
text,
|
||||
postambule
|
||||
].join("\n")
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def _render(element, indent = 0)
|
||||
# Word wrapping algorithm.
# Splits on runs of spaces and greedily packs tokens into lines; a
# line is broken before the token that would make it reach +width+.
# Tokens longer than +width+ are hard-split into +width+-sized chunks.
# Emitted lines are stripped and blank lines are never produced.
# @param text [String]
# @param width [Integer] values below 1 are treated as 1
# @return [String] lines joined with "\n"
def wordwrap(text, width)
  # A non-positive width would re-queue the same token forever.
  width = 1 if width < 1
  # The capture group keeps the space runs as tokens of their own.
  tokens = text.split(/( +)/)
  output = []
  line = ""
  until tokens.empty?
    token = tokens.shift
    if token.length > width
      # Re-queue the overflow and carry on with the chunk that fits.
      tokens.unshift(token[width..])
      token = token[0, width]
    end
    if line.length + token.length + 1 > width
      output << line.strip
      line = token
    else
      line += token
    end
  end
  output << line.strip
  # Buffers that were empty or held only separators yield no line.
  output.reject(&:empty?).join("\n")
end
|
||||
|
||||
# Recursively render an element and its children.
# Leaf blocks and paragraphs switch rendering to inline mode for
# themselves and everything below them; the nesting level only grows
# while still in block mode.
# @param element [Object] DOM node responding to #children and #content
# @param options [Hash] passed through unchanged to child renders
# @param inline [Boolean] whether an ancestor already switched to inline
# @param level [Integer] current block nesting level
# @return [String]
def _render(element, options, inline: false, level: 0)
  modeswitch = case element
               when ::PointBlank::DOM::LeafBlock,
                    ::PointBlank::DOM::Paragraph then true
               else false
               end
  inline = modeswitch unless inline
  level += 1 unless inline
  children = element.children
  text = if children.empty?
           element.content
         else
           separator = inline ? '' : "\n"
           rendered = children.map do |child|
             _render(child, options, inline: inline, level: level)
           end
           rendered.join(separator)
         end
  run_filters(text, element, level: level,
                             inline: inline,
                             modeswitch: modeswitch)
end
|
||||
|
||||
# Apply wrapping, sanitizing and tag wrapping to rendered text.
# @param text [String] rendered content of the element
# @param element [Object] element whose class name selects the mapping
# @param level [Integer] nesting level, used to shrink the wrap width
# @param inline [Boolean] emit tags and text on one line when true
# @param modeswitch [Boolean] word-wrap the text when true
# @return [String]
def run_filters(text, element, level:, inline:, modeswitch:)
  # Classes without a mapping (or mapped to nil) render as bare text
  # instead of raising on the nil lookup.
  element_style = @mapping[element.class.name] || {}
  hsize = 80 - (level * @options["indent"])
  text = wordwrap(text, hsize) if modeswitch
  text = MMMD::EntityUtils.encode_entities(text) if element_style[:sanitize]
  opentag, closetag = construct_tags(element_style)
  return "#{opentag}#{text}#{closetag}" if inline

  [opentag, indent(text), closetag].join("\n")
end
|
||||
|
||||
# Build the opening and closing tag strings for a mapping entry.
# @param style [Hash, nil] mapping entry; reads :tag, :style and :outer
# @return [Array(String, String)] opening and closing tag; both empty
#   when there is no entry or no :tag
def construct_tags(style)
  return ["", ""] unless style && style[:tag]

  attributes = ""
  # Mapping entries use symbol keys, so :style must be read as a symbol.
  if style[:style]
    # Escape for an HTML double-quoted attribute value.
    escaped = style[:style].to_s.gsub("&", "&amp;").gsub('"', "&quot;")
    attributes = " style=\"#{escaped}\""
  end
  opentag = "<#{style[:tag]}#{attributes}>"
  closetag = "</#{style[:tag]}>"
  if style[:outer]
    # The :outer entry wraps around this element's own tags.
    outeropen, outerclose = construct_tags(style[:outer])
    opentag = outeropen + opentag
    closetag += outerclose
  end
  [opentag, closetag]
end
|
||||
|
||||
# Prefix every line of text with the configured number of spaces.
# @param text [String]
# @return [String]
def indent(text)
  padding = ' ' * @options["indent"]
  text.each_line.map { |line| padding + line }.join
end
|
||||
|
||||
def preambule
|
||||
@options['preambule'] or <<~TEXT
|
||||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<head>
|
||||
#{@document['head']}
|
||||
</head>
|
||||
<body>
|
||||
@options['preambule'] or <<~TEXT.rstrip
|
||||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<head>
|
||||
#{@options['head']}
|
||||
</head>
|
||||
<body>
|
||||
TEXT
|
||||
end
|
||||
|
||||
def postambule
|
||||
@options['postambule'] or <<~TEXT
|
||||
</body>
|
||||
</html>
|
||||
</body>
|
||||
</html>
|
||||
TEXT
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'json'
|
||||
|
||||
module MMMD
|
||||
# Utils for working with entities in strings
|
||||
module EntityUtils
|
||||
ENTITY_DATA = JSON.parse(File.read("#{__dir__}/entities.json"))
|
||||
|
||||
# Decode html entities in string.
# Decimal (&#NNN;), hexadecimal (&#xHHH;) and named (&name;)
# references are decoded in a single pass, so text produced by one
# replacement is never decoded a second time. Numeric references to
# code point 0, to surrogates or to values above U+10FFFF become
# U+FFFD instead of raising. Unknown named references are left as is.
# @param string [String]
# @return [String]
def self.decode_entities(string)
  string.gsub(/&(?:#(\d{1,7})|#[xX](\h{1,6})|\w+);/) do |match|
    decimal = Regexp.last_match(1)
    hex = Regexp.last_match(2)
    if decimal || hex
      codepoint = decimal ? decimal.to_i : hex.to_i(16)
      valid = codepoint.positive? && codepoint <= 0x10FFFF &&
              !(0xD800..0xDFFF).cover?(codepoint)
      valid ? codepoint.chr(Encoding::UTF_8) : "\uFFFD"
    else
      # ENTITY_DATA is keyed by the full reference, e.g. "&amp;".
      ENTITY_DATA.dig(match, "characters") || match
    end
  end
end
|
||||
|
||||
# Encode unsafe html entities in string.
# Escapes &, <, >, " and ' and replaces every non-ASCII character
# with a hexadecimal numeric character reference.
# @param string [String]
# @return [String]
# @sg-ignore
def self.encode_entities(string)
  # One pass, so the "&" of an inserted reference is never re-escaped.
  string.gsub(/[&<>"']|[^\x00-\x7F]/) do |char|
    case char
    when "&" then "&amp;"
    when "<" then "&lt;"
    when ">" then "&gt;"
    when '"' then "&quot;"
    when "'" then "&#39;"
    # The "&#x" prefix requires hexadecimal digits.
    else format("&#x%X;", char.ord)
    end
  end
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue