# Zdravko updated the solution on 23.11.2011 21:45 (almost 13 years ago)
# encoding: utf-8

# String helpers that are independent of any particular markup rule.
module StringUtils
  # Rebuilds +string+ by passing every stretch of text that is NOT matched by
  # +regexp+ through the given block, while copying the matched stretches
  # through unchanged.
  #
  # The block is called once for the text before each match and once for the
  # text after the last match (so it is called exactly once, with the whole
  # string, when nothing matches). Empty stretches are yielded too.
  #
  # @param string [String] the text to process
  # @param regexp [Regexp, String] the pattern delimiting the untouched parts;
  #   a String is matched literally
  # @yieldparam part [String] a non-matching stretch of +string+
  # @yieldreturn [String] the replacement for that stretch
  # @return [String] a new string; +string+ itself is not modified
  def self.gsub_nonmatching_parts(string, regexp)
    pattern = Regexp.union(regexp)
    result = "".dup
    position = 0

    # Scanning the original string (rather than repeatedly partitioning the
    # remaining tail) keeps anchors and lookbehinds relative to the whole
    # input and guarantees progress on zero-width matches.
    string.scan(pattern) do
      match = Regexp.last_match
      result << yield(string[position...match.begin(0)]) << match[0]
      position = match.end(0)
    end

    result << yield(string[position..-1])
  end
end
# Describes one kind of block-level construct: how to locate it in the text,
# how to pull the content out of each of its lines, and which tags wrap it.
class BlockType
  # localizer      - pattern matching a whole block of this type
  # tokens_matcher - pattern matching one line of the block; exposes the
  #                  line's content as the named group "token"
  # token_tags     - tag name(s) wrapped around each token, or nil for none
  attr_reader :localizer, :tokens_matcher, :token_tags

  # @param localizer [Regexp] pattern matching a whole block
  # @param tokens_matcher [Regexp] pattern matching a single line of the block
  # @param allows_formatting [Boolean] value returned by #allows_formatting?
  # @param tags [String, Array<String>, #call] tag name(s) wrapped around the
  #   block, or a callable that computes them from the block's match
  # @param token_tags [String, Array<String>, nil] tag name(s) per token
  # @param implies_paragraph_parsing [Boolean] value returned by
  #   #implies_paragraph_parsing?
  def initialize(localizer, tokens_matcher, allows_formatting, tags,
                 token_tags = nil, implies_paragraph_parsing = false)
    @localizer = localizer
    @tokens_matcher = tokens_matcher
    @allows_formatting = allows_formatting
    @tags = tags
    @token_tags = token_tags
    @implies_paragraph_parsing = implies_paragraph_parsing
  end

  # @return [Boolean] the allows_formatting flag given at construction
  def allows_formatting?
    @allows_formatting
  end

  # @return [Boolean] the implies_paragraph_parsing flag given at construction
  def implies_paragraph_parsing?
    @implies_paragraph_parsing
  end

  # Resolves the wrapping tag(s) for a concrete block.
  #
  # @param match [Object] passed to the tags callable when there is one
  # @return the result of calling the configured tags with +match+ when they
  #   respond to #call, otherwise the configured tags themselves
  def tags(match)
    @tags.respond_to?(:call) ? @tags.call(match) : @tags
  end
end
# Inline rule for links written as [label](target).
class LinksType
  # Group 1 is the label, group 2 the target; neither may span lines.
  MATCHER = /\[([^\]\n]*)\]\(([^\)\n]*)\)/

  # @return [Regexp] pattern locating a link in the text
  def matcher
    MATCHER
  end

  # Renders one link.
  #
  # @param match [MatchData] a match produced by #matcher
  # @return [String] an anchor element whose href is group 2 and whose
  #   content is group 1; neither value is escaped here
  def format(match)
    label = match[1]
    target = match[2]
    "<a href=\"#{target}\">#{label}</a>"
  end
end
# Inline rule for **strong** and _emphasised_ text, including nesting of one
# inside the other.
class EmphasisType
  # Group 1 is the content of a **...** run, group 3 the content of a _..._
  # run (group 2 is only an artefact of the repetition and is not used).
  #
  # The strong content is a sequence of "optional star + ONE non-star
  # character" units. This accepts the same strings as a unit ending in a
  # greedy run of non-star characters, but leaves only one way to split the
  # text into units, so a `**` that is never closed cannot trigger
  # exponential backtracking.
  MATCHER = /\*\*((\*?[^\*\n])+)\*\*|_([^_\n]+)_/

  # @return [Regexp] pattern locating a strong or emphasised run
  def matcher
    MATCHER
  end

  # Renders one run, first rendering any emphasis nested inside its content.
  #
  # @param match [MatchData] a match produced by #matcher
  # @return [String] the content wrapped in <strong> when group 1 matched,
  #   otherwise the content of group 3 wrapped in <em>
  def format(match)
    strong_content = match[1]
    content = strong_content || match[3]
    rendered = content.gsub(matcher) { format(Regexp.last_match) }

    strong_content ? "<strong>#{rendered}</strong>" : "<em>#{rendered}</em>"
  end
end
# Converts a small Markdown-like dialect to HTML: headers, code blocks,
# block quotes, unordered and ordered lists, links, strong/emphasised text,
# with everything else wrapped in paragraphs.
#
# NOTE(review): the single leading space in the code-block patterns and the
# one-space indent put in front of list items (see #parse_tokens) are
# reproduced exactly as found. The text this class was recovered from had its
# whitespace flattened, so the intended widths may have been larger
# (Markdown's convention for code is four spaces) - confirm against the
# original source.
class Formatter
  # Block-level constructs, in the order they are rewritten by #to_html.
  BLOCK_TYPES = [
    # Headers: one to four '#' followed by blanks and text -> h1..h4.
    BlockType.new(
      /^(\#{1,4})[[:blank:]]+\S.*$/,
      /^\#{1,4}[[:blank:]]+(?<token>\S.*)$/,
      true, ->(match) { "h#{match[1].length}" }
    ),
    # Code: consecutive lines starting with a space -> <pre><code>, with no
    # inline formatting applied.
    BlockType.new(
      /(^ .*\n)*(^ .*$)/,
      /^ (?<token>.*)$/,
      false, ["pre", "code"]
    ),
    # Quotes: consecutive lines starting with '>' -> <blockquote>, whose
    # content is itself split into paragraphs.
    BlockType.new(
      /(^>[[:blank:]]+.*\n)*(^>[[:blank:]]+.*$)/,
      /^>[[:blank:]]+(?<token>.*)$/,
      true, "blockquote", nil, true
    ),
    # Unordered lists: consecutive lines starting with '*' -> <ul><li>.
    BlockType.new(
      /(^\*[[:blank:]]+\S.*\n)*(^\*[[:blank:]]+\S.*$)/,
      /^\*[[:blank:]]+(?<token>\S.*)$/,
      true, "ul", "li"
    ),
    # Ordered lists: consecutive lines starting with a digit and a dot
    # -> <ol><li>.
    BlockType.new(
      /(^\d\.[[:blank:]]+\S.*\n)*(^\d\.[[:blank:]]+\S.*$)/,
      /^\d\.[[:blank:]]+(?<token>\S.*)$/,
      true, "ol", "li"
    ),
  ].freeze

  # Matches a block of any of the types above.
  BLOCK_MATCHER = Regexp.union(*BLOCK_TYPES.map(&:localizer))

  # Inline constructs, applied in this order.
  INLINE_TYPES = [
    LinksType.new,
    EmphasisType.new,
  ].freeze

  # HTML-significant characters and their entities. '&' has to stay first so
  # that the ampersands introduced by the later entries are not escaped again.
  ESCAPE_SEQUENCES = [
    [/&/, "&amp;"],
    [/</, "&lt;"],
    [/>/, "&gt;"],
    [/"/, "&quot;"],
  ].freeze

  # @param text [String] the source text to convert
  def initialize(text)
    @text = text
  end

  # Renders the source text as HTML.
  #
  # Text outside block constructs is first trimmed line by line, then
  # escaped, given inline formatting and split into paragraphs at blank
  # lines. Each block type is then rewritten in turn, and finally the
  # paragraph tags are tidied up.
  #
  # @return [String] the generated HTML
  def to_html
    trimmed = StringUtils.gsub_nonmatching_parts(@text, BLOCK_MATCHER) do |plain|
      remove_whitespaces(plain)
    end
    html = StringUtils.gsub_nonmatching_parts(trimmed, BLOCK_MATCHER) do |plain|
      apply_formatting(escape(plain)).gsub(/\n\n(\n*)/, "\n</p>\\1<p>\n")
    end

    html = BLOCK_TYPES.reduce(html) { |partial, block| parse_block(partial, block) }

    arrange_paragraphs("<p>#{html}</p>")
  end

  alias to_s to_html

  # @return [String] the unconverted source text
  def inspect
    @text
  end

  private

  # Strips leading and trailing blanks from every line of +text+.
  def remove_whitespaces(text)
    text.gsub(/^[[:blank:]]*(.*\S)?[[:blank:]]*$/, "\\1")
  end

  # Wraps +text+ in +tags+ (one tag name or several, outermost first),
  # closing them in reverse order.
  def add_tags(text, tags)
    names = Array(tags)
    "<#{names.join('><')}>#{text}</#{names.reverse.join('></')}>"
  end

  # Replaces the characters listed in ESCAPE_SEQUENCES with their entities.
  def escape(text)
    ESCAPE_SEQUENCES.reduce(text) do |escaped, (pattern, replacement)|
      escaped.gsub(pattern, replacement)
    end
  end

  # Trims every line of +text+ and renders the inline constructs in it.
  def apply_formatting(text)
    INLINE_TYPES.reduce(remove_whitespaces(text)) do |formatted, inline|
      formatted.gsub(inline.matcher) { inline.format(Regexp.last_match) }
    end
  end

  # Rewrites each line of a matched block: the line's token is escaped,
  # formatted when the block type allows it, and wrapped in the per-token
  # tags (preceded by a space) when the block type defines them.
  def parse_tokens(match, block)
    match.to_s.gsub(block.tokens_matcher) do
      token = escape(Regexp.last_match["token"])
      token = apply_formatting(token) if block.allows_formatting?
      token = " " + add_tags(token, block.token_tags) unless block.token_tags.nil?
      token
    end
  end

  # Replaces every block of the given type in +text+ with its HTML.
  #
  # Each block closes the paragraph that was open before it and opens a new
  # one after it; the empty paragraphs this leaves behind are removed later
  # by #arrange_paragraphs.
  def parse_block(text, block)
    text.gsub(block.localizer) do
      match = Regexp.last_match
      content = parse_tokens(match, block)
      # Blocks with per-token tags put their tokens on lines of their own.
      content = "\n#{content}\n" unless block.token_tags.nil?
      if block.implies_paragraph_parsing?
        content = "<p>" + content.gsub(/(\n{2,})/, "</p>\\1<p>") + "</p>"
      end
      "</p>" + add_tags(content, block.tags(match)) + "<p>"
    end
  end

  # Tidies the paragraph markup: newlines right after an opening tag are
  # moved in front of it, newlines right before a closing tag are moved
  # behind it, and paragraphs containing only whitespace are dropped.
  def arrange_paragraphs(text)
    fixes = [
      [/<p>(\n+)/, "\\1<p>"],
      [/(\n+)<\/p>/, "</p>\\1"],
      [/<p>\s*<\/p>/, ""],
    ]

    fixes.reduce(text) { |tidied, (pattern, fix)| tidied.gsub(pattern, fix) }.strip
  end
end