Камен обнови решението на 21.11.2011 00:55 (преди около 13 години)
class String
  # Applies +sub+ with the given pattern and replacement over and over until
  # the string stops changing, and returns the final string.
  #
  # Unlike +gsub+, every pass restarts the search from the beginning of the
  # string, so a replacement may expose a new match that the next pass picks
  # up. When nothing matches at all, the receiver itself is returned.
  #
  # NOTE: the loop only ends once a pass leaves the string unchanged, so a
  # replacement that keeps producing new matches will never terminate.
  def sub_match(regex, replacement)
    current = self
    loop do
      candidate = current.sub(regex, replacement)
      return current if candidate == current

      current = candidate
    end
  end
end
+
# Text clean-up steps that are independent of any particular Markdown tag:
# HTML escaping, trimming of insignificant whitespace and removal of
# leading/trailing blank lines.
class GeneralFormatter
  # Escapes the characters that have a special meaning in HTML.
  #
  # * +&+ becomes +&amp;+ (done first so the entities produced below are not
  #   escaped a second time)
  # * +<+ becomes +&lt;+
  # * +>+ becomes +&gt;+, except when it is the first character of a line;
  #   that one is left alone (TagFormatter#format_quotes matches a
  #   line-leading "> ")
  # * +"+ becomes +&quot;+
  #
  # Returns a new, escaped string.
  def escape_symbols(string)
    formated = string.gsub(/&/, '&amp;')
    formated = formated.gsub(/</, '&lt;')
    # A lookbehind (instead of consuming the preceding character) lets
    # consecutive '>' characters such as ">>" all be escaped.
    formated = formated.gsub(/(?<=.)>/, '&gt;')
    formated.gsub(/"/, '&quot;')
  end

  # Strips insignificant whitespace around line content.
  #
  # One to three leading spaces in front of a non-blank character are removed
  # (longer indents are left untouched). Trailing spaces are removed from
  # lines that do not begin with whitespace or with '>'.
  #
  # Returns a new string.
  def remove_whitespace(string)
    formated = string.gsub(/^ {1,3}(?<content>\S)/, '\k<content>')
    formated.gsub(/^(?<content>[^\s>].*?) *$/, '\k<content>')
  end

  # Removes blank (spaces-only) lines from the very beginning of the string
  # and any run of spaces and newlines from the very end.
  #
  # Returns a new string.
  def remove_lines(string)
    # Every leading line that consists only of spaces.
    formated = string.sub(/\A(?: *\n)*/, '')
    # Every trailing space/newline, in any interleaving.
    formated.sub(/[ \n]*\z/, '')
  end
end
+
# Converts Markdown-like markup into HTML tags using successive regular
# expression substitutions. Relies on String#sub_match (defined in this file)
# for substitutions that must be repeated until the text stops changing.
class TagFormatter
  # Runs every tag conversion in the order the later steps depend on:
  # headers, inline styles, lists, quotes, code blocks and finally
  # paragraphs (which wrap whatever is left and then unwrap block elements).
  #
  # Returns the converted string.
  def format_tags(string)
    formated = format_headers(string)
    formated = format_styles(formated)

    formated = format_ordered_lists(formated)
    formated = format_unordered_lists(formated)

    formated = format_quotes(formated)

    formated = format_code_block(formated)
    format_paragraph(formated)
  end

  # Undoes the paragraph wrapping that format_paragraph put around list
  # markup: a list cannot be inside a paragraph.
  def format_paragraph_lists(string)
    formated = string.gsub('<p><li>', ' <li>')
    formated = formated.gsub('</li></p>', '</li>')
    formated = formated.gsub('<p><ul></p>', '<ul>')
    formated = formated.gsub('<p></ul></p>', '</ul>')
    formated = formated.gsub('<p><ol></p>', '<ol>')
    formated.gsub('<p></ol></p>', '</ol>')
  end

  # Undoes or rearranges the paragraph wrapping around headers, blockquotes
  # and code blocks, and merges code blocks on adjacent lines into one.
  def format_paragraph_codeblocks(string)
    # A header cannot be inside a paragraph
    formated = string.gsub(/<p>(?<content><h[1234]>)/, '\k<content>')
    formated = formated.gsub(/(?<content><\/h[1234]>)<\/p>/, '\k<content>')
    # The paragraph goes inside the blockquote, not around it
    formated = formated.gsub('<p><blockquote>', '<blockquote><p>')
    formated = formated.gsub('</blockquote></p>', '</p></blockquote>')
    # A code block cannot be inside a paragraph
    formated = formated.gsub('<p><pre><code>', '<pre><code>')
    formated = formated.gsub('</code></pre></p>', '</code></pre>')
    # Code blocks on consecutive lines become a single block
    formated.gsub(/<\/code><\/pre>\n<pre><code>/, "\n")
  end

  # Wraps every non-blank line in <p>...</p>, removes the wrapper again where
  # a block element forbids it, drops empty paragraphs and merges paragraphs
  # on consecutive lines into one.
  def format_paragraph(string)
    # Open a paragraph at the first non-blank character of each line ...
    formated = string.gsub(/^ *(?<content>\S)/, '<p>\k<content>')
    # ... and close it after the last non-blank character.
    formated = formated.gsub(/(?<content>\S) *$/, '\k<content></p>')

    formated = format_paragraph_codeblocks(formated)
    formated = format_paragraph_lists(formated)

    formated = formated.gsub('<p></p>', '')
    formated.gsub(/<\/p>\n<p>/, "\n")
  end

  # Converts lines starting with one to four '#' characters followed by at
  # least one space and non-empty content into <h1>..<h4>.
  # The longest marker is tried first so "####" is not taken for "#".
  def format_headers(string)
    formated = string.gsub(/^#### +(?<content>\S.*)/, '<h4>\k<content></h4>')
    formated = formated.gsub(/^### +(?<content>\S.*)/, '<h3>\k<content></h3>')
    formated = formated.gsub(/^## +(?<content>\S.*)/, '<h2>\k<content></h2>')
    formated.gsub(/^# +(?<content>\S.*)/, '<h1>\k<content></h1>')
  end

  # Converts inline markup: _italic_, **strong** and [text](link).
  def format_styles(string)
    # Italic
    formated = string.gsub(/_(?<content>.+?)_/, '<em>\k<content></em>')
    # Strong
    formated = formated.gsub(/\*\*(?<content>.+?)\*\*/, '<strong>\k<content></strong>')

    # Improperly nested strong/italic pairs (one opens inside the other but
    # closes outside it) are reverted to their literal markers. The lazy
    # quantifiers matter here: they keep each match as short as possible.
    formated = formated.sub_match(/(?<c1><strong>.*?)<em>(?<c2>.*?<\/strong>.*?)<\/em>/,
                                  '\k<c1>_\k<c2>_')
    formated = formated.sub_match(/(?<c1><em>.*?)<strong>(?<c2>.*?<\/em>.*?)<\/strong>/,
                                  '\k<c1>**\k<c2>**')

    # Links (both text and target must be non-empty). Lazy quantifiers keep
    # several links on the same line from being merged into a single anchor.
    formated.gsub(/\[(?<c>.+?)\]\((?<l>.+?)\)/, '<a href="\k<l>">\k<c></a>')
  end

  # Converts lines starting with "> " into blockquotes; quotes on
  # consecutive lines are merged into a single <blockquote>.
  def format_quotes(string)
    formated = string.gsub(/^> (?<content>.*)/, '<blockquote>\k<content></blockquote>')
    formated.gsub(/<\/blockquote>\n<blockquote>/, "\n")
  end

  # Converts lines starting with "* " into <li> items of a <ul>. Each item
  # first gets its own list; lists on consecutive lines are then merged and
  # the <ul> tags are moved onto their own lines.
  def format_unordered_lists(string)
    formated = string.gsub(/^\* (?<content>.+)/, '<ul> <li>\k<content></li></ul>')
    formated = formated.gsub(/<\/ul>\n<ul>/, "\n")
    formated = formated.gsub('<ul>', "<ul>\n")
    formated.gsub('</ul>', "\n</ul>")
  end

  # Converts lines starting with "<number>. " into <li> items of an <ol>,
  # using the same merge strategy as format_unordered_lists. The number may
  # have any count of digits, so lists longer than nine items stay intact.
  def format_ordered_lists(string)
    formated = string.gsub(/^[0-9]+\. (?<content>.+)/, '<ol> <li>\k<content></li></ol>')
    formated = formated.gsub(/<\/ol>\n<ol>/, "\n")
    formated = formated.gsub('<ol>', "<ol>\n")
    formated.gsub('</ol>', "\n</ol>")
  end

  # Converts lines indented with four spaces into <pre><code> blocks and
  # reverts any inline markup (italic, strong, links) that format_styles
  # already produced inside them back to the literal source text.
  def format_code_block(string)
    format = string.gsub(/^ {4,4}(?<c>.*)/, '<pre><code>\k<c></code></pre>')

    # Each pass reverts the first remaining tag on a code line, hence the
    # repeat-until-stable substitution.
    format = format.sub_match(/^<pre><code>(?<c1>.*?)<em>(?<c2>.*?)<\/em>/,
                              '<pre><code>\k<c1>_\k<c2>_')

    format = format.sub_match(/^<pre><code>(?<c1>.*?)<strong>(?<c2>.*?)<\/strong>/,
                              '<pre><code>\k<c1>**\k<c2>**')

    format.sub_match(/^<pre><code>(?<c>.*?)<a href="(?<l>.*?)">(?<d>.*?)<\/a>/,
                     '<pre><code>\k<c>[\k<d>](\k<l>)')
  end
end
+
# Entry point of the converter: wraps a raw text and renders it as HTML.
class Formatter
  # unformated_string - the raw input text; it is stored as given.
  def initialize(unformated_string)
    @unformated_string = unformated_string
  end

  # Renders the stored text as HTML: the text is escaped and trimmed first,
  # then the tags are generated, and finally blank lines at both ends of the
  # result are dropped.
  def to_html
    general = GeneralFormatter.new
    tags = TagFormatter.new

    cleaned = general.remove_whitespace(general.escape_symbols(@unformated_string))
    general.remove_lines(tags.format_tags(cleaned))
  end

  # The string form of a Formatter is its HTML rendering.
  def to_s
    to_html
  end

  # Inspecting a Formatter yields the original, unconverted text.
  def inspect
    @unformated_string
  end
end