require 'time' # Time.parse lives in the stdlib "time" extension

module WordPressImport
  # Read-only wrapper around one page node of a WordPress export.
  #
  # The only thing required of +node+ is that it answers
  # <tt>xpath(expr).text</tt> (presumably a Nokogiri XML element -- confirm
  # against the caller that builds these objects).
  class Page
    include ::ActionView::Helpers::TagHelper
    include ::ActionView::Helpers::TextHelper

    attr_reader :node

    # @param node the export node this page reads its fields from
    def initialize(node)
      @node = node
    end

    # @return [String] short debugging representation (id and title)
    def inspect
      "WordPress::Page(#{post_id}): #{title}"
    end

    # @return [String] text of the <title> element
    def title
      node.xpath("title").text
    end

    # @return [String] raw text of the <content:encoded> element
    def content
      node.xpath("content:encoded").text
    end

    # Page content converted to HTML: paragraphs are wrapped, [lang] blocks
    # become <pre> blocks, and any markup the paragraph formatter injected
    # inside a <pre> block is stripped again.
    #
    # NOTE(review): the "<pre.*?>" opening-tag pattern was reconstructed; the
    # copy of this file under review had the markup inside its literals
    # stripped. Confirm against version control.
    #
    # TODO: replace format_paragraphs with a method that ignores pre-tags.
    #
    # @return [String]
    def content_formatted
      formatted = format_syntax_highlighter(format_paragraphs(content))

      formatted.gsub(/(<pre.*?>)(.+?)(<\/pre>)/m) do
        # Capture the match data before calling any helper.
        pre = Regexp.last_match
        "#{pre[1]}#{strip_tags(pre[2])}#{pre[3]}"
      end
    end

    # @return [String] text of the <dc:creator> element
    def creator
      node.xpath("dc:creator").text
    end

    # @return [Time] parsed <wp:post_date>
    # @raise [ArgumentError] if the element text is not a parseable date
    def post_date
      Time.parse(node.xpath("wp:post_date").text)
    end

    # @return [Time] parsed <pubDate>
    # @raise [ArgumentError] if the element text is not a parseable date
    def publish_date
      Time.parse(node.xpath("pubDate").text)
    end

    # @return [String] text of the <wp:post_name> element
    def post_name
      node.xpath("wp:post_name").text
    end

    # @return [Integer] <wp:post_id> (0 when the element is empty or missing,
    #   because String#to_i is used)
    def post_id
      node.xpath("wp:post_id").text.to_i
    end

    # @return [Integer, nil] <wp:post_parent>, or nil when it is 0
    def parent_id
      dump_id = node.xpath("wp:post_parent").text.to_i
      dump_id.zero? ? nil : dump_id
    end

    # @return [String] text of the <wp:status> element
    def status
      node.xpath("wp:status").text
    end

    # Every status other than 'publish' counts as a draft.
    def draft?
      status != 'publish'
    end

    def published?
      !draft?
    end

    # Two pages are equal when their post ids match. Objects that do not
    # expose +post_id+ (nil, strings, ...) simply compare as unequal instead
    # of raising NoMethodError.
    def ==(other)
      other.respond_to?(:post_id) && post_id == other.post_id
    end

    # Creates the corresponding ::Page record.
    #
    # Field mapping:
    #   wp:post_id                  -> id
    #   title                       -> title
    #   wp:post_date                -> created_at
    #   wp:post_name                -> slug
    #   pubDate                     -> published_at
    #   content:encoded (formatted) -> body
    # Not mapped yet:
    #   dc:creator                  -> user_id
    #
    # @return [::Page] whatever ::Page.create! returns
    # @raise whatever ::Page.create! raises when the record cannot be saved
    def to_rails
      # :user_id => creator
      ::Page.create!(:id => post_id, :title => title,
                     :created_at => post_date, :slug => post_name,
                     :published_at => publish_date, :body => content_formatted)
    end

    private

    # WordPress doesn't export <p>-tags, so run a simple_format-like pass over
    # the content. As we trust ourselves, no sanitize. Heavily inspired by the
    # simple_format Rails helper.
    #
    # Unlike the previous implementation this does not modify +text+ in place,
    # and paragraphs are split BEFORE single newlines become line breaks.
    # Doing it the other way round leaves no run of 2+ newlines for the
    # paragraph rule to match.
    #
    # NOTE(review): the "<br />" and "</p>" literals were reconstructed from
    # the inline comments ("-> line break", "2+ newline -> paragraph"); the
    # reviewed copy had them stripped. Confirm against version control.
    #
    # @param text [String, nil] raw content; nil is treated as empty
    # @param html_options [Hash] attributes for every opening <p> tag
    # @return [String] html_safe markup
    def format_paragraphs(text, html_options={})
      start_tag = tag('p', html_options, true)
      paragraph_break = "</p>\n\n#{start_tag}"

      # \r\n and \n -> \n, then 2+ newlines -> paragraph boundary.
      # The -1 limit keeps leading/trailing empty paragraphs, matching what a
      # plain substitution of the separator would produce.
      paragraphs = text.to_s.gsub(/\r?\n/, "\n").split(/\n\n+/, -1)

      # Remaining single newlines -> line break.
      body = paragraphs.map { |para| para.gsub("\n", "<br />\n") }.join(paragraph_break)

      "#{start_tag}#{body}</p>".html_safe
    end

    # Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
    # In WordPress you can (via a plugin) enclose code in [lang][/lang]
    # blocks, which are converted to a <pre>-tag with a class corresponding
    # to the language.
    #
    # Example:
    #   [ruby]p "Hello World"[/ruby]
    #   -> <pre class="brush: ruby">p "Hello World"</pre>
    #
    # NOTE(review): the replacement markup (including the "brush: " class
    # prefix) was reconstructed; the reviewed copy only retained '\2'.
    # Confirm against version control.
    #
    # @param text [String]
    # @return [String] a new string; +text+ is left untouched
    def format_syntax_highlighter(text)
      text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
    end
  end
end