From bd4cccd91ad4061abc872a38720960c125c6dcaa Mon Sep 17 00:00:00 2001
From: Will Bradley <bradley.will@gmail.com>
Date: Wed, 5 Mar 2014 21:27:57 -0700
Subject: [PATCH] Refining post/author/dump behavior

---
 README.rdoc             |  4 +++
 lib/wordpress/author.rb | 15 +++++++--
 lib/wordpress/dump.rb   |  5 +++
 lib/wordpress/page.rb   | 23 +++++++++-----
 lib/wordpress/post.rb   | 69 +++++++++++++++++------------------------
 5 files changed, 64 insertions(+), 52 deletions(-)

diff --git a/README.rdoc b/README.rdoc
index 976da2d..0e55154 100644
--- a/README.rdoc
+++ b/README.rdoc
@@ -11,6 +11,10 @@ If your site (blog) structure uses new urls, the links WILL break! For example,
 the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug". 
 So your inner site links will point to the old WP url. 
 
+== TODO
+- attachments
+- categories
+- tags
 
 == Prerequisites
 
diff --git a/lib/wordpress/author.rb b/lib/wordpress/author.rb
index 105693e..c88a2a6 100644
--- a/lib/wordpress/author.rb
+++ b/lib/wordpress/author.rb
@@ -6,6 +6,12 @@ module WordPressImport
       @author_node = author_node
     end
 
+    def name
+      name = author_node.xpath("wp:author_display_name").text
+      name = author_node.xpath("wp:author_first_name").text + " " + author_node.xpath("wp:author_first_name").text if name.blank?
+      name
+    end
+
     def login
       author_node.xpath("wp:author_login").text
     end
@@ -22,13 +28,16 @@ module WordPressImport
       "WordPress::Author: #{login} <#{email}>"
     end
 
-    def to_refinery
-      user = User.find_or_initialize_by_username_and_email(login, email)
+    def to_rails # Finds or initializes the ::User with this author's email, saves it and returns it.
+      user = ::User.find_or_initialize_by_email(email)
+      user.wp_username = login # assigned on every run, for already-persisted users too
+
       unless user.persisted?
+        user.name = name # only filled in for users that are not persisted yet
         user.password = 'password'
         user.password_confirmation = 'password'
-        user.save
       end
+      user.save # result is not checked; the user object is returned either way
       user
     end
   end
diff --git a/lib/wordpress/dump.rb b/lib/wordpress/dump.rb
index 41cac23..c5ceb04 100644
--- a/lib/wordpress/dump.rb
+++ b/lib/wordpress/dump.rb
@@ -9,6 +9,11 @@ module WordPressImport
         unless File.file?(file_name) && File.readable?(file_name)
       
       file = File.open(file_name)
+      
+      if file.size >= 10485760 # 10MB
+        puts "WARNING: LibXML by default supports 10MB max file size. On some systems your file will be silently truncated; on others, an error will be raised. Consider splitting your file into smaller chunks, or double-checking the import results."
+      end
+
       @doc = Nokogiri::XML(file)
     end
 
diff --git a/lib/wordpress/page.rb b/lib/wordpress/page.rb
index c2e1827..32f24aa 100644
--- a/lib/wordpress/page.rb
+++ b/lib/wordpress/page.rb
@@ -38,7 +38,15 @@ module WordPressImport
     end
 
     def post_date
-      DateTime.parse node.xpath("wp:post_date").text
+      Time.parse node.xpath("wp:post_date").text # text of the node's wp:post_date element, parsed into a Time
+    end
+
+    def publish_date # Time parsed from the text of the node's pubDate element.
+      Time.parse node.xpath("pubDate").text # NOTE(review): Time.parse raises ArgumentError on empty text - confirm every item has a pubDate
+    end
+
+    def post_name # Text of the node's wp:post_name element.
+      node.xpath("wp:post_name").text
     end
 
     def post_id
@@ -67,7 +75,7 @@ module WordPressImport
     end
 
     #NEED:
-    # dc:creator ->  "user_id"
+    # creator ->  "user_id"
     # wp:post_name ->   "slug"
     # pubDate -> "published_at"
     #OK:
@@ -75,12 +83,11 @@ module WordPressImport
     # content:encoded ->     "body"
     # wp:post_date_gmt -> "created_at"
 
-    def to_refinery
+    def to_rails # Creates a ::Page from this node with create! and returns the created record.
+      # TODO: :user_id => creator is not assigned yet
       page = ::Page.create!(:id => post_id, :title => title, 
-        :created_at => post_date, :draft => draft?)
-
-      page.parts.create(:title => 'Body', :body => content_formatted)
-      page
+        :created_at => post_date, :slug => post_name, 
+        :published_at => publish_date, :body => content_formatted)
     end
 
     private 
@@ -92,7 +99,7 @@ module WordPressImport
       text = ''.html_safe if text.nil?
       start_tag = tag('p', html_options, true)
       
-      text.gsub!(/\r\n?/, "\n")                    # \r\n and \r -> \n
+      text.gsub!(/\r?\n/, "<br/>\n")               # \r\n and \n -> line break
       text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}")  # 2+ newline  -> paragraph
       text.insert 0, start_tag
 
diff --git a/lib/wordpress/post.rb b/lib/wordpress/post.rb
index 453b61d..62a7d98 100644
--- a/lib/wordpress/post.rb
+++ b/lib/wordpress/post.rb
@@ -29,53 +29,40 @@ module WordPressImport
       end
     end
 
-    def to_refinery
-      user = ::User.find_by_username(creator) || ::User.first
-      raise "Referenced User doesn't exist! Make sure the authors are imported first." \
-        unless user
+    def to_rails # Builds or updates the ::Post for this node plus one "en" translation; returns the reloaded post, or raises if the user is missing or the post has errors.
 
-      begin
-        post = ::BlogPost.new :title => title, :body => content_formatted,
-          :draft => draft?, :published_at => post_date, :created_at => post_date,
-          :user_id => user.id, :tag_list => tag_list
-        post.save!
+      user = ::User.find_by_wp_username(creator) # presumably the wp_username written by the author import - confirm authors are imported first
 
-        ::BlogPost.transaction do
-          categories.each do |category|
-            post.categories << category.to_refinery
-          end
-
-          comments.each do |comment|
-            comment = comment.to_refinery
-            comment.post = post
-            comment.save
-          end
-        end
-      rescue ActiveRecord::RecordInvalid
-        # if the title has already been taken (WP allows duplicates here,
-        # refinery doesn't) append the post_id to it, making it unique
-        post.title = "#{title}-#{post_id}"
-        post.save
+      if user.nil? 
+        raise "User with wp_username #{creator} not found"
       end
 
-      post
-    end
+      post = ::Post.find_or_initialize_by(:id => post_id, :slug => post_name) # reuse the post with this id and slug, else start a new one
 
-    def self.create_blog_page_if_necessary
-      # refinerycms wants a page at /blog, so let's make sure there is one
-      # taken from the original db seeds from refinery-blog
-      unless ::Page.where("link_url = ?", '/blog').exists?
-        page = ::Page.create(
-          :title => "Blog",
-          :link_url => "/blog",
-          :deletable => false,
-          :position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
-          :menu_match => "^/blogs?(\/|\/.+?|)$"
-        )
+      post.assign_attributes( 
+        :user_id => user.id, :title => title, 
+        :created_at => post_date, 
+        :published_at => publish_date)
+      # :body is not assigned here; content_formatted goes into the translation below
 
-        ::Page.default_parts.each do |default_page_part|
-          page.parts.create(:title => default_page_part, :body => nil)
-        end
+      if post.translations.blank? # build a translation only when the post has none; otherwise reuse the first
+        translation = post.translations.build
+      else
+        translation = post.translations.first
+      end
+      
+      translation.locale = "en" # locale is hard-coded
+      translation.title = title
+      translation.body = content_formatted
+      translation.save # NOTE(review): runs before post.save and its result is not checked - confirm this works for a brand-new post
+      
+      post.save
+
+      if post.errors.blank?
+        return post.reload
+      else
+        puts post.inspect # print the record before raising
+        raise post.errors.full_messages.to_s
       end
     end