From bd4cccd91ad4061abc872a38720960c125c6dcaa Mon Sep 17 00:00:00 2001 From: Will Bradley Date: Wed, 5 Mar 2014 21:27:57 -0700 Subject: [PATCH] Refining post/author/dump behavior --- README.rdoc | 4 +++ lib/wordpress/author.rb | 15 +++++++-- lib/wordpress/dump.rb | 5 +++ lib/wordpress/page.rb | 23 +++++++++----- lib/wordpress/post.rb | 69 +++++++++++++++++------------------------ 5 files changed, 64 insertions(+), 52 deletions(-) diff --git a/README.rdoc b/README.rdoc index 976da2d..0e55154 100644 --- a/README.rdoc +++ b/README.rdoc @@ -11,6 +11,10 @@ If your site (blog) structure uses new urls, the links WILL break! For example, the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug". So your inner site links will point to the old WP url. +== TODO +- attachments +- categories +- tags == Prerequisites diff --git a/lib/wordpress/author.rb b/lib/wordpress/author.rb index 105693e..c88a2a6 100644 --- a/lib/wordpress/author.rb +++ b/lib/wordpress/author.rb @@ -6,6 +6,12 @@ module WordPressImport @author_node = author_node end + def name + name = author_node.xpath("wp:author_display_name").text + name = author_node.xpath("wp:author_first_name").text + " " + author_node.xpath("wp:author_first_name").text if name.blank? + name + end + def login author_node.xpath("wp:author_login").text end @@ -22,13 +28,16 @@ module WordPressImport "WordPress::Author: #{login} <#{email}>" end - def to_refinery - user = User.find_or_initialize_by_username_and_email(login, email) + def to_rails + user = ::User.find_or_initialize_by_email(email) + user.wp_username = login + unless user.persisted? + user.name = name user.password = 'password' user.password_confirmation = 'password' - user.save end + user.save user end end diff --git a/lib/wordpress/dump.rb b/lib/wordpress/dump.rb index 41cac23..c5ceb04 100644 --- a/lib/wordpress/dump.rb +++ b/lib/wordpress/dump.rb @@ -9,6 +9,11 @@ module WordPressImport unless File.file?(file_name) && File.readable?(file_name) file = File.open(file_name) + + if file.size >= 10485760 # 10MB + puts "WARNING: LibXML by default supports 10MB max file size. On some systems your file will be silently truncated; on others, an error will be raised. Consider splitting your file into smaller chunks, or double-checking the import results." + end + @doc = Nokogiri::XML(file) end diff --git a/lib/wordpress/page.rb b/lib/wordpress/page.rb index c2e1827..32f24aa 100644 --- a/lib/wordpress/page.rb +++ b/lib/wordpress/page.rb @@ -38,7 +38,15 @@ module WordPressImport end def post_date - DateTime.parse node.xpath("wp:post_date").text + Time.parse node.xpath("wp:post_date").text + end + + def publish_date + Time.parse node.xpath("pubDate").text + end + + def post_name + node.xpath("wp:post_name").text end def post_id @@ -67,7 +75,7 @@ module WordPressImport end #NEED: - # dc:creator -> "user_id" + # creator -> "user_id" # wp:post_name -> "slug" # pubDate -> "published_at" #OK: @@ -75,12 +83,11 @@ module WordPressImport # content:encoded -> "body" # wp:post_date_gmt -> "created_at" - def to_refinery + def to_rails + # :user_id => creator page = ::Page.create!(:id => post_id, :title => title, - :created_at => post_date, :draft => draft?) - - page.parts.create(:title => 'Body', :body => content_formatted) - page + :created_at => post_date, :slug => post_name, + :published_at => publish_date, :body => content_formatted) end private @@ -92,7 +99,7 @@ module WordPressImport text = ''.html_safe if text.nil? start_tag = tag('p', html_options, true) - text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n + text.gsub!(/\r?\n/, "
\n") # \r\n and \n -> line break text.gsub!(/\n\n+/, "

\n\n#{start_tag}") # 2+ newline -> paragraph text.insert 0, start_tag diff --git a/lib/wordpress/post.rb b/lib/wordpress/post.rb index 453b61d..62a7d98 100644 --- a/lib/wordpress/post.rb +++ b/lib/wordpress/post.rb @@ -29,53 +29,40 @@ module WordPressImport end end - def to_refinery - user = ::User.find_by_username(creator) || ::User.first - raise "Referenced User doesn't exist! Make sure the authors are imported first." \ - unless user + def to_rails - begin - post = ::BlogPost.new :title => title, :body => content_formatted, - :draft => draft?, :published_at => post_date, :created_at => post_date, - :user_id => user.id, :tag_list => tag_list - post.save! + user = ::User.find_by_wp_username(creator) - ::BlogPost.transaction do - categories.each do |category| - post.categories << category.to_refinery - end - - comments.each do |comment| - comment = comment.to_refinery - comment.post = post - comment.save - end - end - rescue ActiveRecord::RecordInvalid - # if the title has already been taken (WP allows duplicates here, - # refinery doesn't) append the post_id to it, making it unique - post.title = "#{title}-#{post_id}" - post.save + if user.nil? + raise "User with wp_username #{creator} not found" end - post - end + post = ::Post.find_or_initialize_by(:id => post_id, :slug => post_name) - def self.create_blog_page_if_necessary - # refinerycms wants a page at /blog, so let's make sure there is one - # taken from the original db seeds from refinery-blog - unless ::Page.where("link_url = ?", '/blog').exists? - page = ::Page.create( - :title => "Blog", - :link_url => "/blog", - :deletable => false, - :position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1), - :menu_match => "^/blogs?(\/|\/.+?|)$" - ) + post.assign_attributes( + :user_id => user.id, :title => title, + :created_at => post_date, + :published_at => publish_date) + # :body => content_formatted taken care of by translation below - ::Page.default_parts.each do |default_page_part| - page.parts.create(:title => default_page_part, :body => nil) - end + if post.translations.blank? + translation = post.translations.build + else + translation = post.translations.first + end + + translation.locale = "en" + translation.title = title + translation.body = content_formatted + translation.save + + post.save + + if post.errors.blank? + return post.reload + else + puts post.inspect + raise post.errors.full_messages.to_s end end