From 90665484c760efdfa8fd71f21056b5eac73a5087 Mon Sep 17 00:00:00 2001 From: Marc Remolt Date: Mon, 13 Jun 2011 18:48:17 +0200 Subject: [PATCH] Resource import working * added rake tasks --- lib/tasks/wordpress.rake | 51 +++++++++++++--- lib/wordpress/attachment.rb | 83 ++++++++++++++++++++++----- spec/lib/wordpress/attachment_spec.rb | 37 ++++++++++-- spec/spec_helper.rb | 12 ---- spec/support/fakeweb.rb | 12 ++++ 5 files changed, 154 insertions(+), 41 deletions(-) create mode 100644 spec/support/fakeweb.rb diff --git a/lib/tasks/wordpress.rake b/lib/tasks/wordpress.rake index 44c9bbe..b629298 100644 --- a/lib/tasks/wordpress.rake +++ b/lib/tasks/wordpress.rake @@ -19,7 +19,6 @@ namespace :wordpress do dump = Refinery::WordPress::Dump.new(params[:file_name]) dump.authors.each(&:to_refinery) - attachments = dump.attachments.each(&:to_refinery) only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false dump.posts(only_published).each(&:to_refinery) @@ -29,12 +28,6 @@ namespace :wordpress do ENV["MODEL"] = 'BlogPost' Rake::Task["friendly_id:redo_slugs"].invoke ENV.delete("MODEL") - - # parse all created BlogPosts bodys and replace the old wordpress image uls - # with the newly created ones - attachments.each do |attachment| - attachment.replace_image_url_in_blog_posts - end end desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump" @@ -55,7 +48,7 @@ namespace :wordpress do end end - desc "import cms data from a Refinery::WordPress XML dump" + desc "import cms data from a WordPress XML dump" task :import_pages, :file_name do |task, params| Rake::Task["environment"].invoke dump = Refinery::WordPress::Dump.new(params[:file_name]) @@ -79,10 +72,50 @@ namespace :wordpress do ENV.delete("MODEL") end - desc "reset cms tables and then import cms data from a Refinery::WordPress XML dump" + desc "reset cms tables and then import cms data from a WordPress XML dump" task :reset_and_import_pages, :file_name do |task, params| Rake::Task["environment"].invoke Rake::Task["wordpress:reset_pages"].invoke Rake::Task["wordpress:import_pages"].invoke(params[:file_name]) end + + + desc "Reset the media relevant tables for a clean import" + task :reset_media do + Rake::Task["environment"].invoke + + %w(images resources).each do |table_name| + p "Truncating #{table_name} ..." + ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}" + end + end + + desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts" + task :import_and_replace_media, :file_name do |task, params| + Rake::Task["environment"].invoke + dump = Refinery::WordPress::Dump.new(params[:file_name]) + + attachments = dump.attachments.each(&:to_refinery) + + # parse all created BlogPost and Page bodys and replace the old wordpress media uls + # with the newly created ones + attachments.each do |attachment| + attachment.replace_url + end + end + + desc "reset media tables and then import media data from a WordPress XML dump" + task :reset_import_and_replace_media, :file_name do |task, params| + Rake::Task["environment"].invoke + Rake::Task["wordpress:reset_media"].invoke + Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name]) + end + + desc "reset and import all data (see the other tasks)" + task :full_import, :file_name do |task, params| + Rake::Task["environment"].invoke + Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name]) + Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name]) + Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name]) + end end diff --git a/lib/wordpress/attachment.rb b/lib/wordpress/attachment.rb index 3026163..a8ab414 100644 --- a/lib/wordpress/attachment.rb +++ b/lib/wordpress/attachment.rb @@ -3,6 +3,7 @@ module Refinery class Attachment attr_reader :node attr_reader :refinery_image + attr_reader :refinery_resource def initialize(node) @node = node @@ -28,6 +29,14 @@ module Refinery node.xpath("wp:attachment_url").text end + def url_pattern + url_parts = url.split('.') + extension = url_parts.pop + url_without_extension = url_parts.join('.') + + /#{url_without_extension}(-\d+x\d+)?\.#{extension}/ + end + def image? url.match /\.(png|jpg|jpeg|gif)$/ end @@ -36,21 +45,15 @@ module Refinery if image? to_image else - to_file + to_resource end end - def replace_image_url_in_blog_posts - ::BlogPost.all.each do |post| - if post.body.include? url - url_parts = url.split('.') - extension = url_parts.pop - url_without_extension = url_parts.join('.') - pattern = /#{url_without_extension}(-\d+x\d+)?\.#{extension}/ - - post.body = post.body.gsub(pattern, refinery_image.image.url) - post.save! - end + def replace_url + if image? + replace_image_url + else + replace_resource_url end end @@ -66,8 +69,60 @@ module Refinery image end - def to_file - raise "to_file is not implemented yet, sorry!" + def to_resource + resource = ::Resource.new + resource.created_at = post_date + resource.file_url = url + resource.save! + + @refinery_resource = resource + resource + end + + def replace_image_url + replace_image_url_in_blog_posts + replace_image_url_in_pages + end + + def replace_resource_url + replace_resource_url_in_blog_posts + replace_resource_url_in_pages + end + + def replace_image_url_in_blog_posts + replace_url_in_blog_posts(refinery_image.image.url) + end + + def replace_image_url_in_pages + replace_url_in_pages(refinery_image.image.url) + end + + def replace_resource_url_in_blog_posts + replace_url_in_blog_posts(refinery_resource.file.url) + end + + def replace_resource_url_in_pages + replace_url_in_pages(refinery_resource.file.url) + end + + def replace_url_in_blog_posts(new_url) + ::BlogPost.all.each do |post| + if (! post.body.empty?) && post.body.include?(url) + post.body = post.body.gsub(url_pattern, new_url) + post.save! + end + end + end + + def replace_url_in_pages(new_url) + ::Page.all.each do |page| + page.parts.each do |part| + if (! part.body.to_s.blank?) && part.body.include?(url) + part.body = part.body.gsub(url_pattern, new_url) + part.save! + end + end + end end end diff --git a/spec/lib/wordpress/attachment_spec.rb b/spec/lib/wordpress/attachment_spec.rb index bb08ee6..a2b5ee2 100644 --- a/spec/lib/wordpress/attachment_spec.rb +++ b/spec/lib/wordpress/attachment_spec.rb @@ -27,15 +27,16 @@ describe Refinery::WordPress::Attachment, :type => :model do end end - describe "#replace_image_url" do + describe "#replace_url" do let(:post) { BlogPost.first } before do test_dump.authors.each(&:to_refinery) test_dump.posts.each(&:to_refinery) + @image = attachment.to_refinery - attachment.replace_image_url_in_blog_posts + attachment.replace_url end specify { post.body.should_not include attachment.url } @@ -45,8 +46,6 @@ describe Refinery::WordPress::Attachment, :type => :model do it "should replace attachment urls in the generated BlogPosts" do post.body.should include(@image.image.url) end - - end end @@ -60,8 +59,34 @@ describe Refinery::WordPress::Attachment, :type => :model do specify { attachment.should_not be_an_image } describe '#to_refinery' do - it "should raise an exception for now" do - lambda { attachment.to_refinery }.should raise_error + before do + @resource = attachment.to_refinery + end + + specify { Resource.should have(1).record } + specify { @resource.should be_a(Resource) } + + it "should copy the attributes from Attachment" do + @resource.created_at.should == attachment.post_date + @resource.file.url.end_with?(attachment.file_name).should be_true + end + + end + + describe '#replace_resource_url' do + let(:page_part) { Page.last.parts.first } + + before do + test_dump.pages.each(&:to_refinery) + @resource = attachment.to_refinery + attachment.replace_url + end + + specify { page_part.body.should_not include attachment.url } + specify { page_part.body.should_not include 'wp-content' } + + it "should replace attachment urls in the generated BlogPosts" do + page_part.body.should include(@resource.file.url) end end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index c5a4018..de25430 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -5,18 +5,6 @@ require File.expand_path("../dummy/config/environment.rb", __FILE__) require "rails/test_help" require "rspec/rails" require "database_cleaner" -require "fakeweb" - -FakeWeb.allow_net_connect = false - -# Simulating download of wordpress file attachments. The dump expects the files -# to be at the given URLs -FakeWeb.register_uri(:get, - "http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png", - :body => File.new('spec/fixtures/200px-Tux.svg_.png').read, - :content_type => "image/png") - -FakeWeb.register_uri(:get, "http://localhost/wordpress/wp-content/uploads/2011/05/cv.txt", :body => "Hello World!", :content_type => "text/plain") ActionMailer::Base.delivery_method = :test ActionMailer::Base.perform_deliveries = true diff --git a/spec/support/fakeweb.rb b/spec/support/fakeweb.rb new file mode 100644 index 0000000..529f447 --- /dev/null +++ b/spec/support/fakeweb.rb @@ -0,0 +1,12 @@ +require "fakeweb" + +FakeWeb.allow_net_connect = false + +# Simulating download of wordpress file attachments. The dump expects the files +# to be at the given URLs +FakeWeb.register_uri(:get, + "http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png", + :body => File.new('spec/fixtures/200px-Tux.svg_.png').read, + :content_type => "image/png") + +FakeWeb.register_uri(:get, "http://localhost/wordpress/wp-content/uploads/2011/05/cv.txt", :body => "Hello World!", :content_type => "text/plain")