Resource import working

* added rake tasks
This commit is contained in:
Marc Remolt 2011-06-13 18:48:17 +02:00
parent a24ea686fa
commit 90665484c7
5 changed files with 154 additions and 41 deletions

View File

@ -19,7 +19,6 @@ namespace :wordpress do
dump = Refinery::WordPress::Dump.new(params[:file_name]) dump = Refinery::WordPress::Dump.new(params[:file_name])
dump.authors.each(&:to_refinery) dump.authors.each(&:to_refinery)
attachments = dump.attachments.each(&:to_refinery)
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
dump.posts(only_published).each(&:to_refinery) dump.posts(only_published).each(&:to_refinery)
@ -29,12 +28,6 @@ namespace :wordpress do
ENV["MODEL"] = 'BlogPost' ENV["MODEL"] = 'BlogPost'
Rake::Task["friendly_id:redo_slugs"].invoke Rake::Task["friendly_id:redo_slugs"].invoke
ENV.delete("MODEL") ENV.delete("MODEL")
# parse all created BlogPosts bodys and replace the old wordpress image uls
# with the newly created ones
attachments.each do |attachment|
attachment.replace_image_url_in_blog_posts
end
end end
desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump" desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump"
@ -55,7 +48,7 @@ namespace :wordpress do
end end
end end
desc "import cms data from a Refinery::WordPress XML dump" desc "import cms data from a WordPress XML dump"
task :import_pages, :file_name do |task, params| task :import_pages, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name]) dump = Refinery::WordPress::Dump.new(params[:file_name])
@ -79,10 +72,50 @@ namespace :wordpress do
ENV.delete("MODEL") ENV.delete("MODEL")
end end
desc "reset cms tables and then import cms data from a Refinery::WordPress XML dump" desc "reset cms tables and then import cms data from a WordPress XML dump"
task :reset_and_import_pages, :file_name do |task, params| task :reset_and_import_pages, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_pages"].invoke Rake::Task["wordpress:reset_pages"].invoke
Rake::Task["wordpress:import_pages"].invoke(params[:file_name]) Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
end end
desc "Reset the media relevant tables for a clean import"
task :reset_media do
  # The Rails environment must be loaded before ActiveRecord can be used.
  Rake::Task["environment"].invoke

  # Empty each media table with a plain DELETE statement; the progress line
  # is printed before the statement for that table is executed.
  ["images", "resources"].each do |table_name|
    p "Truncating #{table_name} ..."
    ActiveRecord::Base.connection.execute("DELETE FROM #{table_name}")
  end
end
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
task :import_and_replace_media, :file_name do |task, params|
  Rake::Task["environment"].invoke

  dump = Refinery::WordPress::Dump.new(params[:file_name])

  # First pass: convert every attachment of the dump via #to_refinery.
  attachments = dump.attachments.each(&:to_refinery)

  # Second pass: parse the bodies of all created BlogPosts and Pages and
  # replace the old WordPress media URLs with the newly created ones.
  attachments.each(&:replace_url)
end
desc "reset media tables and then import media data from a WordPress XML dump"
task :reset_import_and_replace_media, :file_name do |task, params|
  file_name = params[:file_name]

  Rake::Task["environment"].invoke
  # Clear the media tables first, then run the import on the given dump file.
  Rake::Task["wordpress:reset_media"].invoke
  Rake::Task["wordpress:import_and_replace_media"].invoke(file_name)
end
desc "reset and import all data (see the other tasks)"
task :full_import, :file_name do |task, params|
  file_name = params[:file_name]

  Rake::Task["environment"].invoke

  # Order matters: blog posts and pages are imported before the media task
  # runs, because the media task is the one that replaces URLs in them.
  %w(
    wordpress:reset_and_import_blog
    wordpress:reset_and_import_pages
    wordpress:reset_import_and_replace_media
  ).each do |task_name|
    Rake::Task[task_name].invoke(file_name)
  end
end
end end

View File

@ -3,6 +3,7 @@ module Refinery
class Attachment class Attachment
attr_reader :node attr_reader :node
attr_reader :refinery_image attr_reader :refinery_image
attr_reader :refinery_resource
def initialize(node) def initialize(node)
@node = node @node = node
@ -28,6 +29,14 @@ module Refinery
node.xpath("wp:attachment_url").text node.xpath("wp:attachment_url").text
end end
# Builds a regular expression that matches this attachment's URL as well as
# size-suffixed variants of it, i.e. the same URL with "-<width>x<height>"
# inserted right before the file extension (e.g. "image-150x150.png").
#
# The URL is split at its last "." into a base and an extension. Both parts
# are passed through Regexp.escape so that characters such as ".", "(" or
# "+" in the URL are matched literally instead of being read as regular
# expression syntax.
#
# Returns a Regexp.
def url_pattern
  url_parts = url.split('.')
  extension = url_parts.pop
  url_without_extension = url_parts.join('.')

  /#{Regexp.escape(url_without_extension)}(-\d+x\d+)?\.#{Regexp.escape(extension)}/
end
def image? def image?
url.match /\.(png|jpg|jpeg|gif)$/ url.match /\.(png|jpg|jpeg|gif)$/
end end
@ -36,21 +45,15 @@ module Refinery
if image? if image?
to_image to_image
else else
to_file to_resource
end end
end end
def replace_image_url_in_blog_posts def replace_url
::BlogPost.all.each do |post| if image?
if post.body.include? url replace_image_url
url_parts = url.split('.') else
extension = url_parts.pop replace_resource_url
url_without_extension = url_parts.join('.')
pattern = /#{url_without_extension}(-\d+x\d+)?\.#{extension}/
post.body = post.body.gsub(pattern, refinery_image.image.url)
post.save!
end
end end
end end
@ -66,8 +69,60 @@ module Refinery
image image
end end
def to_file def to_resource
raise "to_file is not implemented yet, sorry!" resource = ::Resource.new
resource.created_at = post_date
resource.file_url = url
resource.save!
@refinery_resource = resource
resource
end
# Replaces the URL of this image attachment everywhere it is handled:
# first in the blog posts, then in the pages.
def replace_image_url
  replace_image_url_in_blog_posts
  replace_image_url_in_pages
end
# Replaces the URL of this non-image attachment everywhere it is handled:
# first in the blog posts, then in the pages.
def replace_resource_url
  replace_resource_url_in_blog_posts
  replace_resource_url_in_pages
end
# Rewrites blog post bodies so they reference the URL of the image record
# held in refinery_image instead of the original attachment URL.
def replace_image_url_in_blog_posts
  imported_url = refinery_image.image.url
  replace_url_in_blog_posts(imported_url)
end
# Rewrites page part bodies so they reference the URL of the image record
# held in refinery_image instead of the original attachment URL.
def replace_image_url_in_pages
  imported_url = refinery_image.image.url
  replace_url_in_pages(imported_url)
end
# Rewrites blog post bodies so they reference the URL of the resource record
# held in refinery_resource instead of the original attachment URL.
def replace_resource_url_in_blog_posts
  imported_url = refinery_resource.file.url
  replace_url_in_blog_posts(imported_url)
end
# Rewrites page part bodies so they reference the URL of the resource record
# held in refinery_resource instead of the original attachment URL.
def replace_resource_url_in_pages
  imported_url = refinery_resource.file.url
  replace_url_in_pages(imported_url)
end
# Replaces every occurrence of this attachment's URL (including the
# size-suffixed variants matched by url_pattern) in the body of each
# BlogPost with new_url, saving only the posts that actually change.
#
# new_url - the String URL to insert in place of the old one.
#
# Posts whose body is nil or does not match url_pattern are left untouched
# and are not saved. Raises whatever post.save! raises.
def replace_url_in_blog_posts(new_url)
  ::BlogPost.all.each do |post|
    # to_s makes a nil body behave like an empty one instead of raising.
    body = post.body.to_s

    # Test against the pattern rather than the plain URL so that posts which
    # only contain a size-suffixed variant of the URL are rewritten as well.
    next unless body =~ url_pattern

    # The block form inserts new_url verbatim; a replacement string would
    # have backslash sequences such as "\0" or "\1" expanded by gsub.
    post.body = body.gsub(url_pattern) { new_url }
    post.save!
  end
end
# Replaces every occurrence of this attachment's URL (including the
# size-suffixed variants matched by url_pattern) in the body of each part
# of each Page with new_url, saving only the parts that actually change.
#
# new_url - the String URL to insert in place of the old one.
#
# Parts whose body is nil or does not match url_pattern are left untouched
# and are not saved. Raises whatever part.save! raises.
def replace_url_in_pages(new_url)
  ::Page.all.each do |page|
    page.parts.each do |part|
      # to_s makes a nil body behave like an empty one instead of raising.
      body = part.body.to_s

      # Test against the pattern rather than the plain URL so that parts
      # which only contain a size-suffixed variant are rewritten as well.
      next unless body =~ url_pattern

      # The block form inserts new_url verbatim; a replacement string would
      # have backslash sequences such as "\0" or "\1" expanded by gsub.
      part.body = body.gsub(url_pattern) { new_url }
      part.save!
    end
  end
end
end end

View File

@ -27,15 +27,16 @@ describe Refinery::WordPress::Attachment, :type => :model do
end end
end end
describe "#replace_image_url" do describe "#replace_url" do
let(:post) { BlogPost.first } let(:post) { BlogPost.first }
before do before do
test_dump.authors.each(&:to_refinery) test_dump.authors.each(&:to_refinery)
test_dump.posts.each(&:to_refinery) test_dump.posts.each(&:to_refinery)
@image = attachment.to_refinery @image = attachment.to_refinery
attachment.replace_image_url_in_blog_posts attachment.replace_url
end end
specify { post.body.should_not include attachment.url } specify { post.body.should_not include attachment.url }
@ -45,8 +46,6 @@ describe Refinery::WordPress::Attachment, :type => :model do
it "should replace attachment urls in the generated BlogPosts" do it "should replace attachment urls in the generated BlogPosts" do
post.body.should include(@image.image.url) post.body.should include(@image.image.url)
end end
end end
end end
@ -60,8 +59,34 @@ describe Refinery::WordPress::Attachment, :type => :model do
specify { attachment.should_not be_an_image } specify { attachment.should_not be_an_image }
describe '#to_refinery' do describe '#to_refinery' do
it "should raise an exception for now" do before do
lambda { attachment.to_refinery }.should raise_error @resource = attachment.to_refinery
end
specify { Resource.should have(1).record }
specify { @resource.should be_a(Resource) }
it "should copy the attributes from Attachment" do
@resource.created_at.should == attachment.post_date
@resource.file.url.end_with?(attachment.file_name).should be_true
end
end
describe '#replace_resource_url' do
let(:page_part) { Page.last.parts.first }
before do
test_dump.pages.each(&:to_refinery)
@resource = attachment.to_refinery
attachment.replace_url
end
specify { page_part.body.should_not include attachment.url }
specify { page_part.body.should_not include 'wp-content' }
it "should replace attachment urls in the generated BlogPosts" do
page_part.body.should include(@resource.file.url)
end end
end end
end end

View File

@ -5,18 +5,6 @@ require File.expand_path("../dummy/config/environment.rb", __FILE__)
require "rails/test_help" require "rails/test_help"
require "rspec/rails" require "rspec/rails"
require "database_cleaner" require "database_cleaner"
require "fakeweb"
FakeWeb.allow_net_connect = false
# Simulating download of wordpress file attachments. The dump expects the files
# to be at the given URLs
FakeWeb.register_uri(:get,
"http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png",
:body => File.new('spec/fixtures/200px-Tux.svg_.png').read,
:content_type => "image/png")
FakeWeb.register_uri(:get, "http://localhost/wordpress/wp-content/uploads/2011/05/cv.txt", :body => "Hello World!", :content_type => "text/plain")
ActionMailer::Base.delivery_method = :test ActionMailer::Base.delivery_method = :test
ActionMailer::Base.perform_deliveries = true ActionMailer::Base.perform_deliveries = true

12
spec/support/fakeweb.rb Normal file
View File

@ -0,0 +1,12 @@
require "fakeweb"
# Forbid real HTTP connections; only the URIs registered below are answered.
FakeWeb.allow_net_connect = false

# Simulating download of wordpress file attachments. The dump expects the files
# to be at the given URLs
FakeWeb.register_uri(:get,
  "http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png",
  # Read the fixture in binary mode and through the block form of File.open,
  # so the PNG bytes are not altered and the file handle is closed again.
  :body => File.open('spec/fixtures/200px-Tux.svg_.png', 'rb') { |file| file.read },
  :content_type => "image/png")

FakeWeb.register_uri(:get,
  "http://localhost/wordpress/wp-content/uploads/2011/05/cv.txt",
  :body => "Hello World!",
  :content_type => "text/plain")