# wordpress-import/lib/tasks/wordpress.rake

require 'fileutils'
require 'open-uri'
require 'wordpress'
namespace :wordpress do
# Empties the blog tables (posts, post_translations, taggings, tags) so that a
# following import starts from a clean slate.
# Rows are removed with DELETE FROM, even though the progress message says
# "Truncating".
desc "Reset the blog relevant tables for a clean import"
task :reset_blog do
  Rake::Task["environment"].invoke

  connection = ActiveRecord::Base.connection
  %w(posts post_translations taggings tags).each do |table_name|
    p "Truncating #{table_name} ..."
    connection.execute "DELETE FROM #{table_name}"
  end
end
# Imports authors and posts from a WordPress XML dump.
#
# Arguments:
#   file_name - path handed to WordPressImport::Dump.new
#   blog_slug - passed to each post's #to_rails
#
# Environment:
#   ONLY_PUBLISHED - when exactly "true", dump.posts is asked for published
#                    posts only; any other value (or unset) imports everything.
desc "import blog data from a WordPressImport XML dump"
task :import_blog, :file_name, :blog_slug do |_task, params|
  Rake::Task["environment"].invoke
  p "Loading XML from #{params[:file_name]} (using blog #{params[:blog_slug]}) ..."

  dump = WordPressImport::Dump.new(params[:file_name])

  authors = dump.authors
  p "Importing #{authors.count} authors ..."
  authors.each(&:to_rails)

  # by default, import all; unless $ONLY_PUBLISHED = "true"
  only_published = ENV['ONLY_PUBLISHED'] == 'true'

  # Ask the dump for its posts once and reuse the result for both the
  # progress message and the import itself.
  posts = dump.posts(only_published)
  p "Importing #{posts.count} posts ..."
  if only_published
    p "(only published posts)"
  else
    p "(export ONLY_PUBLISHED=true to import only published posts)"
  end

  # Block variable deliberately not named `p`, which would shadow Kernel#p.
  posts.each { |post| post.to_rails(params[:blog_slug]) }
end
# Convenience wrapper: loads the environment, runs wordpress:reset_blog and
# then wordpress:import_blog, forwarding file_name and blog_slug unchanged.
desc "reset blog tables and then import blog data from a WordPressImport XML dump"
task :reset_and_import_blog, :file_name, :blog_slug do |_task, args|
  dump_path = args[:file_name]
  slug = args[:blog_slug]

  %w(environment wordpress:reset_blog).each do |prerequisite|
    Rake::Task[prerequisite].invoke
  end

  Rake::Task["wordpress:import_blog"].invoke(dump_path, slug)
end
# Downloads the images referenced by <img> tags in every post body into the
# Rails public folder, mirroring the path of the remote URL.
#
# Arguments:
#   host_match - pattern (applied with String#match) that an image URL's host
#                must match for the image to be downloaded.
#
# Raises RuntimeError when host_match is missing. Problems with an individual
# image (unparsable URL, unsupported scheme, download or write failure) are
# reported on stdout and the run continues with the next image.
desc "download images in posts to public folder"
task :download_post_images, :host_match do |_task, params|
  # Plain-Ruby emptiness check: this runs before the environment is loaded.
  raise "Error: you must specify a host to match for this download (i.e. rake wordpress:download_post_images['mywebsite']" if params[:host_match].to_s.strip.empty?

  Rake::Task["environment"].invoke

  public_root = File.expand_path(Rails.public_path.to_s)
  posts = ::Post.all

  # scrape images
  posts.each do |post|
    Nokogiri::HTML(post.body.to_s).css("img").each do |img|
      # an <img> without a src attribute has nothing to download
      remote_file = img["src"]
      next if remote_file.nil?

      begin
        remote_uri = URI(remote_file)
      rescue => error
        puts "Error parsing URL #{remote_file}: #{error.message}"
        next
      end

      # only download images whose host matches; relative URLs have no host
      host = remote_uri.host
      next unless host && host.match(params[:host_match])

      # only URI classes that open-uri can fetch expose a public #open
      unless remote_uri.respond_to?(:open)
        puts "Error saving file #{remote_file}: unsupported URL scheme"
        next
      end

      # find a local path for it, refusing anything that resolves outside
      # the public folder (e.g. a URL path containing "..")
      local_file = File.expand_path(File.join(public_root, remote_uri.path))
      unless local_file.start_with?(public_root + File::SEPARATOR)
        puts "Error saving file #{remote_file}: resolved path is outside the public folder"
        next
      end

      # only download if not already there or if it's zero bytes
      next if File.size?(local_file)

      begin
        # create local folders if necessary
        FileUtils.mkdir_p(File.dirname(local_file))
        # Fetch fully before touching the local file so a failed download
        # does not leave an empty file behind; the block form closes the IO.
        data = remote_uri.open { |io| io.read }
        File.open(local_file, 'wb') { |f| f.write(data) }
        puts "Saved file: #{local_file}"
      rescue StandardError => error
        # best effort: one broken image must not abort the whole run
        puts "Error saving file #{remote_file}: #{error.message}"
      end
    end
  end

  puts "Finished downloding images from #{posts.count} posts"
end

# desc "Reset the cms relevant tables for a clean import"
# task :reset_pages do
# Rake::Task["environment"].invoke
# %w(page_part_translations page_translations page_parts pages).each do |table_name|
# p "Truncating #{table_name} ..."
# ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
# end
# end

# desc "import cms data from a WordPress XML dump"
# task :import_pages, :file_name do |task, params|
# Rake::Task["environment"].invoke
# dump = WordPressImport::Dump.new(params[:file_name])
# only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
# dump.pages(only_published).each(&:to_rails)
# # After all pages are persisted we can now create the parent - child
# # relationships. This is necessary, as WordPress doesn't dump the pages in
# # a correct order.
# dump.pages(only_published).each do |dump_page|
# page = ::Page.find(dump_page.post_id)
# page.parent_id = dump_page.parent_id
# page.save!
# end
# WordPressImport::Post.create_blog_page_if_necessary
# ENV["MODEL"] = 'Page'
# Rake::Task["friendly_id:redo_slugs"].invoke
# ENV.delete("MODEL")
# end

# desc "reset cms tables and then import cms data from a WordPress XML dump"
# task :reset_and_import_pages, :file_name do |task, params|
# Rake::Task["environment"].invoke
# Rake::Task["wordpress:reset_pages"].invoke
# Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
# end
# Empties the media table (rich_rich_files) ahead of a fresh media import.
# Rows are removed with DELETE FROM, even though the progress message says
# "Truncating".
desc "Reset the media relevant tables for a clean import"
task :reset_media do
  Rake::Task["environment"].invoke

  connection = ActiveRecord::Base.connection
  %w(rich_rich_files).each do |table_name|
    p "Truncating #{table_name} ..."
    connection.execute "DELETE FROM #{table_name}"
  end
end
# Imports attachments from a WordPress XML dump and then rewrites the old
# WordPress media URLs via each attachment's #replace_url.
#
# Arguments:
#   file_name - path handed to WordPressImport::Dump.new
#
# Errors collected in the globals $ATTACHMENT_EXCEPTIONS and
# $REPLACEMENT_EXCEPTIONS (populated outside this task) are printed after the
# respective phase.
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
task :import_and_replace_media, :file_name do |_task, params|
  Rake::Task["environment"].invoke

  # Prints a framed list of collected errors; silent when there are none.
  report_errors = lambda do |headline, errors|
    next if errors.blank?
    p "----------------------------------------------------------"
    p headline
    errors.each { |exception| puts exception }
    p "----------------------------------------------------------"
  end

  dump = WordPressImport::Dump.new(params[:file_name])

  # NOTE(review): only the first 200 attachments are processed, exactly as
  # before — confirm whether this cap is intentional. `first(200)` yields an
  # empty list for a dump without attachments instead of nil.
  attachments = dump.attachments.first(200)
  p "Importing #{attachments.count} attachments ..."
  attachments.each(&:to_rails)
  report_errors.call("ERRORS WERE ENCOUNTERED IMPORTING ATTACHMENTS:", $ATTACHMENT_EXCEPTIONS)

  # parse all created Post and Page bodys and replace the old wordpress media urls
  # with the newly created ones
  p "Replacing attachment URLs found in posts/pages ..."
  attachments.each(&:replace_url)
  report_errors.call("ERRORS WERE ENCOUNTERED REPLACING ATTACHMENTS:", $REPLACEMENT_EXCEPTIONS)
end
# Convenience wrapper: loads the environment, runs wordpress:reset_media and
# then wordpress:import_and_replace_media with file_name forwarded unchanged.
desc "reset media tables and then import media data from a WordPress XML dump"
task :reset_import_and_replace_media, :file_name do |_task, args|
  dump_path = args[:file_name]

  %w(environment wordpress:reset_media).each do |prerequisite|
    Rake::Task[prerequisite].invoke
  end

  Rake::Task["wordpress:import_and_replace_media"].invoke(dump_path)
end
# Runs the complete import: wordpress:reset_and_import_blog (with file_name
# and blog_slug) followed by wordpress:import_and_replace_media (with
# file_name). The media table is not reset here; the resetting variant is
# left commented out below.
desc "reset and import all data (see the other tasks)"
task :full_import, :file_name, :blog_slug do |_task, params|
  Rake::Task["environment"].invoke
  Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name], params[:blog_slug])

  #Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
  #Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
  Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
end
end