188 lines
7.0 KiB
Ruby
188 lines
7.0 KiB
Ruby
require 'wordpress'
|
|
|
|
namespace :wordpress do
|
|
desc "Reset the blog relevant tables for a clean import"
|
|
task :reset_blog do
|
|
Rake::Task["environment"].invoke
|
|
|
|
%w(posts post_translations taggings tags).each do |table_name|
|
|
p "Truncating #{table_name} ..."
|
|
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
|
end
|
|
|
|
end
|
|
|
|
desc "import blog data from a WordPressImport XML dump"
|
|
task :import_blog, :file_name, :blog_slug do |task, params|
|
|
Rake::Task["environment"].invoke
|
|
p "Loading XML from #{params[:file_name]} (using blog #{params[:blog_slug]}) ..."
|
|
dump = WordPressImport::Dump.new(params[:file_name])
|
|
|
|
p "Importing #{dump.authors.count} authors ..."
|
|
dump.authors.each(&:to_rails)
|
|
|
|
# by default, import all; unless $ONLY_PUBLISHED = "true"
|
|
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
|
|
p "Importing #{dump.posts(only_published).count} posts ..."
|
|
|
|
if only_published
|
|
p "(only published posts)"
|
|
else
|
|
p "(export ONLY_PUBLISHED=true to import only published posts)"
|
|
end
|
|
|
|
dump.posts(only_published).each{|p| p.to_rails(params[:blog_slug]) }
|
|
end
|
|
|
|
desc "reset blog tables and then import blog data from a WordPressImport XML dump"
|
|
task :reset_and_import_blog, :file_name, :blog_slug do |task, params|
|
|
Rake::Task["environment"].invoke
|
|
Rake::Task["wordpress:reset_blog"].invoke
|
|
Rake::Task["wordpress:import_blog"].invoke(params[:file_name], params[:blog_slug])
|
|
end
|
|
|
|
|
|
desc "download images in posts to public folder"
|
|
task :download_post_images, :host_match do |task, params|
|
|
raise "Error: you must specify a host to match for this download (i.e. rake wordpress:download_post_images['mywebsite']" if params[:host_match].blank?
|
|
|
|
Rake::Task["environment"].invoke
|
|
|
|
# scrape images
|
|
@posts = ::Post.all
|
|
@posts.each do |post|
|
|
doc = Nokogiri::HTML(post.body)
|
|
doc.css("img").each do |img|
|
|
# find remote file path
|
|
remote_file = img.attributes["src"].text
|
|
# load uri
|
|
begin
|
|
remote_uri = URI(remote_file)
|
|
rescue => error
|
|
puts "Error parsing URL #{remote_file}: #{error.message}"
|
|
end
|
|
|
|
# only download if the image is a LFA-hosted image
|
|
if remote_uri && remote_uri.host.match(params[:host_match]) != nil
|
|
# find a local path for it
|
|
local_file = File.expand_path(File.join(Rails.public_path,remote_uri.path))
|
|
# only download if not already there or if it's zero bytes
|
|
unless File.size?(local_file)
|
|
# create local folders if necessary
|
|
dirname = File.dirname(local_file)
|
|
unless File.directory?(dirname)
|
|
FileUtils.mkdir_p(dirname)
|
|
end
|
|
# save remote file to local
|
|
begin
|
|
remote_file_io = open(remote_file)
|
|
File.open(local_file,'wb'){ |f| f.write(remote_file_io.read) }
|
|
puts "Saved file: #{local_file}"
|
|
rescue OpenURI::HTTPError => error
|
|
puts "Error saving file #{remote_file}: #{error.message}"
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
puts "Finished downloding images from #{@posts.count} posts"
|
|
|
|
end
|
|
|
|
# desc "Reset the cms relevant tables for a clean import"
|
|
# task :reset_pages do
|
|
# Rake::Task["environment"].invoke
|
|
|
|
# %w(page_part_translations page_translations page_parts pages).each do |table_name|
|
|
# p "Truncating #{table_name} ..."
|
|
# ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
|
# end
|
|
# end
|
|
|
|
# desc "import cms data from a WordPress XML dump"
|
|
# task :import_pages, :file_name do |task, params|
|
|
# Rake::Task["environment"].invoke
|
|
# dump = WordPressImport::Dump.new(params[:file_name])
|
|
|
|
# only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
|
|
# dump.pages(only_published).each(&:to_rails)
|
|
|
|
# # After all pages are persisted we can now create the parent - child
|
|
# # relationships. This is necessary, as WordPress doesn't dump the pages in
|
|
# # a correct order.
|
|
# dump.pages(only_published).each do |dump_page|
|
|
# page = ::Page.find(dump_page.post_id)
|
|
# page.parent_id = dump_page.parent_id
|
|
# page.save!
|
|
# end
|
|
|
|
# WordPressImport::Post.create_blog_page_if_necessary
|
|
|
|
# ENV["MODEL"] = 'Page'
|
|
# Rake::Task["friendly_id:redo_slugs"].invoke
|
|
# ENV.delete("MODEL")
|
|
# end
|
|
|
|
# desc "reset cms tables and then import cms data from a WordPress XML dump"
|
|
# task :reset_and_import_pages, :file_name do |task, params|
|
|
# Rake::Task["environment"].invoke
|
|
# Rake::Task["wordpress:reset_pages"].invoke
|
|
# Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
|
|
# end
|
|
|
|
|
|
desc "Reset the media relevant tables for a clean import"
|
|
task :reset_media do
|
|
Rake::Task["environment"].invoke
|
|
|
|
%w(rich_rich_files).each do |table_name|
|
|
p "Truncating #{table_name} ..."
|
|
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
|
end
|
|
end
|
|
|
|
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
|
|
task :import_and_replace_media, :file_name do |task, params|
|
|
Rake::Task["environment"].invoke
|
|
dump = WordPressImport::Dump.new(params[:file_name])
|
|
|
|
p "Importing #{dump.attachments.each_slice(200).first.count} attachments ..."
|
|
attachments = dump.attachments.each_slice(200).first.each(&:to_rails)
|
|
unless $ATTACHMENT_EXCEPTIONS.blank?
|
|
p "----------------------------------------------------------"
|
|
p "ERRORS WERE ENCOUNTERED IMPORTING ATTACHMENTS:"
|
|
$ATTACHMENT_EXCEPTIONS.each{|exception| puts exception}
|
|
p "----------------------------------------------------------"
|
|
end
|
|
|
|
# parse all created Post and Page bodys and replace the old wordpress media urls
|
|
# with the newly created ones
|
|
p "Replacing attachment URLs found in posts/pages ..."
|
|
attachments.each(&:replace_url)
|
|
|
|
unless $REPLACEMENT_EXCEPTIONS.blank?
|
|
p "----------------------------------------------------------"
|
|
p "ERRORS WERE ENCOUNTERED REPLACING ATTACHMENTS:"
|
|
$REPLACEMENT_EXCEPTIONS.each{|exception| puts exception}
|
|
p "----------------------------------------------------------"
|
|
end
|
|
end
|
|
|
|
desc "reset media tables and then import media data from a WordPress XML dump"
|
|
task :reset_import_and_replace_media, :file_name do |task, params|
|
|
Rake::Task["environment"].invoke
|
|
Rake::Task["wordpress:reset_media"].invoke
|
|
Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
|
|
end
|
|
|
|
desc "reset and import all data (see the other tasks)"
|
|
task :full_import, :file_name, :blog_slug do |task, params|
|
|
Rake::Task["environment"].invoke
|
|
Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name],params[:blog_slug])
|
|
#Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
|
|
#Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
|
|
Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
|
|
end
|
|
|
|
end
|