Changing attachment to use paperclip
This commit is contained in:
parent
bd4cccd91a
commit
d4b2457787
|
@ -5,8 +5,7 @@ namespace :wordpress do
|
||||||
task :reset_blog do
|
task :reset_blog do
|
||||||
Rake::Task["environment"].invoke
|
Rake::Task["environment"].invoke
|
||||||
|
|
||||||
%w(taggings tags blog_comments blog_categories blog_categories_blog_posts
|
%w(posts post_translations taggings tags).each do |table_name|
|
||||||
blog_posts).each do |table_name|
|
|
||||||
p "Truncating #{table_name} ..."
|
p "Truncating #{table_name} ..."
|
||||||
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
||||||
end
|
end
|
||||||
|
@ -16,18 +15,23 @@ namespace :wordpress do
|
||||||
desc "import blog data from a WordPressImport XML dump"
|
desc "import blog data from a WordPressImport XML dump"
|
||||||
task :import_blog, :file_name do |task, params|
|
task :import_blog, :file_name do |task, params|
|
||||||
Rake::Task["environment"].invoke
|
Rake::Task["environment"].invoke
|
||||||
|
p "Loading XML from #{params[:file_name]} ..."
|
||||||
dump = WordPressImport::Dump.new(params[:file_name])
|
dump = WordPressImport::Dump.new(params[:file_name])
|
||||||
|
|
||||||
dump.authors.each(&:to_refinery)
|
p "Importing #{dump.authors.count} authors ..."
|
||||||
|
dump.authors.each(&:to_rails)
|
||||||
|
|
||||||
|
# by default, import all; unless $ONLY_PUBLISHED = "true"
|
||||||
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
|
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
|
||||||
dump.posts(only_published).each(&:to_refinery)
|
p "Importing #{dump.posts(only_published).count} posts ..."
|
||||||
|
|
||||||
|
if only_published
|
||||||
|
p "(only published posts)"
|
||||||
|
else
|
||||||
|
p "(export ONLY_PUBLISHED=true to import only published posts)"
|
||||||
|
end
|
||||||
|
|
||||||
WordPressImport::Post.create_blog_page_if_necessary
|
dump.posts(only_published).each(&:to_rails)
|
||||||
|
|
||||||
ENV["MODEL"] = 'BlogPost'
|
|
||||||
Rake::Task["friendly_id:redo_slugs"].invoke
|
|
||||||
ENV.delete("MODEL")
|
|
||||||
end
|
end
|
||||||
|
|
||||||
desc "reset blog tables and then import blog data from a WordPressImport XML dump"
|
desc "reset blog tables and then import blog data from a WordPressImport XML dump"
|
||||||
|
@ -38,53 +42,53 @@ namespace :wordpress do
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
desc "Reset the cms relevant tables for a clean import"
|
# desc "Reset the cms relevant tables for a clean import"
|
||||||
task :reset_pages do
|
# task :reset_pages do
|
||||||
Rake::Task["environment"].invoke
|
# Rake::Task["environment"].invoke
|
||||||
|
|
||||||
%w(page_part_translations page_translations page_parts pages).each do |table_name|
|
# %w(page_part_translations page_translations page_parts pages).each do |table_name|
|
||||||
p "Truncating #{table_name} ..."
|
# p "Truncating #{table_name} ..."
|
||||||
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
# ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
||||||
end
|
# end
|
||||||
end
|
# end
|
||||||
|
|
||||||
desc "import cms data from a WordPress XML dump"
|
# desc "import cms data from a WordPress XML dump"
|
||||||
task :import_pages, :file_name do |task, params|
|
# task :import_pages, :file_name do |task, params|
|
||||||
Rake::Task["environment"].invoke
|
# Rake::Task["environment"].invoke
|
||||||
dump = WordPressImport::Dump.new(params[:file_name])
|
# dump = WordPressImport::Dump.new(params[:file_name])
|
||||||
|
|
||||||
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
|
# only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
|
||||||
dump.pages(only_published).each(&:to_refinery)
|
# dump.pages(only_published).each(&:to_rails)
|
||||||
|
|
||||||
# After all pages are persisted we can now create the parent - child
|
# # After all pages are persisted we can now create the parent - child
|
||||||
# relationships. This is necessary, as WordPress doesn't dump the pages in
|
# # relationships. This is necessary, as WordPress doesn't dump the pages in
|
||||||
# a correct order.
|
# # a correct order.
|
||||||
dump.pages(only_published).each do |dump_page|
|
# dump.pages(only_published).each do |dump_page|
|
||||||
page = ::Page.find(dump_page.post_id)
|
# page = ::Page.find(dump_page.post_id)
|
||||||
page.parent_id = dump_page.parent_id
|
# page.parent_id = dump_page.parent_id
|
||||||
page.save!
|
# page.save!
|
||||||
end
|
# end
|
||||||
|
|
||||||
WordPressImport::Post.create_blog_page_if_necessary
|
# WordPressImport::Post.create_blog_page_if_necessary
|
||||||
|
|
||||||
ENV["MODEL"] = 'Page'
|
# ENV["MODEL"] = 'Page'
|
||||||
Rake::Task["friendly_id:redo_slugs"].invoke
|
# Rake::Task["friendly_id:redo_slugs"].invoke
|
||||||
ENV.delete("MODEL")
|
# ENV.delete("MODEL")
|
||||||
end
|
# end
|
||||||
|
|
||||||
desc "reset cms tables and then import cms data from a WordPress XML dump"
|
# desc "reset cms tables and then import cms data from a WordPress XML dump"
|
||||||
task :reset_and_import_pages, :file_name do |task, params|
|
# task :reset_and_import_pages, :file_name do |task, params|
|
||||||
Rake::Task["environment"].invoke
|
# Rake::Task["environment"].invoke
|
||||||
Rake::Task["wordpress:reset_pages"].invoke
|
# Rake::Task["wordpress:reset_pages"].invoke
|
||||||
Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
|
# Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
|
||||||
end
|
# end
|
||||||
|
|
||||||
|
|
||||||
desc "Reset the media relevant tables for a clean import"
|
desc "Reset the media relevant tables for a clean import"
|
||||||
task :reset_media do
|
task :reset_media do
|
||||||
Rake::Task["environment"].invoke
|
Rake::Task["environment"].invoke
|
||||||
|
|
||||||
%w(images resources).each do |table_name|
|
%w(rich_rich_files).each do |table_name|
|
||||||
p "Truncating #{table_name} ..."
|
p "Truncating #{table_name} ..."
|
||||||
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
|
||||||
end
|
end
|
||||||
|
@ -95,13 +99,14 @@ namespace :wordpress do
|
||||||
Rake::Task["environment"].invoke
|
Rake::Task["environment"].invoke
|
||||||
dump = WordPressImport::Dump.new(params[:file_name])
|
dump = WordPressImport::Dump.new(params[:file_name])
|
||||||
|
|
||||||
attachments = dump.attachments.each(&:to_refinery)
|
p "Importing #{dump.attachments.first(25).count} attachments ..."
|
||||||
|
attachments = dump.attachments.first(25).each(&:to_rails)
|
||||||
|
p "Errors were encountered: #{$ATTACHMENT_EXCEPTIONS.inspect}" unless $ATTACHMENT_EXCEPTIONS.blank?
|
||||||
|
|
||||||
# parse all created BlogPost and Page bodys and replace the old wordpress media uls
|
# parse all created Post and Page bodies and replace the old wordpress media urls
|
||||||
# with the newly created ones
|
# with the newly created ones
|
||||||
attachments.each do |attachment|
|
p "Replacing attachment URLs found in posts/pages ..."
|
||||||
attachment.replace_url
|
attachments.each(&:replace_url)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
desc "reset media tables and then import media data from a WordPress XML dump"
|
desc "reset media tables and then import media data from a WordPress XML dump"
|
||||||
|
@ -115,7 +120,16 @@ namespace :wordpress do
|
||||||
task :full_import, :file_name do |task, params|
|
task :full_import, :file_name do |task, params|
|
||||||
Rake::Task["environment"].invoke
|
Rake::Task["environment"].invoke
|
||||||
Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name])
|
Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name])
|
||||||
Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
|
#Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
|
||||||
Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
|
#Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
|
||||||
|
Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
desc "Local First master import (no resets)"
|
||||||
|
task :lfa_import, :file_name do |task, params|
|
||||||
|
Rake::Task["environment"].invoke
|
||||||
|
Rake::Task["wordpress:import_blog"].invoke(params[:file_name])
|
||||||
|
Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
module WordPressImport
|
module WordPressImport
|
||||||
class Attachment
|
class Attachment
|
||||||
attr_reader :node
|
attr_reader :node
|
||||||
attr_reader :refinery_image
|
attr_reader :paperclip_image
|
||||||
attr_reader :refinery_resource
|
attr_reader :paperclip_file
|
||||||
|
|
||||||
def initialize(node)
|
def initialize(node)
|
||||||
@node = node
|
@node = node
|
||||||
|
@ -40,42 +40,79 @@ module WordPressImport
|
||||||
url.match /\.(png|jpg|jpeg|gif)$/
|
url.match /\.(png|jpg|jpeg|gif)$/
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_refinery
|
def to_rails
|
||||||
if image?
|
begin
|
||||||
to_image
|
if image?
|
||||||
else
|
to_image
|
||||||
to_resource
|
else
|
||||||
|
to_file
|
||||||
|
end
|
||||||
|
rescue StandardError => ex
|
||||||
|
message = "ERROR saving attachment #{url} -- #{ex.message}"
|
||||||
|
p message
|
||||||
|
$ATTACHMENT_EXCEPTIONS = [] if $ATTACHMENT_EXCEPTIONS.blank?
|
||||||
|
$ATTACHMENT_EXCEPTIONS << message
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def replace_url
|
def replace_url
|
||||||
|
@occurrance_count = 0
|
||||||
if image?
|
if image?
|
||||||
replace_image_url
|
replace_image_url
|
||||||
else
|
else
|
||||||
replace_resource_url
|
replace_resource_url
|
||||||
end
|
end
|
||||||
|
p "Replaced #{@occurrance_count} occurrances of #{url}"
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def to_image
|
def rich_file_clean_file_name(full_file_name)
|
||||||
image = ::Image.new
|
extension = File.extname(full_file_name).gsub(/^\.+/, '')
|
||||||
image.created_at = post_date
|
filename = full_file_name.gsub(/\.#{extension}$/, '')
|
||||||
image.image_url = url
|
|
||||||
image.save!
|
filename = CGI::unescape(filename)
|
||||||
|
filename = CGI::unescape(filename)
|
||||||
|
|
||||||
|
extension = extension.downcase
|
||||||
|
filename = filename.downcase.gsub(/[^a-z0-9]+/i, '-')
|
||||||
|
|
||||||
|
"#{filename}.#{extension}"
|
||||||
|
end
|
||||||
|
|
||||||
@refinery_image = image
|
def to_image
|
||||||
|
# avoid duplicates; use our storage system's filename cleaner for lookup
|
||||||
|
image = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))
|
||||||
|
|
||||||
|
if image.rich_file.instance.id.blank?
|
||||||
|
p "Importing image #{file_name}"
|
||||||
|
image.simplified_type = "image"
|
||||||
|
image.created_at = post_date
|
||||||
|
image.rich_file = URI.parse(url)
|
||||||
|
image.save!
|
||||||
|
else
|
||||||
|
p "image #{file_name} already exists..."
|
||||||
|
end
|
||||||
|
|
||||||
|
@paperclip_image = image
|
||||||
image
|
image
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_resource
|
def to_file
|
||||||
resource = ::Resource.new
|
# avoid duplicates; use our storage system's filename cleaner for lookup
|
||||||
resource.created_at = post_date
|
file = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))
|
||||||
resource.file_url = url
|
|
||||||
resource.save!
|
|
||||||
|
|
||||||
@refinery_resource = resource
|
if file.rich_file.instance.id.blank?
|
||||||
resource
|
p "Importing file #{file_name}"
|
||||||
|
file.created_at = post_date
|
||||||
|
file.rich_file = URI.parse(url) if file.rich_file.blank?
|
||||||
|
file.save!
|
||||||
|
else
|
||||||
|
p "file #{file_name} already exists..."
|
||||||
|
end
|
||||||
|
|
||||||
|
@paperclip_file = file
|
||||||
|
file
|
||||||
end
|
end
|
||||||
|
|
||||||
def replace_image_url
|
def replace_image_url
|
||||||
|
@ -89,24 +126,26 @@ module WordPressImport
|
||||||
end
|
end
|
||||||
|
|
||||||
def replace_image_url_in_blog_posts
|
def replace_image_url_in_blog_posts
|
||||||
replace_url_in_blog_posts(refinery_image.image.url)
|
replace_url_in_blog_posts(paperclip_image.rich_file.url)
|
||||||
end
|
end
|
||||||
|
|
||||||
def replace_image_url_in_pages
|
def replace_image_url_in_pages
|
||||||
replace_url_in_pages(refinery_image.image.url)
|
replace_url_in_pages(paperclip_image.rich_file.url)
|
||||||
end
|
end
|
||||||
|
|
||||||
def replace_resource_url_in_blog_posts
|
def replace_resource_url_in_blog_posts
|
||||||
replace_url_in_blog_posts(refinery_resource.file.url)
|
replace_url_in_blog_posts(paperclip_file.rich_file.url)
|
||||||
end
|
end
|
||||||
|
|
||||||
def replace_resource_url_in_pages
|
def replace_resource_url_in_pages
|
||||||
replace_url_in_pages(refinery_resource.file.url)
|
replace_url_in_pages(paperclip_file.rich_file.url)
|
||||||
end
|
end
|
||||||
|
|
||||||
def replace_url_in_blog_posts(new_url)
|
def replace_url_in_blog_posts(new_url)
|
||||||
::BlogPost.all.each do |post|
|
::Post.all.each do |post|
|
||||||
if (! post.body.empty?) && post.body.include?(url)
|
byebug if post.id == 168
|
||||||
|
if ((! post.body.empty?) && post.body.include?(url))
|
||||||
|
@occurrance_count += 1
|
||||||
post.body = post.body.gsub(url_pattern, new_url)
|
post.body = post.body.gsub(url_pattern, new_url)
|
||||||
post.save!
|
post.save!
|
||||||
end
|
end
|
||||||
|
@ -115,10 +154,13 @@ module WordPressImport
|
||||||
|
|
||||||
def replace_url_in_pages(new_url)
|
def replace_url_in_pages(new_url)
|
||||||
::Page.all.each do |page|
|
::Page.all.each do |page|
|
||||||
page.parts.each do |part|
|
page.translations.each do |translation|
|
||||||
if (! part.body.to_s.blank?) && part.body.include?(url)
|
translation.parts.each do |part|
|
||||||
part.body = part.body.gsub(url_pattern, new_url)
|
if (! part.content.to_s.blank?) && part.content.include?(url)
|
||||||
part.save!
|
@occurrance_count += 1
|
||||||
|
part.content = part.content.gsub(url_pattern, new_url)
|
||||||
|
part.save!
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,15 +3,17 @@ module WordPressImport
|
||||||
attr_reader :doc
|
attr_reader :doc
|
||||||
|
|
||||||
def initialize(file_name)
|
def initialize(file_name)
|
||||||
file_name = File.expand_path(file_name)
|
begin
|
||||||
|
file_name = File.expand_path(file_name)
|
||||||
raise "Given file '#{file_name}' no file or not readable." \
|
raise "error" unless File.file?(file_name) && File.readable?(file_name)
|
||||||
unless File.file?(file_name) && File.readable?(file_name)
|
rescue
|
||||||
|
raise "Given file '#{file_name}' is not a file or not readable. Rake tasks take filename arguments like this: rake wordpress:full_import['/path/to/my_file']"
|
||||||
|
end
|
||||||
|
|
||||||
file = File.open(file_name)
|
file = File.open(file_name)
|
||||||
|
|
||||||
if file.size >= 10485760 # 10MB
|
if file.size >= 10485760 # 10MB
|
||||||
puts "WARNING: LibXML by default supports 10MB max file size. On some systems your file will be silently truncated; on others, an error will be raised. Consider splitting your file into smaller chunks, or double-checking the import results."
|
puts "WARNING: LibXML by default supports 10MB max file size. On some systems your file will be silently truncated; on others, an error will be raised. Consider splitting your file into smaller chunks and running rake tasks individually (authors, then blog/pages, then media), and double-check the import results."
|
||||||
end
|
end
|
||||||
|
|
||||||
@doc = Nokogiri::XML(file)
|
@doc = Nokogiri::XML(file)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user