Changing attachment to use paperclip
This commit is contained in:
		
							parent
							
								
									bd4cccd91a
								
							
						
					
					
						commit
						d4b2457787
					
				@ -5,8 +5,7 @@ namespace :wordpress do
 | 
			
		||||
  task :reset_blog do
 | 
			
		||||
    Rake::Task["environment"].invoke
 | 
			
		||||
 | 
			
		||||
    %w(taggings tags blog_comments blog_categories blog_categories_blog_posts 
 | 
			
		||||
       blog_posts).each do |table_name|
 | 
			
		||||
    %w(posts post_translations taggings tags).each do |table_name|
 | 
			
		||||
      p "Truncating #{table_name} ..."
 | 
			
		||||
      ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
 | 
			
		||||
    end
 | 
			
		||||
@ -16,18 +15,23 @@ namespace :wordpress do
 | 
			
		||||
  # Imports authors and posts from a WordPress XML export into the Rails app.
  #
  # Usage: rake wordpress:import_blog['/path/to/dump.xml']
  # Set ONLY_PUBLISHED=true in the environment to import published posts only.
  desc "import blog data from a WordPressImport XML dump"
  task :import_blog, :file_name do |task, params|
    Rake::Task["environment"].invoke
    p "Loading XML from #{params[:file_name]} ..."
    dump = WordPressImport::Dump.new(params[:file_name])

    # Query the dump once and reuse the result for both the count and the import.
    authors = dump.authors
    p "Importing #{authors.count} authors ..."
    authors.each(&:to_rails)

    # By default everything is imported; only the exact string "true" in the
    # ONLY_PUBLISHED environment variable restricts the run to published posts.
    only_published = ENV['ONLY_PUBLISHED'] == 'true'
    posts = dump.posts(only_published)
    p "Importing #{posts.count} posts ..."

    if only_published
      p "(only published posts)"
    else
      p "(export ONLY_PUBLISHED=true to import only published posts)"
    end

    posts.each(&:to_rails)
  end
 | 
			
		||||
 | 
			
		||||
  desc "reset blog tables and then import blog data from a WordPressImport XML dump"
 | 
			
		||||
@ -38,53 +42,53 @@ namespace :wordpress do
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  desc "Reset the cms relevant tables for a clean import"
 | 
			
		||||
  task :reset_pages do
 | 
			
		||||
    Rake::Task["environment"].invoke
 | 
			
		||||
  # desc "Reset the cms relevant tables for a clean import"
 | 
			
		||||
  # task :reset_pages do
 | 
			
		||||
  #   Rake::Task["environment"].invoke
 | 
			
		||||
 | 
			
		||||
    %w(page_part_translations page_translations page_parts pages).each do |table_name|
 | 
			
		||||
      p "Truncating #{table_name} ..."
 | 
			
		||||
      ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
  #   %w(page_part_translations page_translations page_parts pages).each do |table_name|
 | 
			
		||||
  #     p "Truncating #{table_name} ..."
 | 
			
		||||
  #     ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
 | 
			
		||||
  #   end
 | 
			
		||||
  # end
 | 
			
		||||
 | 
			
		||||
  desc "import cms data from a WordPress XML dump"
 | 
			
		||||
  task :import_pages, :file_name do |task, params|
 | 
			
		||||
    Rake::Task["environment"].invoke
 | 
			
		||||
    dump = WordPressImport::Dump.new(params[:file_name])
 | 
			
		||||
  # desc "import cms data from a WordPress XML dump"
 | 
			
		||||
  # task :import_pages, :file_name do |task, params|
 | 
			
		||||
  #   Rake::Task["environment"].invoke
 | 
			
		||||
  #   dump = WordPressImport::Dump.new(params[:file_name])
 | 
			
		||||
 | 
			
		||||
    only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
 | 
			
		||||
    dump.pages(only_published).each(&:to_refinery)
 | 
			
		||||
  #   only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
 | 
			
		||||
  #   dump.pages(only_published).each(&:to_rails)
 | 
			
		||||
 | 
			
		||||
    # After all pages are persisted we can now create the parent - child
 | 
			
		||||
    # relationships. This is necessary, as WordPress doesn't dump the pages in
 | 
			
		||||
    # a correct order. 
 | 
			
		||||
    dump.pages(only_published).each do |dump_page|
 | 
			
		||||
      page = ::Page.find(dump_page.post_id)
 | 
			
		||||
      page.parent_id = dump_page.parent_id
 | 
			
		||||
      page.save!
 | 
			
		||||
    end
 | 
			
		||||
  #   # After all pages are persisted we can now create the parent - child
 | 
			
		||||
  #   # relationships. This is necessary, as WordPress doesn't dump the pages in
 | 
			
		||||
  #   # a correct order. 
 | 
			
		||||
  #   dump.pages(only_published).each do |dump_page|
 | 
			
		||||
  #     page = ::Page.find(dump_page.post_id)
 | 
			
		||||
  #     page.parent_id = dump_page.parent_id
 | 
			
		||||
  #     page.save!
 | 
			
		||||
  #   end
 | 
			
		||||
 | 
			
		||||
    WordPressImport::Post.create_blog_page_if_necessary
 | 
			
		||||
  #   WordPressImport::Post.create_blog_page_if_necessary
 | 
			
		||||
        
 | 
			
		||||
    ENV["MODEL"] = 'Page'
 | 
			
		||||
    Rake::Task["friendly_id:redo_slugs"].invoke
 | 
			
		||||
    ENV.delete("MODEL")
 | 
			
		||||
  end
 | 
			
		||||
  #   ENV["MODEL"] = 'Page'
 | 
			
		||||
  #   Rake::Task["friendly_id:redo_slugs"].invoke
 | 
			
		||||
  #   ENV.delete("MODEL")
 | 
			
		||||
  # end
 | 
			
		||||
  
 | 
			
		||||
  desc "reset cms tables and then import cms data from a WordPress XML dump"
 | 
			
		||||
  task :reset_and_import_pages, :file_name do |task, params|
 | 
			
		||||
    Rake::Task["environment"].invoke
 | 
			
		||||
    Rake::Task["wordpress:reset_pages"].invoke
 | 
			
		||||
    Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
 | 
			
		||||
  end
 | 
			
		||||
  # desc "reset cms tables and then import cms data from a WordPress XML dump"
 | 
			
		||||
  # task :reset_and_import_pages, :file_name do |task, params|
 | 
			
		||||
  #   Rake::Task["environment"].invoke
 | 
			
		||||
  #   Rake::Task["wordpress:reset_pages"].invoke
 | 
			
		||||
  #   Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
 | 
			
		||||
  # end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  desc "Reset the media relevant tables for a clean import"
 | 
			
		||||
  task :reset_media do
 | 
			
		||||
    Rake::Task["environment"].invoke
 | 
			
		||||
 | 
			
		||||
    %w(images resources).each do |table_name|
 | 
			
		||||
    %w(rich_rich_files).each do |table_name|
 | 
			
		||||
      p "Truncating #{table_name} ..."
 | 
			
		||||
      ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
 | 
			
		||||
    end
 | 
			
		||||
@ -95,13 +99,14 @@ namespace :wordpress do
 | 
			
		||||
    Rake::Task["environment"].invoke
 | 
			
		||||
    dump = WordPressImport::Dump.new(params[:file_name])
 | 
			
		||||
    
 | 
			
		||||
    attachments = dump.attachments.each(&:to_refinery)
 | 
			
		||||
    p "Importing #{dump.attachments.each_slice(25).first.count} attachments ..."
 | 
			
		||||
    attachments = dump.attachments.each_slice(25).first.each(&:to_rails)
 | 
			
		||||
    p "Errors were encountered: #{$ATTACHMENT_EXCEPTIONS.inspect}" unless $ATTACHMENT_EXCEPTIONS.blank?
 | 
			
		||||
    
 | 
			
		||||
    # parse all created BlogPost and Page bodys and replace the old wordpress media uls 
 | 
			
		||||
    # parse all created Post and Page bodies and replace the old WordPress media URLs
 | 
			
		||||
    # with the newly created ones
 | 
			
		||||
    attachments.each do |attachment|
 | 
			
		||||
      attachment.replace_url
 | 
			
		||||
    end
 | 
			
		||||
    p "Replacing attachment URLs found in posts/pages ..."
 | 
			
		||||
    attachments.each(&:replace_url)
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  desc "reset media tables and then import media data from a WordPress XML dump"
 | 
			
		||||
@ -115,7 +120,16 @@ namespace :wordpress do
 | 
			
		||||
  # Resets and re-imports the blog, then imports media and rewrites media URLs.
  # The file name argument is forwarded unchanged to every sub-task.
  task :full_import, :file_name do |task, params|
    Rake::Task["environment"].invoke
    Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name])
    # Page import and the media-table reset are currently disabled; the media
    # import below therefore runs without clearing existing rows first.
    #Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
    #Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
    Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
  end
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  # Runs the blog import followed by the media import / URL replacement,
  # without truncating any table beforehand.
  desc "Local First master import (no resets)"
  task :lfa_import, :file_name do |task, params|
    Rake::Task["environment"].invoke

    # Order matters: posts must exist before media URLs can be rewritten in them.
    %w(wordpress:import_blog wordpress:import_and_replace_media).each do |step|
      Rake::Task[step].invoke(params[:file_name])
    end
  end
 | 
			
		||||
end
 | 
			
		||||
 | 
			
		||||
@ -1,8 +1,8 @@
 | 
			
		||||
module WordPressImport
 | 
			
		||||
  class Attachment
 | 
			
		||||
    attr_reader :node
 | 
			
		||||
    attr_reader :refinery_image
 | 
			
		||||
    attr_reader :refinery_resource
 | 
			
		||||
    attr_reader :paperclip_image
 | 
			
		||||
    attr_reader :paperclip_file
 | 
			
		||||
 | 
			
		||||
    def initialize(node)
 | 
			
		||||
      @node = node
 | 
			
		||||
@ -40,42 +40,79 @@ module WordPressImport
 | 
			
		||||
      url.match /\.(png|jpg|jpeg|gif)$/ 
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    def to_refinery
 | 
			
		||||
      if image?
 | 
			
		||||
        to_image
 | 
			
		||||
      else
 | 
			
		||||
        to_resource
 | 
			
		||||
    # Imports this attachment into the Rails app: images go through to_image,
    # every other attachment through to_file.
    #
    # Failures are not re-raised. The error message is printed and appended to
    # the global $ATTACHMENT_EXCEPTIONS array, which the import task inspects
    # after the run to report everything that went wrong.
    #
    # Returns the value of to_image / to_file, or $ATTACHMENT_EXCEPTIONS when
    # the import of this attachment failed.
    def to_rails
      image? ? to_image : to_file
    rescue StandardError => ex
      # StandardError rather than Exception: Interrupt (Ctrl-C), SystemExit and
      # similar must still be able to stop a long-running import.
      message = "ERROR saving attachment #{url} -- #{ex.message}"
      p message
      $ATTACHMENT_EXCEPTIONS ||= []
      $ATTACHMENT_EXCEPTIONS << message
    end
 | 
			
		||||
 | 
			
		||||
    # Rewrites every occurrence of this attachment's original URL, choosing the
    # image or the resource replacement path, and prints how many records were
    # touched. The counter is reset at the start of each call.
    def replace_url
      @occurrance_count = 0
      image? ? replace_image_url : replace_resource_url
      p "Replaced #{@occurrance_count} occurrances of #{url}"
    end
 | 
			
		||||
 | 
			
		||||
    private
 | 
			
		||||
 | 
			
		||||
    def to_image
 | 
			
		||||
      image = ::Image.new
 | 
			
		||||
      image.created_at = post_date
 | 
			
		||||
      image.image_url = url
 | 
			
		||||
      image.save!
 | 
			
		||||
    # Normalises a file name for duplicate lookup: the base name is
    # URL-decoded, lower-cased and every run of characters outside a-z / 0-9
    # is collapsed into a single "-"; the extension is lower-cased.
    #
    # full_file_name - String, e.g. "My%20Photo.JPG"
    #
    # Returns "<base>.<extension>", e.g. "my-photo.jpg". A name without an
    # extension keeps the trailing dot ("README" -> "readme.").
    def rich_file_clean_file_name(full_file_name)
      extension = File.extname(full_file_name).gsub(/^\.+/, '')
      # Escape the extension so regex metacharacters in it ("(", "+", "?" ...)
      # are matched literally instead of raising or stripping the wrong text.
      filename = full_file_name.sub(/\.#{Regexp.escape(extension)}\z/, '')

      # Decoded twice so double-encoded names ("%2520") are fully unescaped.
      filename = CGI.unescape(CGI.unescape(filename))

      "#{filename.downcase.gsub(/[^a-z0-9]+/, '-')}.#{extension.downcase}"
    end
 | 
			
		||||
 | 
			
		||||
      @refinery_image = image
 | 
			
		||||
    # Imports this attachment as a Rich::RichFile image unless a record with
    # the same cleaned file name already exists.
    #
    # Stores the record in @paperclip_image and returns it (new or existing).
    # Raises whatever save! raises when the record cannot be persisted.
    def to_image
      # avoid duplicates; use our storage system's filename cleaner for lookup
      image = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))

      # find_or_initialize_by yields an unsaved record when nothing matched.
      if image.new_record?
        p "Importing image #{file_name}"
        image.simplified_type = "image"
        image.created_at = post_date
        # A URI is assigned to the attachment; presumably Paperclip downloads
        # the remote file from it -- TODO confirm the URI adapter is enabled.
        image.rich_file = URI.parse(url)
        image.save!
      else
        p "image #{file_name} already exists..."
      end

      @paperclip_image = image
      image
    end
 | 
			
		||||
 | 
			
		||||
    def to_resource
 | 
			
		||||
      resource = ::Resource.new
 | 
			
		||||
      resource.created_at = post_date
 | 
			
		||||
      resource.file_url = url
 | 
			
		||||
      resource.save!
 | 
			
		||||
    # Imports this (non-image) attachment as a Rich::RichFile unless a record
    # with the same cleaned file name already exists.
    #
    # Stores the record in @paperclip_file and returns it (new or existing).
    # Raises whatever save! raises when the record cannot be persisted.
    def to_file
      # avoid duplicates; use our storage system's filename cleaner for lookup
      file = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))

      # find_or_initialize_by yields an unsaved record when nothing matched.
      if file.new_record?
        p "Importing file #{file_name}"
        file.created_at = post_date
        # Assigned unconditionally, as in to_image. NOTE(review): the lookup
        # above pre-populates rich_file_file_name, so an "only if the
        # attachment is blank" guard could skip the download -- confirm
        # against Paperclip's Attachment#blank?.
        file.rich_file = URI.parse(url)
        file.save!
      else
        p "file #{file_name} already exists..."
      end

      @paperclip_file = file
      file
    end
 | 
			
		||||
 | 
			
		||||
    def replace_image_url
 | 
			
		||||
@ -89,24 +126,26 @@ module WordPressImport
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    # Points blog posts at the URL of the imported image.
    # Does nothing (returns nil) when this attachment was never imported,
    # e.g. because to_rails recorded an error for it.
    def replace_image_url_in_blog_posts
      return unless paperclip_image

      replace_url_in_blog_posts(paperclip_image.rich_file.url)
    end
 | 
			
		||||
    
 | 
			
		||||
    # Points page parts at the URL of the imported image.
    # Does nothing (returns nil) when this attachment was never imported,
    # e.g. because to_rails recorded an error for it.
    def replace_image_url_in_pages
      return unless paperclip_image

      replace_url_in_pages(paperclip_image.rich_file.url)
    end
 | 
			
		||||
 | 
			
		||||
    # Points blog posts at the URL of the imported (non-image) file.
    # Does nothing (returns nil) when this attachment was never imported,
    # e.g. because to_rails recorded an error for it.
    def replace_resource_url_in_blog_posts
      return unless paperclip_file

      replace_url_in_blog_posts(paperclip_file.rich_file.url)
    end
 | 
			
		||||
    
 | 
			
		||||
    # Points page parts at the URL of the imported (non-image) file.
    # Does nothing (returns nil) when this attachment was never imported,
    # e.g. because to_rails recorded an error for it.
    def replace_resource_url_in_pages
      return unless paperclip_file

      replace_url_in_pages(paperclip_file.rich_file.url)
    end
 | 
			
		||||
 | 
			
		||||
    def replace_url_in_blog_posts(new_url)
 | 
			
		||||
      ::BlogPost.all.each do |post|
 | 
			
		||||
        if (! post.body.empty?) && post.body.include?(url)
 | 
			
		||||
      ::Post.all.each do |post|
 | 
			
		||||
        # Rewrite and persist only posts whose body actually contains the old URL.
        if !post.body.empty? && post.body.include?(url)
          # One more record touched; the total is reported by replace_url.
          @occurrance_count += 1
          post.body = post.body.gsub(url_pattern, new_url)
          post.save!
        end
 | 
			
		||||
@ -115,10 +154,13 @@ module WordPressImport
 | 
			
		||||
 | 
			
		||||
    def replace_url_in_pages(new_url)
 | 
			
		||||
      ::Page.all.each do |page|
 | 
			
		||||
        page.parts.each do |part|
 | 
			
		||||
          if (! part.body.to_s.blank?) && part.body.include?(url)
 | 
			
		||||
            part.body = part.body.gsub(url_pattern, new_url)
 | 
			
		||||
            part.save!
 | 
			
		||||
        page.translations.each do |translation|
 | 
			
		||||
          translation.parts.each do |part|
 | 
			
		||||
            # Rewrite and persist only parts whose content actually contains the old URL.
            if !part.content.to_s.blank? && part.content.include?(url)
              # One more record touched; the total is reported by replace_url.
              @occurrance_count += 1
              part.content = part.content.gsub(url_pattern, new_url)
              part.save!
            end
 | 
			
		||||
          end
 | 
			
		||||
        end
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
@ -3,15 +3,17 @@ module WordPressImport
 | 
			
		||||
    attr_reader :doc
 | 
			
		||||
 | 
			
		||||
    def initialize(file_name)
 | 
			
		||||
      file_name = File.expand_path(file_name)
 | 
			
		||||
 | 
			
		||||
      raise "Given file '#{file_name}' no file or not readable." \
 | 
			
		||||
        unless File.file?(file_name) && File.readable?(file_name)
 | 
			
		||||
      begin
 | 
			
		||||
        file_name = File.expand_path(file_name)
 | 
			
		||||
        raise "error" unless File.file?(file_name) && File.readable?(file_name)
 | 
			
		||||
      rescue
 | 
			
		||||
        raise "Given file '#{file_name}' is not a file or not readable. Rake tasks take filename arguments like this: rake wordpress:full_import['/path/to/my_file']"
 | 
			
		||||
      end
 | 
			
		||||
      
 | 
			
		||||
      file = File.open(file_name)
 | 
			
		||||
      
 | 
			
		||||
      if file.size >= 10485760 # 10MB
 | 
			
		||||
        puts "WARNING: LibXML by default supports 10MB max file size. On some systems your file will be silently truncated; on others, an error will be raised. Consider splitting your file into smaller chunks, or double-checking the import results."
 | 
			
		||||
        puts "WARNING: LibXML by default supports 10MB max file size. On some systems your file will be silently truncated; on others, an error will be raised. Consider splitting your file into smaller chunks and running rake tasks individually (authors, then blog/pages, then media), and double-check the import results."
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      @doc = Nokogiri::XML(file)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user