Compare commits

..

No commits in common. "master" and "v0.3.0" have entirely different histories.

45 changed files with 756 additions and 985 deletions

View File

@ -1,4 +0,0 @@
rvm:
- 1.9.2
- 1.8.7
- ruby-head

10
Gemfile
View File

@ -1,15 +1,11 @@
source "http://rubygems.org" source "http://rubygems.org"
ruby '2.0.0' gem "rails", "3.0.7"
gem "rails", "4.0.0"
#gem "capybara", ">= 1.0.0.beta1" #gem "capybara", ">= 1.0.0.beta1"
gem "sqlite3" gem "sqlite3"
gem "rmagick" gem "rmagick"
gem 'shortcode', "0.1.2"
group :development, :test do group :development, :test do
gem 'byebug'
gem "rspec-rails", ">= 2.6.0" gem "rspec-rails", ">= 2.6.0"
gem "database_cleaner" gem "database_cleaner"
gem 'guard-rspec' gem 'guard-rspec'
@ -23,4 +19,6 @@ end
# gem 'ruby-debug' # gem 'ruby-debug'
# gem 'ruby-debug19' # gem 'ruby-debug19'
gem 'wordpress-import', :path => './' gem 'refinerycms'
gem 'refinerycms-blog'
gem 'refinerycms-wordpress-import', :path => './'

View File

@ -1,176 +1,197 @@
PATH PATH
remote: ./ remote: ./
specs: specs:
wordpress-import (0.4.4) refinerycms-wordpress-import (0.3.0)
bundler (~> 1.0) bundler (~> 1.0)
nokogiri (~> 1.6.0) nokogiri (~> 1.4.4)
shortcode (~> 0.1.1) refinerycms (~> 1.0.0)
refinerycms-blog (~> 1.5.2)
GEM GEM
remote: http://rubygems.org/ remote: http://rubygems.org/
specs: specs:
actionmailer (4.0.0) abstract (1.0.0)
actionpack (= 4.0.0) actionmailer (3.0.7)
mail (~> 2.5.3) actionpack (= 3.0.7)
actionpack (4.0.0) mail (~> 2.2.15)
activesupport (= 4.0.0) actionpack (3.0.7)
builder (~> 3.1.0) activemodel (= 3.0.7)
erubis (~> 2.7.0) activesupport (= 3.0.7)
rack (~> 1.5.2) builder (~> 2.1.2)
rack-test (~> 0.6.2) erubis (~> 2.6.6)
activemodel (4.0.0) i18n (~> 0.5.0)
activesupport (= 4.0.0) rack (~> 1.2.1)
builder (~> 3.1.0) rack-mount (~> 0.6.14)
activerecord (4.0.0) rack-test (~> 0.5.7)
activemodel (= 4.0.0) tzinfo (~> 0.3.23)
activerecord-deprecated_finders (~> 1.0.2) activemodel (3.0.7)
activesupport (= 4.0.0) activesupport (= 3.0.7)
arel (~> 4.0.0) builder (~> 2.1.2)
activerecord-deprecated_finders (1.0.3) i18n (~> 0.5.0)
activesupport (4.0.0) activerecord (3.0.7)
i18n (~> 0.6, >= 0.6.4) activemodel (= 3.0.7)
minitest (~> 4.2) activesupport (= 3.0.7)
multi_json (~> 1.3) arel (~> 2.0.2)
thread_safe (~> 0.1) tzinfo (~> 0.3.23)
tzinfo (~> 0.3.37) activeresource (3.0.7)
arel (4.0.2) activemodel (= 3.0.7)
atomic (1.1.15) activesupport (= 3.0.7)
blankslate (2.1.2.4) activesupport (3.0.7)
builder (3.1.4) acts-as-taggable-on (2.0.6)
byebug (2.7.0) acts_as_indexed (0.7.2)
columnize (~> 0.3) arel (2.0.10)
debugger-linecache (~> 1.2) awesome_nested_set (2.0.0)
celluloid (0.15.2) activerecord (>= 3.0.0)
timers (~> 1.1.0) babosa (0.3.5)
celluloid-io (0.15.0) bcrypt-ruby (2.1.4)
celluloid (>= 0.15.0) builder (2.1.2)
nio4r (>= 0.5.0) database_cleaner (0.6.7)
coderay (1.1.0) devise (1.3.4)
columnize (0.3.6) bcrypt-ruby (~> 2.1.2)
database_cleaner (1.2.0) orm_adapter (~> 0.0.3)
debugger-linecache (1.2.0) warden (~> 1.0.3)
diff-lcs (1.2.5) diff-lcs (1.1.2)
erubis (2.7.0) dragonfly (0.9.4)
rack
erubis (2.6.6)
abstract (>= 1.0.0)
fakeweb (1.3.0) fakeweb (1.3.0)
ffi (1.9.3) ffi (1.0.9)
formatador (0.2.4) filters_spam (0.3)
guard (2.5.1) friendly_id_globalize3 (3.2.1.3)
formatador (>= 0.2.4) babosa (~> 0.3.0)
listen (~> 2.6) globalize3 (0.1.0)
lumberjack (~> 1.0) activemodel (>= 3.0.0)
pry (>= 0.9.12) activerecord (>= 3.0.0)
thor (>= 0.18.1) guard (0.4.2)
guard-bundler (2.0.0) thor (~> 0.14.6)
bundler (~> 1.0) guard-bundler (0.1.3)
guard (~> 2.2) bundler (>= 1.0.0)
guard-rspec (4.2.8) guard (>= 0.2.2)
guard (~> 2.1) guard-rspec (0.4.0)
rspec (>= 2.14, < 4.0) guard (>= 0.4.0)
haml (4.0.5) i18n (0.5.0)
tilt libnotify (0.5.5)
hike (1.2.3) mail (2.2.19)
i18n (0.6.9) activesupport (>= 2.3.6)
libnotify (0.8.2) i18n (>= 0.4.0)
ffi (>= 1.0.11)
listen (2.7.0)
celluloid (>= 0.15.2)
celluloid-io (>= 0.15.0)
rb-fsevent (>= 0.9.3)
rb-inotify (>= 0.9)
lumberjack (1.0.4)
mail (2.5.4)
mime-types (~> 1.16) mime-types (~> 1.16)
treetop (~> 1.4.8) treetop (~> 1.4.8)
method_source (0.8.2) mime-types (1.16)
mime-types (1.25.1) nokogiri (1.4.4)
mini_portile (0.5.2) orm_adapter (0.0.5)
minitest (4.7.5) polyglot (0.3.1)
multi_json (1.9.0) rack (1.2.3)
nio4r (1.0.0) rack-cache (1.0.2)
nokogiri (1.6.1) rack (>= 0.4)
mini_portile (~> 0.5.0) rack-mount (0.6.14)
parslet (1.5.0) rack (>= 1.0.0)
blankslate (~> 2.0) rack-test (0.5.7)
polyglot (0.3.4)
pry (0.9.12.6)
coderay (~> 1.0)
method_source (~> 0.8)
slop (~> 3.4)
rack (1.5.2)
rack-test (0.6.2)
rack (>= 1.0) rack (>= 1.0)
rails (4.0.0) rails (3.0.7)
actionmailer (= 4.0.0) actionmailer (= 3.0.7)
actionpack (= 4.0.0) actionpack (= 3.0.7)
activerecord (= 4.0.0) activerecord (= 3.0.7)
activesupport (= 4.0.0) activeresource (= 3.0.7)
bundler (>= 1.3.0, < 2.0) activesupport (= 3.0.7)
railties (= 4.0.0) bundler (~> 1.0)
sprockets-rails (~> 2.0.0) railties (= 3.0.7)
railties (4.0.0) railties (3.0.7)
actionpack (= 4.0.0) actionpack (= 3.0.7)
activesupport (= 4.0.0) activesupport (= 3.0.7)
rake (>= 0.8.7) rake (>= 0.8.7)
thor (>= 0.18.1, < 2.0) thor (~> 0.14.4)
rake (10.1.1) rake (0.9.2)
rb-fsevent (0.9.4) refinerycms (1.0.0)
rb-inotify (0.9.3) bundler (~> 1.0)
ffi (>= 0.5.0) refinerycms-authentication (= 1.0.0)
rmagick (2.13.2) refinerycms-base (= 1.0.0)
rspec (2.14.1) refinerycms-core (= 1.0.0)
rspec-core (~> 2.14.0) refinerycms-dashboard (= 1.0.0)
rspec-expectations (~> 2.14.0) refinerycms-images (= 1.0.0)
rspec-mocks (~> 2.14.0) refinerycms-pages (= 1.0.0)
rspec-core (2.14.8) refinerycms-resources (= 1.0.0)
rspec-expectations (2.14.5) refinerycms-settings (= 1.0.0)
diff-lcs (>= 1.1.3, < 2.0) refinerycms-authentication (1.0.0)
rspec-mocks (2.14.6) devise (~> 1.3.0)
rspec-rails (2.14.1) friendly_id_globalize3 (~> 3.2.1)
actionpack (>= 3.0) refinerycms-core (= 1.0.0)
activemodel (>= 3.0) refinerycms-base (1.0.0)
activesupport (>= 3.0) refinerycms-blog (1.5.2)
railties (>= 3.0) acts-as-taggable-on
rspec-core (~> 2.14.0) filters_spam (~> 0.2)
rspec-expectations (~> 2.14.0) refinerycms-core (~> 1.0.0)
rspec-mocks (~> 2.14.0) seo_meta (~> 1.1.0)
shortcode (0.1.2) refinerycms-core (1.0.0)
haml (~> 4.0) acts_as_indexed (~> 0.7)
parslet (= 1.5.0) awesome_nested_set (~> 2.0)
slop (3.5.0) friendly_id_globalize3 (~> 3.2.1)
sprockets (2.12.0) globalize3 (~> 0.1)
hike (~> 1.2) rails (~> 3.0.7)
multi_json (~> 1.0) refinerycms-base (= 1.0.0)
rack (~> 1.0) refinerycms-generators (~> 1.0)
tilt (~> 1.1, != 1.3.0) refinerycms-settings (= 1.0.0)
sprockets-rails (2.0.1) truncate_html (~> 0.5)
actionpack (>= 3.0) will_paginate (~> 3.0.pre)
activesupport (>= 3.0) refinerycms-dashboard (1.0.0)
sprockets (~> 2.8) refinerycms-core (= 1.0.0)
sqlite3 (1.3.9) refinerycms-generators (1.0.2)
thor (0.18.1) refinerycms-images (1.0.0)
thread_safe (0.2.0) dragonfly (~> 0.9.0)
atomic (>= 1.1.7, < 2) rack-cache (>= 0.5.3)
tilt (1.4.1) refinerycms-core (= 1.0.0)
timers (1.1.0) refinerycms-pages (1.0.0)
treetop (1.4.15) awesome_nested_set (~> 2.0)
polyglot friendly_id_globalize3 (~> 3.2.1)
globalize3 (~> 0.1)
refinerycms-core (= 1.0.0)
seo_meta (~> 1.1)
refinerycms-resources (1.0.0)
dragonfly (~> 0.9.0)
rack-cache (>= 0.5.3)
refinerycms-core (= 1.0.0)
refinerycms-settings (1.0.0)
refinerycms-base (= 1.0.0)
rmagick (2.13.1)
rspec (2.6.0)
rspec-core (~> 2.6.0)
rspec-expectations (~> 2.6.0)
rspec-mocks (~> 2.6.0)
rspec-core (2.6.4)
rspec-expectations (2.6.0)
diff-lcs (~> 1.1.2)
rspec-mocks (2.6.0)
rspec-rails (2.6.1)
actionpack (~> 3.0)
activesupport (~> 3.0)
railties (~> 3.0)
rspec (~> 2.6.0)
seo_meta (1.1.1)
refinerycms-generators (~> 1.0.1)
sqlite3 (1.3.3)
thor (0.14.6)
treetop (1.4.9)
polyglot (>= 0.3.1) polyglot (>= 0.3.1)
tzinfo (0.3.39) truncate_html (0.5.1)
tzinfo (0.3.27)
warden (1.0.4)
rack (>= 1.0)
will_paginate (3.0.pre2)
PLATFORMS PLATFORMS
ruby ruby
DEPENDENCIES DEPENDENCIES
byebug
database_cleaner database_cleaner
fakeweb fakeweb
ffi ffi
guard-bundler guard-bundler
guard-rspec guard-rspec
libnotify libnotify
rails (= 4.0.0) rails (= 3.0.7)
refinerycms
refinerycms-blog
refinerycms-wordpress-import!
rmagick rmagick
rspec-rails (>= 2.6.0) rspec-rails (>= 2.6.0)
shortcode (= 0.1.2)
sqlite3 sqlite3
wordpress-import!

View File

@ -1,12 +1,6 @@
# A sample Guardfile # A sample Guardfile
# More info at https://github.com/guard/guard#readme # More info at https://github.com/guard/guard#readme
guard 'bundler' do
watch('Gemfile')
# Uncomment next line if Gemfile contain `gemspec' command
watch(/^.+\.gemspec/)
end
guard 'rspec', :version => 2 do guard 'rspec', :version => 2 do
watch(%r{^spec/.+_spec\.rb$}) watch(%r{^spec/.+_spec\.rb$})
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" } watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
@ -14,8 +8,19 @@ guard 'rspec', :version => 2 do
# Rails example # Rails example
watch(%r{^spec/.+_spec\.rb$}) watch(%r{^spec/.+_spec\.rb$})
watch(%r{^app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" } watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
watch(%r{^app/controllers/(.+)_(controller)\.rb$}) { |m| ["spec/routing/#{m[1]}_routing_spec.rb", "spec/#{m[2]}s/#{m[1]}_#{m[2]}_spec.rb", "spec/acceptance/#{m[1]}_spec.rb"] }
watch(%r{^spec/support/(.+)\.rb$}) { "spec" } watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
watch('spec/spec_helper.rb') { "spec" } watch('spec/spec_helper.rb') { "spec" }
watch('config/routes.rb') { "spec/routing" }
watch('app/controllers/application_controller.rb') { "spec/controllers" }
# Capybara request specs
watch(%r{^app/views/(.+)/.*\.(erb|haml)$}) { |m| "spec/requests/#{m[1]}_spec.rb" }
end end
guard 'bundler' do
watch('Gemfile')
# Uncomment next line if Gemfile contain `gemspec' command
watch(/^.+\.gemspec/)
end

View File

@ -1,5 +1,4 @@
Copyright 2014 Will Bradley Copyright 2011 YOURNAME
portions Copyright 2011 Marc Remolt
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View File

@ -1,32 +1,31 @@
= Wordpress-import = Refinerycms-wordpress-import
This little project is an importer for WordPress XML dumps into Rails. This little project is an importer for WordPress XML dumps into refinerycms(-blog).
It's been somewhat customized for one particular project; you probably want to fork this and modify it to fit your app's schema. You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
It's a fork of Marc Remolt's Refinerycms-wordpress-import ( https://github.com/mremolt/refinerycms-wordpress-import )
You can find the source code on github: https://github.com/zyphlar/wordpress-import
Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags. Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags.
If your site (blog) structure uses new urls, the links WILL break! For example, if you used If your site (blog) structure uses new urls, the links WILL break! For example, if you used
the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug". the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug".
So your inner site links will point to the old WP url. So your inner site links will point to the old WP url.
== Prerequisites == Prerequisites
TODO As refinerycms-wordpress-import is an addon for RefineryCMS, it shares the same prerequisites.
So you'll first need a running installation of refinerycms and refinerycms-blog. Make sure
the site is running, all migrations are run and you created the first refinery user.
== Installation == Installation
Just add the gem to your project's Gemfile: Just add the gem to your project's Gemfile:
gem 'wordpress-import' gem 'refinerycms-wordpress-import'
Or if you want to stay on the bleeding edge: Or if you want to stay on the bleeding edge:
gem 'wordpress-import', :git => 'git://github.com/zyphlar/wordpress-import.git' gem 'refinerycms-wordpress-import', :git => 'git://github.com/mremolt/refinerycms-wordpress-import.git'
and run and run

View File

@ -7,7 +7,7 @@ rescue LoadError
end end
require 'rake' require 'rake'
#require 'rdoc/task' require 'rdoc/task'
require 'rspec/core' require 'rspec/core'
require 'rspec/core/rake_task' require 'rspec/core/rake_task'
@ -16,10 +16,10 @@ RSpec::Core::RakeTask.new(:spec)
task :default => :spec task :default => :spec
#RDoc::Task.new(:rdoc) do |rdoc| Rake::RDocTask.new(:rdoc) do |rdoc|
#rdoc.rdoc_dir = 'rdoc' rdoc.rdoc_dir = 'rdoc'
#rdoc.title = 'Refinerycms-wordpress-import' rdoc.title = 'Refinerycms-wordpress-import'
#rdoc.options << '--line-numbers' << '--inline-source' rdoc.options << '--line-numbers' << '--inline-source'
#rdoc.rdoc_files.include('README.rdoc') rdoc.rdoc_files.include('README.rdoc')
#rdoc.rdoc_files.include('lib/**/*.rb') rdoc.rdoc_files.include('lib/**/*.rb')
#end end

View File

@ -0,0 +1,7 @@
module Refinery
module WordPress
end
end
require 'wordpress'

View File

@ -5,137 +5,86 @@ namespace :wordpress do
task :reset_blog do task :reset_blog do
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
%w(posts post_translations taggings tags).each do |table_name| %w(taggings tags blog_comments blog_categories blog_categories_blog_posts
blog_posts).each do |table_name|
p "Truncating #{table_name} ..." p "Truncating #{table_name} ..."
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}" ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
end end
end end
desc "import blog data from a WordPressImport XML dump" desc "import blog data from a Refinery::WordPress XML dump"
task :import_blog, :file_name, :blog_slug do |task, params| task :import_blog, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
p "Loading XML from #{params[:file_name]} (using blog #{params[:blog_slug]}) ..." dump = Refinery::WordPress::Dump.new(params[:file_name])
dump = WordPressImport::Dump.new(params[:file_name])
p "Importing #{dump.authors.count} authors ..." dump.authors.each(&:to_refinery)
dump.authors.each(&:to_rails)
# by default, import all; unless $ONLY_PUBLISHED = "true"
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
p "Importing #{dump.posts(only_published).count} posts ..." dump.posts(only_published).each(&:to_refinery)
if only_published Refinery::WordPress::Post.create_blog_page_if_necessary
p "(only published posts)"
else
p "(export ONLY_PUBLISHED=true to import only published posts)"
end
dump.posts(only_published).each{|p| p.to_rails(params[:blog_slug]) } ENV["MODEL"] = 'BlogPost'
Rake::Task["friendly_id:redo_slugs"].invoke
ENV.delete("MODEL")
end end
desc "reset blog tables and then import blog data from a WordPressImport XML dump" desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump"
task :reset_and_import_blog, :file_name, :blog_slug do |task, params| task :reset_and_import_blog, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_blog"].invoke Rake::Task["wordpress:reset_blog"].invoke
Rake::Task["wordpress:import_blog"].invoke(params[:file_name], params[:blog_slug]) Rake::Task["wordpress:import_blog"].invoke(params[:file_name])
end end
desc "download images in posts to public folder" desc "Reset the cms relevant tables for a clean import"
task :download_post_images, :host_match do |task, params| task :reset_pages do
raise "Error: you must specify a host to match for this download (i.e. rake wordpress:download_post_images['mywebsite']" if params[:host_match].blank?
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
# scrape images %w(page_part_translations page_translations page_parts pages).each do |table_name|
@posts = ::Post.all p "Truncating #{table_name} ..."
@posts.each do |post| ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
doc = Nokogiri::HTML(post.body)
doc.css("img").each do |img|
# find remote file path
remote_file = img.attributes["src"].text
# load uri
begin
remote_uri = URI(remote_file)
rescue => error
puts "Error parsing URL #{remote_file}: #{error.message}"
end
# only download if the image is a LFA-hosted image
if remote_uri && remote_uri.host.match(params[:host_match]) != nil
# find a local path for it
local_file = File.expand_path(File.join(Rails.public_path,remote_uri.path))
# only download if not already there or if it's zero bytes
unless File.size?(local_file)
# create local folders if necessary
dirname = File.dirname(local_file)
unless File.directory?(dirname)
FileUtils.mkdir_p(dirname)
end
# save remote file to local
begin
remote_file_io = open(remote_file)
File.open(local_file,'wb'){ |f| f.write(remote_file_io.read) }
puts "Saved file: #{local_file}"
rescue OpenURI::HTTPError => error
puts "Error saving file #{remote_file}: #{error.message}"
end
end
end
end
end end
puts "Finished downloding images from #{@posts.count} posts"
end end
# desc "Reset the cms relevant tables for a clean import" desc "import cms data from a WordPress XML dump"
# task :reset_pages do task :import_pages, :file_name do |task, params|
# Rake::Task["environment"].invoke Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name])
# %w(page_part_translations page_translations page_parts pages).each do |table_name| only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
# p "Truncating #{table_name} ..." dump.pages(only_published).each(&:to_refinery)
# ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
# end
# end
# desc "import cms data from a WordPress XML dump" # After all pages are persisted we can now create the parent - child
# task :import_pages, :file_name do |task, params| # relationships. This is necessary, as WordPress doesn't dump the pages in
# Rake::Task["environment"].invoke # a correct order.
# dump = WordPressImport::Dump.new(params[:file_name]) dump.pages(only_published).each do |dump_page|
page = ::Page.find(dump_page.post_id)
page.parent_id = dump_page.parent_id
page.save!
end
# only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false Refinery::WordPress::Post.create_blog_page_if_necessary
# dump.pages(only_published).each(&:to_rails)
# # After all pages are persisted we can now create the parent - child ENV["MODEL"] = 'Page'
# # relationships. This is necessary, as WordPress doesn't dump the pages in Rake::Task["friendly_id:redo_slugs"].invoke
# # a correct order. ENV.delete("MODEL")
# dump.pages(only_published).each do |dump_page| end
# page = ::Page.find(dump_page.post_id)
# page.parent_id = dump_page.parent_id
# page.save!
# end
# WordPressImport::Post.create_blog_page_if_necessary desc "reset cms tables and then import cms data from a WordPress XML dump"
task :reset_and_import_pages, :file_name do |task, params|
# ENV["MODEL"] = 'Page' Rake::Task["environment"].invoke
# Rake::Task["friendly_id:redo_slugs"].invoke Rake::Task["wordpress:reset_pages"].invoke
# ENV.delete("MODEL") Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
# end end
# desc "reset cms tables and then import cms data from a WordPress XML dump"
# task :reset_and_import_pages, :file_name do |task, params|
# Rake::Task["environment"].invoke
# Rake::Task["wordpress:reset_pages"].invoke
# Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
# end
desc "Reset the media relevant tables for a clean import" desc "Reset the media relevant tables for a clean import"
task :reset_media do task :reset_media do
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
%w(rich_rich_files).each do |table_name| %w(images resources).each do |table_name|
p "Truncating #{table_name} ..." p "Truncating #{table_name} ..."
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}" ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
end end
@ -144,27 +93,14 @@ namespace :wordpress do
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts" desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
task :import_and_replace_media, :file_name do |task, params| task :import_and_replace_media, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
dump = WordPressImport::Dump.new(params[:file_name]) dump = Refinery::WordPress::Dump.new(params[:file_name])
p "Importing #{dump.attachments.each_slice(200).first.count} attachments ..." attachments = dump.attachments.each(&:to_refinery)
attachments = dump.attachments.each_slice(200).first.each(&:to_rails)
unless $ATTACHMENT_EXCEPTIONS.blank?
p "----------------------------------------------------------"
p "ERRORS WERE ENCOUNTERED IMPORTING ATTACHMENTS:"
$ATTACHMENT_EXCEPTIONS.each{|exception| puts exception}
p "----------------------------------------------------------"
end
# parse all created Post and Page bodies and replace the old wordpress media urls # parse all created BlogPost and Page bodies and replace the old wordpress media urls
# with the newly created ones # with the newly created ones
p "Replacing attachment URLs found in posts/pages ..." attachments.each do |attachment|
attachments.each(&:replace_url) attachment.replace_url
unless $REPLACEMENT_EXCEPTIONS.blank?
p "----------------------------------------------------------"
p "ERRORS WERE ENCOUNTERED REPLACING ATTACHMENTS:"
$REPLACEMENT_EXCEPTIONS.each{|exception| puts exception}
p "----------------------------------------------------------"
end end
end end
@ -176,12 +112,10 @@ namespace :wordpress do
end end
desc "reset and import all data (see the other tasks)" desc "reset and import all data (see the other tasks)"
task :full_import, :file_name, :blog_slug do |task, params| task :full_import, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name],params[:blog_slug]) Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name])
#Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name]) Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
#Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name]) Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
end end
end end

View File

@ -1,5 +0,0 @@
module WordPressImport
end
require 'wordpress'

View File

@ -1,15 +1,16 @@
require 'nokogiri' require 'nokogiri'
require "wordpress/railtie" require "wordpress/railtie"
require 'shortcode'
module WordPressImport module Refinery
autoload :Author, 'wordpress/author' module WordPress
autoload :Tag, 'wordpress/tag' autoload :Author, 'wordpress/author'
autoload :Category, 'wordpress/category' autoload :Tag, 'wordpress/tag'
autoload :Page, 'wordpress/page' autoload :Category, 'wordpress/category'
autoload :Post, 'wordpress/post' autoload :Page, 'wordpress/page'
autoload :Comment, 'wordpress/comment' autoload :Post, 'wordpress/post'
autoload :Dump, 'wordpress/dump' autoload :Comment, 'wordpress/comment'
autoload :Attachment, 'wordpress/attachment' autoload :Dump, 'wordpress/dump'
autoload :Attachment, 'wordpress/attachment'
end
end end

View File

@ -1,179 +1,130 @@
module WordPressImport module Refinery
class Attachment module WordPress
attr_reader :node class Attachment
attr_reader :paperclip_image attr_reader :node
attr_reader :paperclip_file attr_reader :refinery_image
attr_reader :refinery_resource
def initialize(node) def initialize(node)
@node = node @node = node
end end
def title def title
node.xpath("title").text node.xpath("title").text
end end
def description def description
node.xpath("description").text node.xpath("description").text
end end
def file_name def file_name
url.split('/').last url.split('/').last
end end
def post_date def post_date
DateTime.parse node.xpath("wp:post_date").text DateTime.parse node.xpath("wp:post_date").text
end end
def url def url
node.xpath("wp:attachment_url").text node.xpath("wp:attachment_url").text
end end
def url_pattern def url_pattern
url_parts = url.split('.') url_parts = url.split('.')
extension = url_parts.pop extension = url_parts.pop
url_without_extension = url_parts.join('.') url_without_extension = url_parts.join('.')
/#{url_without_extension}(-\d+x\d+)?\.#{extension}/ /#{url_without_extension}(-\d+x\d+)?\.#{extension}/
end end
def image? def image?
url.match /\.(png|jpg|jpeg|gif)$/ url.match /\.(png|jpg|jpeg|gif)$/
end end
def to_rails def to_refinery
begin
if image? if image?
to_image to_image
else else
to_file to_resource
end end
rescue StandardError => ex
message = "ERROR saving attachment #{url} -- #{ex.message}"
p message
$ATTACHMENT_EXCEPTIONS = [] if $ATTACHMENT_EXCEPTIONS.blank?
$ATTACHMENT_EXCEPTIONS << message
return nil
end end
end
def replace_url def replace_url
begin
@occurrance_count = 0
if image? if image?
replace_image_url replace_image_url
else else
replace_resource_url replace_resource_url
end end
p "Replaced #{@occurrance_count} occurrances of #{url}"
rescue StandardError => ex
message = "ERROR replacing URL #{url} -- #{ex.message}"
p message
$REPLACEMENT_EXCEPTIONS = [] if $REPLACEMENT_EXCEPTIONS.blank?
$REPLACEMENT_EXCEPTIONS << message
return nil
end end
end
private private
def rich_file_clean_file_name(full_file_name) def to_image
extension = File.extname(full_file_name).gsub(/^\.+/, '') image = ::Image.new
filename = full_file_name.gsub(/\.#{extension}$/, '')
filename = CGI::unescape(filename)
filename = CGI::unescape(filename)
extension = extension.downcase
filename = filename.downcase.gsub(/[^a-z0-9]+/i, '-')
"#{filename}.#{extension}"
end
def to_image
# avoid duplicates; use our storage system's filename cleaner for lookup
image = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))
if image.rich_file.instance.id.blank?
p "Importing image #{file_name}"
image.simplified_type = "image"
image.created_at = post_date image.created_at = post_date
image.rich_file = URI.parse(url) image.image_url = url
image.save! image.save!
else
p "image #{file_name} already exists..." @refinery_image = image
image
end end
@paperclip_image = image def to_resource
image resource = ::Resource.new
end resource.created_at = post_date
resource.file_url = url
resource.save!
def to_file @refinery_resource = resource
# avoid duplicates; use our storage system's filename cleaner for lookup resource
file = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))
if file.rich_file.instance.id.blank?
p "Importing file #{file_name}"
file.created_at = post_date
file.rich_file = URI.parse(url) if file.rich_file.blank?
file.save!
else
p "file #{file_name} already exists..."
end end
@paperclip_file = file def replace_image_url
file replace_image_url_in_blog_posts
end replace_image_url_in_pages
end
def replace_resource_url
replace_resource_url_in_blog_posts
replace_resource_url_in_pages
end
def replace_image_url def replace_image_url_in_blog_posts
replace_image_url_in_blog_posts replace_url_in_blog_posts(refinery_image.image.url)
replace_image_url_in_pages end
end
def replace_resource_url def replace_image_url_in_pages
replace_resource_url_in_blog_posts replace_url_in_pages(refinery_image.image.url)
replace_resource_url_in_pages end
end
def replace_image_url_in_blog_posts def replace_resource_url_in_blog_posts
replace_url_in_blog_posts(paperclip_image.rich_file.url) replace_url_in_blog_posts(refinery_resource.file.url)
end end
def replace_image_url_in_pages def replace_resource_url_in_pages
replace_url_in_pages(paperclip_image.rich_file.url) replace_url_in_pages(refinery_resource.file.url)
end end
def replace_resource_url_in_blog_posts def replace_url_in_blog_posts(new_url)
replace_url_in_blog_posts(paperclip_file.rich_file.url) ::BlogPost.all.each do |post|
end if (! post.body.empty?) && post.body.include?(url)
post.body = post.body.gsub(url_pattern, new_url)
def replace_resource_url_in_pages post.save!
replace_url_in_pages(paperclip_file.rich_file.url) end
end
def replace_url_in_blog_posts(new_url)
::Post.all.each do |post|
if (! post.body.empty?) && post.body.include?(url)
@occurrance_count += 1
post.body = post.body.gsub(url_pattern, new_url)
post.save!
end end
end end
end
def replace_url_in_pages(new_url) def replace_url_in_pages(new_url)
::Page.all.each do |page| ::Page.all.each do |page|
page.translations.each do |translation| page.parts.each do |part|
translation.parts.each do |part| if (! part.body.to_s.blank?) && part.body.include?(url)
if (! part.content.to_s.blank?) && part.content.include?(url) part.body = part.body.gsub(url_pattern, new_url)
@occurrance_count += 1
part.content = part.content.gsub(url_pattern, new_url)
part.save! part.save!
end end
end end
end end
end end
end
end
end end
end end

View File

@ -1,47 +1,37 @@
module WordPressImport module Refinery
class Author module WordPress
attr_reader :author_node class Author
attr_reader :author_node
def initialize(author_node) def initialize(author_node)
@author_node = author_node @author_node = author_node
end
def name
name = author_node.xpath("wp:author_display_name").text
name = author_node.xpath("wp:author_first_name").text + " " + author_node.xpath("wp:author_first_name").text if name.blank?
name
end
def login
author_node.xpath("wp:author_login").text
end
def email
author_node.xpath("wp:author_email").text
end
def ==(other)
login == other.login
end
def inspect
"WordPress::Author: #{login} <#{email}>"
end
def to_rails
user = ::User.find_or_initialize_by_email(email)
user.wp_username = login
unless user.persisted?
user.name = name
user.password = 'password'
user.password_confirmation = 'password'
end end
user.save
puts "User #{login} imported." def login
author_node.xpath("wp:author_login").text
end
user def email
author_node.xpath("wp:author_email").text
end
def ==(other)
login == other.login
end
def inspect
"WordPress::Author: #{login} <#{email}>"
end
def to_refinery
user = User.find_or_initialize_by_username_and_email(login, email)
unless user.persisted?
user.password = 'password'
user.password_confirmation = 'password'
user.save
end
user
end
end end
end end
end end

View File

@ -1,17 +1,19 @@
module WordPressImport module Refinery
class Category module WordPress
attr_accessor :name class Category
attr_accessor :name
def initialize(text) def initialize(text)
@name = text @name = text
end end
def ==(other) def ==(other)
name == other.name name == other.name
end end
def to_rails def to_refinery
Tag.find_or_create_by_title(name) BlogCategory.find_or_create_by_title(name)
end
end end
end end
end end

View File

@ -1,46 +1,48 @@
module WordPressImport module Refinery
class Comment module WordPress
attr_reader :node class Comment
attr_reader :node
def initialize(node) def initialize(node)
@node = node @node = node
end end
def author def author
node.xpath('wp:comment_author').text node.xpath('wp:comment_author').text
end end
def email def email
node.xpath('wp:comment_author_email').text node.xpath('wp:comment_author_email').text
end end
def url def url
node.xpath('wp:comment_author_url').text node.xpath('wp:comment_author_url').text
end end
def date def date
DateTime.parse node.xpath("wp:comment_date").text DateTime.parse node.xpath("wp:comment_date").text
end end
def content def content
node.xpath('wp:comment_content').text node.xpath('wp:comment_content').text
end end
def approved? def approved?
node.xpath('wp:comment_approved').text.to_i == 1 node.xpath('wp:comment_approved').text.to_i == 1
end end
def ==(other) def ==(other)
(email == other.email) && (date == other.date) && (content == other.content) (email == other.email) && (date == other.date) && (content == other.content)
end end
def to_refinery def to_refinery
comment = BlogComment.new :name => author, :email => email comment = BlogComment.new :name => author, :email => email
comment.body = content comment.body = content
comment.created_at = date comment.created_at = date
comment.state = approved? ? 'approved' : 'rejected' comment.state = approved? ? 'approved' : 'rejected'
comment comment
end
end end
end end
end end

View File

@ -1,62 +1,57 @@
module WordPressImport module Refinery
class Dump module WordPress
attr_reader :doc class Dump
attr_reader :doc
def initialize(file_name) def initialize(file_name)
begin file_name = File.absolute_path(file_name)
file_name = File.expand_path(file_name)
raise "error" unless File.file?(file_name) && File.readable?(file_name) raise "Given file '#{file_name}' no file or not readable." \
rescue unless File.file?(file_name) && File.readable?(file_name)
raise "Given file '#{file_name}' is not a file or not readable. Rake tasks take filename arguments like this: rake wordpress:full_import['/path/to/my_file']"
file = File.open(file_name)
@doc = Nokogiri::XML(file)
end end
file = File.open(file_name) def authors
doc.xpath("//wp:author").collect do |author|
if file.size >= 10485760 # 10MB Author.new(author)
puts "WARNING: LibXML by default supports 10MB max file size. On some systems your file will be silently truncated; on others, an error will be raised. Consider splitting your file into smaller chunks and running rake tasks individually (authors, then blog/pages, then media), and double-check the import results." end
end end
@doc = Nokogiri::XML(file.read().gsub("\u0004", "")) # get rid of all EOT characters def pages(only_published=false)
end pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
Page.new(page)
end
def authors pages = pages.select(&:published?) if only_published
doc.xpath("//wp:author").collect do |author| pages
Author.new(author)
end
end
def pages(only_published=false)
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
Page.new(page)
end end
pages = pages.select(&:published?) if only_published def posts(only_published=false)
pages posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post|
end Post.new(post)
end
def posts(only_published=false) posts = posts.select(&:published?) if only_published
posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post| posts
Post.new(post)
end end
posts = posts.select(&:published?) if only_published
posts
end
def tags def tags
doc.xpath("//wp:tag/wp:tag_slug").collect do |tag| doc.xpath("//wp:tag/wp:tag_slug").collect do |tag|
Tag.new(tag.text) Tag.new(tag.text)
end
end end
end
def categories def categories
doc.xpath("//wp:category/wp:cat_name").collect do |category| doc.xpath("//wp:category/wp:cat_name").collect do |category|
Category.new(category.text) Category.new(category.text)
end
end end
end
def attachments def attachments
doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment| doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
Attachment.new(attachment) Attachment.new(attachment)
end
end end
end end
end end

View File

@ -1,144 +1,107 @@
module WordPressImport module Refinery
class Page module WordPress
include ::ActionView::Helpers::TagHelper class Page
include ::ActionView::Helpers::TextHelper include ::ActionView::Helpers::TagHelper
include ::ActionView::Helpers::TextHelper
attr_reader :node attr_reader :node
def initialize(node) def initialize(node)
@node = node @node = node
end
def inspect
"WordPress::Page(#{post_id}): #{title}"
end
def link
node.xpath("link").text
end
def title
node.xpath("title").text
end
def content
node.xpath("content:encoded").text
end
def content_formatted
formatted = format_shortcodes(format_syntax_highlighter(format_paragraphs(content)))
# remove all tags inside <pre> that simple_format created
# TODO: replace format_paragraphs with a method, that ignores pre-tags
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
"#{$1}#{strip_tags($2)}#{$3}"
end end
formatted def inspect
end "WordPress::Page(#{post_id}): #{title}"
def creator
node.xpath("dc:creator").text
end
def post_date
Time.parse node.xpath("wp:post_date").text
end
def publish_date
Time.parse node.xpath("pubDate").text
end
def post_name
node.xpath("wp:post_name").text
end
def post_id
node.xpath("wp:post_id").text.to_i
end
def parent_id
dump_id = node.xpath("wp:post_parent").text.to_i
dump_id == 0 ? nil : dump_id
end
def status
node.xpath("wp:status").text
end
def draft?
status != 'publish'
end
def published?
! draft?
end
def ==(other)
post_id == other.post_id
end
#NEED:
# creator -> "user_id"
# wp:post_name -> "slug"
# pubDate -> "published_at"
#OK:
# title -> "title"
# content:encoded -> "body"
# wp:post_date_gmt -> "created_at"
def to_rails
# :user_id => creator
page = ::Page.create!(:id => post_id, :title => title,
:created_at => post_date, :slug => post_name,
:published_at => publish_date, :body => content_formatted)
end
private
def format_paragraphs(text, html_options={})
# WordPress doesn't export <p>-Tags, so let's run a simple_format over
# the content. As we trust ourselves, no sanatize. This code is heavily
# inspired by the simple_format rails helper
text = ''.html_safe if text.nil?
start_tag = tag('p', html_options, true)
text.gsub!(/\n\n+/, "</p>#{start_tag}") # 2+ newline -> paragraph
text.gsub!(/\r?\n/, "<br/>\n") # \r\n and \n -> line break (must be after the paragraph detection to avoid <br/><br/>)
text.insert 0, start_tag
text.html_safe.safe_concat("</p>")
end
def format_syntax_highlighter(text)
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
# In WordPress you can (via a plugin) enclose code in [lang][/lang]
# blocks, which are converted to a <pre>-tag with a class corresponding
# to the language.
#
# Example:
# [ruby]p "Hello World"[/ruby]
# -> <pre class="brush: ruby">p "Hello world"</pre>
text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
end
# Replace Wordpress shortcodes with formatted HTML (see shortcode gem and support/templates folder)
def format_shortcodes(text)
Shortcode.setup do |config|
# the template parser to use
config.template_parser = :haml # :erb or :haml supported, :haml is default
# location of the template files
config.template_path = ::File.join(::File.dirname(__FILE__), "..", "..","support/templates/haml")
# a list of block tags to support e.g. [quote]Hello World[/quote]
config.block_tags = [:caption, :column]
# a list of self closing tags to support e.g. [youtube id="12345"]
config.self_closing_tags = [:end_columns, "google-map-v3"]
end end
Shortcode.process(text) def title
node.xpath("title").text
end
def content
node.xpath("content:encoded").text
end
def content_formatted
formatted = format_syntax_highlighter(format_paragraphs(content))
# remove all tags inside <pre> that simple_format created
# TODO: replace format_paragraphs with a method, that ignores pre-tags
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
"#{$1}#{strip_tags($2)}#{$3}"
end
formatted
end
def creator
node.xpath("dc:creator").text
end
def post_date
DateTime.parse node.xpath("wp:post_date").text
end
def post_id
node.xpath("wp:post_id").text.to_i
end
def parent_id
dump_id = node.xpath("wp:post_parent").text.to_i
dump_id == 0 ? nil : dump_id
end
def status
node.xpath("wp:status").text
end
def draft?
status != 'publish'
end
def published?
! draft?
end
def ==(other)
post_id == other.post_id
end
def to_refinery
page = ::Page.create!(:id => post_id, :title => title,
:created_at => post_date, :draft => draft?)
page.parts.create(:title => 'Body', :body => content_formatted)
page
end
private
def format_paragraphs(text, html_options={})
# WordPress doesn't export <p>-Tags, so let's run a simple_format over
# the content. As we trust ourselves, no sanatize. This code is heavily
# inspired by the simple_format rails helper
text = ''.html_safe if text.nil?
start_tag = tag('p', html_options, true)
text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n
text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}") # 2+ newline -> paragraph
text.insert 0, start_tag
text.html_safe.safe_concat("</p>")
end
def format_syntax_highlighter(text)
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
# In WordPress you can (via a plugin) enclose code in [lang][/lang]
# blocks, which are converted to a <pre>-tag with a class corresponding
# to the language.
#
# Example:
# [ruby]p "Hello World"[/ruby]
# -> <pre class="brush: ruby">p "Hello world"</pre>
text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
end
end end
end end
end end

View File

@ -1,67 +1,85 @@
module WordPressImport module Refinery
class Post < Page module WordPress
def tags class Post < Page
# xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0 def tags
path = if node.xpath("category[@domain='post_tag']").count > 0 # xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
"category[@domain='post_tag']" path = if node.xpath("category[@domain='post_tag']").count > 0
else "category[@domain='post_tag']"
"category[@domain='tag']" else
"category[@domain='tag']"
end
node.xpath(path).collect do |tag_node|
Tag.new(tag_node.text)
end
end end
node.xpath(path).collect do |tag_node| def tag_list
Tag.new(tag_node.text) tags.collect(&:name).join(',')
end end
def categories
node.xpath("category[@domain='category']").collect do |cat|
Category.new(cat.text)
end
end
def comments
node.xpath("wp:comment").collect do |comment_node|
Comment.new(comment_node)
end
end
def to_refinery
user = ::User.find_by_username(creator) || ::User.first
raise "Referenced User doesn't exist! Make sure the authors are imported first." \
unless user
begin
post = ::BlogPost.new :title => title, :body => content_formatted,
:draft => draft?, :published_at => post_date, :created_at => post_date,
:author => user, :tag_list => tag_list
post.save!
::BlogPost.transaction do
categories.each do |category|
post.categories << category.to_refinery
end
comments.each do |comment|
comment = comment.to_refinery
comment.post = post
comment.save
end
end
rescue ActiveRecord::RecordInvalid
# if the title has already been taken (WP allows duplicates here,
# refinery doesn't) append the post_id to it, making it unique
post.title = "#{title}-#{post_id}"
post.save
end
post
end
def self.create_blog_page_if_necessary
# refinerycms wants a page at /blog, so let's make sure there is one
# taken from the original db seeds from refinery-blog
unless ::Page.where("link_url = ?", '/blog').exists?
page = ::Page.create(
:title => "Blog",
:link_url => "/blog",
:deletable => false,
:position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
:menu_match => "^/blogs?(\/|\/.+?|)$"
)
::Page.default_parts.each do |default_page_part|
page.parts.create(:title => default_page_part, :body => nil)
end
end
end
end end
def tag_list
tags.collect(&:name).join(',')
end
def categories
node.xpath("category[@domain='category']").collect do |cat|
Category.new(cat.text)
end
end
def comments
node.xpath("wp:comment").collect do |comment_node|
Comment.new(comment_node)
end
end
# blog_slug is used to identify which blog this import is from
def to_rails(blog_slug)
user = ::User.find_by_wp_username(creator)
if user.nil?
raise "User with wp_username #{creator} not found"
end
post = ::Post.create({
:wp_post_id => post_id, :slug => post_name,
:user_id => user.id, :title => title,
:created_at => post_date,
:published_at => publish_date,
:wp_link => link,
:wp_blog => blog_slug,
:translations_attributes => { "0" => {
:locale => "en",
:title => title,
:body => content_formatted,
# merge the translation's category list with the wordpress post's
:category_list => categories.collect(&:name) | tags.collect(&:name)
}}
})
if post.errors.blank?
puts "Post #{post_name} imported."
return post.reload
else
puts post.inspect
raise post.errors.full_messages.to_s
end
end
end end
end end

View File

@ -1,7 +1,9 @@
module WordPressImport module Refinery
class Railtie < Rails::Railtie module WordPress
rake_tasks do class Railtie < Rails::Railtie
load "tasks/wordpress.rake" rake_tasks do
load "tasks/wordpress.rake"
end
end end
end end
end end

View File

@ -1,18 +1,20 @@
module WordPressImport module Refinery
class Tag module WordPress
attr_accessor :name class Tag
attr_accessor :name
def initialize(text)
@name = text
end
def ==(other)
name == other.name
end
def to_refinery
::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
end
def initialize(text)
@name = text
end end
def ==(other)
name == other.name
end
def to_refinery
::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
end
end end
end end

View File

@ -0,0 +1,23 @@
# Provide a simple gemspec so you can easily use your enginex
# project in your rails apps through git.
Gem::Specification.new do |s|
s.name = "refinerycms-wordpress-import"
s.summary = "Import WordPress XML dumps into refinerycms(-blog)."
s.description = "This gem imports a WordPress XML dump into refinerycms (Page, User) and refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)"
s.version = "0.3.0"
s.date = "2011-06-13"
s.authors = ['Marc Remolt']
s.email = 'marc.remolt@googlemail.com'
s.homepage = 'https://github.com/mremolt/refinerycms-wordpress-import'
s.add_dependency 'bundler', '~> 1.0'
s.add_dependency 'refinerycms', '~> 1.0.0'
s.add_dependency 'refinerycms-blog', '~> 1.5.2'
s.add_dependency 'nokogiri', '~> 1.4.4'
s.add_development_dependency 'rspec-rails'
s.add_development_dependency 'database_cleaner'
s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
end

View File

@ -1,3 +1,6 @@
require 'yaml'
YAML::ENGINE.yamler= 'syck'
require 'rubygems' require 'rubygems'
gemfile = File.expand_path('../../../../Gemfile', __FILE__) gemfile = File.expand_path('../../../../Gemfile', __FILE__)

View File

@ -24,6 +24,4 @@ Dummy::Application.configure do
config.action_dispatch.best_standards_support = :builtin config.action_dispatch.best_standards_support = :builtin
end end
Refinery.rescue_not_found = false# When true will use Amazon's Simple Storage Service on your production machine Refinery.rescue_not_found = false
# instead of the default file system for resources and images
Refinery.s3_backend = !(ENV['S3_KEY'].nil? || ENV['S3_SECRET'].nil?)

View File

@ -47,6 +47,4 @@ Dummy::Application.configure do
# Send deprecation notices to registered listeners # Send deprecation notices to registered listeners
config.active_support.deprecation = :notify config.active_support.deprecation = :notify
end end
Refinery.rescue_not_found = true# When true will use Amazon's Simple Storage Service on your production machine Refinery.rescue_not_found = true
# instead of the default file system for resources and images
Refinery.s3_backend = !(ENV['S3_KEY'].nil? || ENV['S3_SECRET'].nil?)

View File

@ -33,6 +33,4 @@ Dummy::Application.configure do
# Print deprecation notices to the stderr # Print deprecation notices to the stderr
config.active_support.deprecation = :stderr config.active_support.deprecation = :stderr
end end
Refinery.rescue_not_found = false# When true will use Amazon's Simple Storage Service on your production machine Refinery.rescue_not_found = false
# instead of the default file system for resources and images
Refinery.s3_backend = !(ENV['S3_KEY'].nil? || ENV['S3_SECRET'].nil?)

View File

@ -1,11 +0,0 @@
class AddCachedSlugs < ActiveRecord::Migration
def self.up
add_column :blog_categories, :cached_slug, :string
add_column :blog_posts, :cached_slug, :string
end
def self.down
remove_column :blog_categories, :cached_slug
remove_column :blog_posts, :cached_slug
end
end

View File

@ -1,9 +0,0 @@
class AddCustomUrlFieldToBlogPosts < ActiveRecord::Migration
def self.up
add_column :blog_posts, :custom_url, :string
end
def self.down
remove_column :blog_posts, :custom_url
end
end

View File

@ -1,10 +0,0 @@
class AddCustomTeaserFieldToBlogPosts < ActiveRecord::Migration
def self.up
add_column :blog_posts, :custom_teaser, :text
end
def self.down
remove_column :blog_posts, :custom_teaser
end
end

View File

@ -10,13 +10,12 @@
# #
# It's strongly recommended to check this file into your version control system. # It's strongly recommended to check this file into your version control system.
ActiveRecord::Schema.define(:version => 20110812162204) do ActiveRecord::Schema.define(:version => 20110602094445) do
create_table "blog_categories", :force => true do |t| create_table "blog_categories", :force => true do |t|
t.string "title" t.string "title"
t.datetime "created_at" t.datetime "created_at"
t.datetime "updated_at" t.datetime "updated_at"
t.string "cached_slug"
end end
add_index "blog_categories", ["id"], :name => "index_blog_categories_on_id" add_index "blog_categories", ["id"], :name => "index_blog_categories_on_id"
@ -49,9 +48,6 @@ ActiveRecord::Schema.define(:version => 20110812162204) do
t.datetime "created_at" t.datetime "created_at"
t.datetime "updated_at" t.datetime "updated_at"
t.integer "user_id" t.integer "user_id"
t.string "cached_slug"
t.string "custom_url"
t.text "custom_teaser"
end end
add_index "blog_posts", ["id"], :name => "index_blog_posts_on_id" add_index "blog_posts", ["id"], :name => "index_blog_posts_on_id"

View File

@ -1,12 +1,3 @@
::Page.reset_column_information
# Check whether all columns are applied yet by seo_meta.
unless !defined?(::SeoMeta) || ::SeoMeta.attributes.keys.all? { |k|
::Page.translation_class.instance_methods.include?(k)
}
# Make pages model seo_meta because not all columns are accessible.
::Page.translation_class.send :is_seo_meta
end
page_position = -1 page_position = -1
home_page = Page.create(:title => "Home", home_page = Page.create(:title => "Home",

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe WordPressImport::Attachment, :type => :model do describe Refinery::WordPress::Attachment, :type => :model do
context "an image attchment" do context "an image attchment" do
let(:attachment) { test_dump.attachments.first } let(:attachment) { test_dump.attachments.first }

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe WordPressImport::Author, :type => :model do describe Refinery::WordPress::Author, :type => :model do
let(:author) { test_dump.authors.first } let(:author) { test_dump.authors.first }
specify { author.login.should == 'admin' } specify { author.login.should == 'admin' }

View File

@ -1,15 +1,15 @@
require 'spec_helper' require 'spec_helper'
describe WordPressImport::Category, :type => :model do describe Refinery::WordPress::Category, :type => :model do
let(:category) { WordPressImport::Category.new('Rant') } let(:category) { Refinery::WordPress::Category.new('Rant') }
describe "#name" do describe "#name" do
specify { category.name.should == 'Rant' } specify { category.name.should == 'Rant' }
end end
describe "#==" do describe "#==" do
specify { category.should == WordPressImport::Category.new('Rant') } specify { category.should == Refinery::WordPress::Category.new('Rant') }
specify { category.should_not == WordPressImport::Category.new('Tutorials') } specify { category.should_not == Refinery::WordPress::Category.new('Tutorials') }
end end
describe "#to_refinery" do describe "#to_refinery" do

View File

@ -1,10 +1,10 @@
require 'spec_helper' require 'spec_helper'
describe WordPressImport::Dump, :type => :model do describe Refinery::WordPress::Dump, :type => :model do
let(:dump) { test_dump } let(:dump) { test_dump }
it "should create a Dump object given a xml file" do it "should create a Dump object given a xml file" do
dump.should be_a WordPressImport::Dump dump.should be_a Refinery::WordPress::Dump
end end
it "should include a Nokogiri::XML object" do it "should include a Nokogiri::XML object" do
@ -13,12 +13,12 @@ describe WordPressImport::Dump, :type => :model do
describe "#tags" do describe "#tags" do
let(:tags) do let(:tags) do
[ WordPressImport::Tag.new('css'), WordPressImport::Tag.new('html'), [ Refinery::WordPress::Tag.new('css'), Refinery::WordPress::Tag.new('html'),
WordPressImport::Tag.new('php'), WordPressImport::Tag.new('ruby')] Refinery::WordPress::Tag.new('php'), Refinery::WordPress::Tag.new('ruby')]
end end
specify { dump.tags.count == 4 } specify { dump.tags.count == 4 }
specify { dump.tags.first.should be_a(WordPressImport::Tag) } specify { dump.tags.first.should be_a(Refinery::WordPress::Tag) }
it "should return all included tags" do it "should return all included tags" do
tags.each do |tag| tags.each do |tag|
@ -29,12 +29,12 @@ describe WordPressImport::Dump, :type => :model do
describe "#categories" do describe "#categories" do
let(:categories) do let(:categories) do
[ WordPressImport::Category.new('Rant'), WordPressImport::Category.new('Tutorials'), [ Refinery::WordPress::Category.new('Rant'), Refinery::WordPress::Category.new('Tutorials'),
WordPressImport::Category.new('Uncategorized') ] Refinery::WordPress::Category.new('Uncategorized') ]
end end
specify { dump.categories.count == 4 } specify { dump.categories.count == 4 }
specify { dump.categories.first.should be_a(WordPressImport::Category) } specify { dump.categories.first.should be_a(Refinery::WordPress::Category) }
it "should return all included categories" do it "should return all included categories" do
categories.each do |cat| categories.each do |cat|
@ -48,7 +48,7 @@ describe WordPressImport::Dump, :type => :model do
dump.pages.should have(3).pages dump.pages.should have(3).pages
end end
specify { dump.pages.first.should be_a(WordPressImport::Page) } specify { dump.pages.first.should be_a(Refinery::WordPress::Page) }
it "should return only published pages with only_published=true" do it "should return only published pages with only_published=true" do
dump.pages(true).should have(2).pages dump.pages(true).should have(2).pages
@ -60,7 +60,7 @@ describe WordPressImport::Dump, :type => :model do
dump.authors.should have(1).author dump.authors.should have(1).author
end end
specify { dump.authors.first.should be_a(WordPressImport::Author) } specify { dump.authors.first.should be_a(Refinery::WordPress::Author) }
end end
describe "#posts" do describe "#posts" do
@ -68,7 +68,7 @@ describe WordPressImport::Dump, :type => :model do
dump.posts.should have(3).posts dump.posts.should have(3).posts
end end
specify { dump.posts.first.should be_a(WordPressImport::Post) } specify { dump.posts.first.should be_a(Refinery::WordPress::Post) }
it "should return only published posts with only_published=true" do it "should return only published posts with only_published=true" do
dump.posts(true).should have(2).posts dump.posts(true).should have(2).posts
@ -80,6 +80,6 @@ describe WordPressImport::Dump, :type => :model do
dump.attachments.should have(2).attachments dump.attachments.should have(2).attachments
end end
specify { dump.attachments.first.should be_a(WordPressImport::Attachment) } specify { dump.attachments.first.should be_a(Refinery::WordPress::Attachment) }
end end
end end

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe WordPressImport::Page, :type => :model do describe Refinery::WordPress::Page, :type => :model do
let(:dump) { test_dump } let(:dump) { test_dump }
let(:page) { test_dump.pages.last } let(:page) { test_dump.pages.last }

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe WordPressImport::Post, :type => :model do describe Refinery::WordPress::Post, :type => :model do
let(:post) { test_dump.posts.last } let(:post) { test_dump.posts.last }
specify { post.title.should == 'Third blog post' } specify { post.title.should == 'Third blog post' }
@ -17,15 +17,15 @@ describe WordPressImport::Post, :type => :model do
describe "#categories" do describe "#categories" do
specify { post.categories.should have(1).category } specify { post.categories.should have(1).category }
specify { post.categories.first.should == WordPressImport::Category.new('Rant') } specify { post.categories.first.should == Refinery::WordPress::Category.new('Rant') }
end end
describe "#tags" do describe "#tags" do
specify { post.tags.should have(3).tags } specify { post.tags.should have(3).tags }
specify { post.tags.should include(WordPressImport::Tag.new('css')) } specify { post.tags.should include(Refinery::WordPress::Tag.new('css')) }
specify { post.tags.should include(WordPressImport::Tag.new('html')) } specify { post.tags.should include(Refinery::WordPress::Tag.new('html')) }
specify { post.tags.should include(WordPressImport::Tag.new('php')) } specify { post.tags.should include(Refinery::WordPress::Tag.new('php')) }
end end
specify { post.tag_list.should == 'css,html,php' } specify { post.tag_list.should == 'css,html,php' }
@ -63,7 +63,6 @@ describe WordPressImport::Post, :type => :model do
@comment.body.should == comment.content @comment.body.should == comment.content
@comment.state.should == 'approved' @comment.state.should == 'approved'
@comment.created_at.should == comment.date @comment.created_at.should == comment.date
@comment.created_at.should == comment.date
end end
end end
end end
@ -80,13 +79,16 @@ describe WordPressImport::Post, :type => :model do
@post = post.to_refinery @post = post.to_refinery
end end
specify { BlogPost.should have(1).record } specify { BlogPost.should have(1).record }
specify { @post.title.should == post.title } it "should copy the attributes from Refinery::WordPress::Post" do
specify { @post.body.should == post.content_formatted } @post.title.should == post.title
specify { @post.draft.should == post.draft? } @post.body.should == post.content_formatted
specify { @post.published_at.should == post.post_date } @post.draft.should == post.draft?
specify { @post.author.username.should == post.creator } @post.published_at.should == post.post_date
@post.created_at.should == post.post_date
@post.author.username.should == post.creator
end
it "should assign a category for each Refinery::WordPress::Category" do it "should assign a category for each Refinery::WordPress::Category" do
@post.categories.should have(post.categories.count).records @post.categories.should have(post.categories.count).records

View File

@ -1,15 +1,15 @@
require 'spec_helper' require 'spec_helper'
describe WordPressImport::Tag, :type => :model do describe Refinery::WordPress::Tag, :type => :model do
let(:tag) { WordPressImport::Tag.new('ruby') } let(:tag) { Refinery::WordPress::Tag.new('ruby') }
describe "#name" do describe "#name" do
specify { tag.name.should == 'ruby' } specify { tag.name.should == 'ruby' }
end end
describe "#==" do describe "#==" do
specify { tag.should == WordPressImport::Tag.new('ruby') } specify { tag.should == Refinery::WordPress::Tag.new('ruby') }
specify { tag.should_not == WordPressImport::Tag.new('php') } specify { tag.should_not == Refinery::WordPress::Tag.new('php') }
end end
describe "#to_refinery" do describe "#to_refinery" do

View File

@ -0,0 +1,7 @@
require 'spec_helper'
describe Refinery::WordPress do
it "should be valid" do
Refinery::WordPress.should be_a(Module)
end
end

View File

@ -1,11 +1,11 @@
module WordPressImport::SpecHelpers module Refinery::WordPress::SpecHelpers
def test_dump def test_dump
file_name = File.expand_path(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml')) file_name = File.realpath(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml'))
WordPressImport::Dump.new(file_name) Refinery::WordPress::Dump.new(file_name)
end end
end end
RSpec.configure do |config| RSpec.configure do |config|
config.include WordPressImport::SpecHelpers config.include Refinery::WordPress::SpecHelpers
end end

View File

@ -1,7 +0,0 @@
require 'spec_helper'
describe WordPressImport do
it "should be valid" do
WordPressImport.should be_a(Module)
end
end

View File

@ -1,24 +0,0 @@
-# disabling style_hash for now; setting the width = width+10 doesn't seem to actually be a good thing
-# style_hash = {:style => "width: #{@attributes[:width].to_i+10}px"} unless @attributes[:width].blank?
- figure_hash = {:class => @attributes[:align], :id => @attributes[:id]}
-# figure_hash = figure_hash.merge(style_hash) if style_hash
- content_image = Nokogiri::HTML(@content).css("img").to_html
- content_caption = Nokogiri::HTML(@content).text + " #{@attributes[:caption]}"
- @content = "#{content_image} <figcaption>#{content_caption}</figcaption>" unless content_image.blank?
%figure{figure_hash}= @content
-# sample wordpress-y css to go along with this html:
figure {
background: #f1f1f1;
margin-bottom: 20px;
padding: 4px;
text-align: center;
}
figure img {
margin: 5px 5px 0;
}
figure figcaption {
color: #777;
font-size: 12px;
margin: 5px 5px 24px;
}

View File

@ -1,4 +0,0 @@
.post_column_1{:style =>"width:#{@attributes[:width]}; float: left; padding: #{@attributes[:padding]} 6% #{@attributes[:padding]} #{@attributes[:padding]}; display: inline;"}= @content
-#[column width="47%" padding="0"] foo [/column]
-# <div class="post_column_1">

View File

@ -1 +0,0 @@
<div style="clear: both;"></div>

View File

@ -1,37 +0,0 @@
- addmarkerlist = @attributes[:addmarkerlist].to_s || ""
- name = addmarkerlist.split("{}").last
- uri_name = URI::encode(name)
- uri_address = URI::encode(addmarkerlist.split("{}").first)
-# google will open up the business page if we pass it a name
- uri_address = "#{uri_name},%20#{uri_address}" unless (uri_name == uri_address || name.blank?)
- gmaps_url = "https://maps.google.com/maps/?q=#{uri_address}&amp;ie=UTF8&amp;t=m"
<iframe src="#{gmaps_url}&amp;output=embed" frameborder="0" marginwidth="0" marginheight="0" scrolling="no" width="#{@attributes[:width]}" height="#{@attributes[:height]}"></iframe>
<small>View <a href="#{gmaps_url}">#{name}</a> in a larger map</small>
-# [google-map-v3 width="425"
height="350"
zoom="12"
maptype="roadmap"
mapalign="left"
directionhint="false"
language="default"
poweredby="false"
maptypecontrol="true"
pancontrol="true"
zoomcontrol="true"
scalecontrol="true"
streetviewcontrol="true"
scrollwheelcontrol="false"
draggable="true"
tiltfourtyfive="false"
addmarkermashupbubble="false"
addmarkermashupbubble="false"
addmarkerlist="#1 Dream Manor Dr Globe, AZ{}1-default.png{}Dream Manor Inn"
bubbleautopan="true"
showbike="false"
showtraffic="false"
showpanoramio="false"]

View File

@ -1,22 +0,0 @@
# Provide a simple gemspec so you can easily use your enginex
# project in your rails apps through git.
Gem::Specification.new do |s|
s.name = "wordpress-import"
s.summary = "Import WordPress XML dumps into your Ruby on Rails app."
s.description = "This gem imports a WordPress XML dump into Rails (Page, User, BlogPost, BlogCategory, Tag, BlogComment)"
s.version = "0.4.4"
s.date = "2014-03-17"
s.authors = ['Will Bradley']
s.email = 'bradley.will@gmail.com'
s.homepage = 'https://github.com/zyphlar/wordpress-import'
s.add_dependency 'bundler', '~> 1.0'
s.add_dependency 'nokogiri', '~> 1.6.0'
s.add_dependency 'shortcode', '~> 0.1.1'
s.add_development_dependency 'rspec-rails'
s.add_development_dependency 'database_cleaner'
s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
end