Compare commits

...

29 Commits

Author SHA1 Message Date
05bf79536d Updating shortcode gem version 2014-03-18 14:09:38 -07:00
249de047b2 Updating gem version 2014-03-17 23:28:50 -07:00
9aa403612e Adding wordpress shortcode support 2014-03-17 23:27:02 -07:00
201233f89f Filter EOT characters from import 2014-03-12 18:59:31 -07:00
ede3baae64 Version 2014-03-12 15:57:41 -07:00
6382ceb7dc Minor filename tweak 2014-03-12 15:57:20 -07:00
10df4f5494 Tweaking error handling and reporting 2014-03-12 15:14:14 -07:00
3807af7ff3 Finishing rake task for image scraping/download 2014-03-12 12:56:25 -07:00
84ba9eac1c Version increment 2014-03-12 11:12:30 -07:00
0031b7046f Adding download post images task 2014-03-12 11:11:07 -07:00
5d96f3a336 Updating with more rake tweaks and some post customizations 2014-03-11 17:57:28 -07:00
39914c83c9 Adding categories/tags 2014-03-11 15:17:46 -07:00
5f564bdbff Starting categories 2014-03-11 12:03:51 -07:00
ce3e947b83 Todo update 2014-03-06 18:23:22 -07:00
c06bc8142d Fixing occurrance count and error reporting in attachments 2014-03-06 18:20:09 -07:00
4b36eaeccf Not sure what the diff is here, but it seems to work now 2014-03-06 17:37:38 -07:00
d4b2457787 Changing attachment to use paperclip 2014-03-06 17:25:39 -07:00
bd4cccd91a Refining post/author/dump behavior 2014-03-05 21:27:57 -07:00
60ce62ad1b Beginning to remove refinery stuff 2014-03-04 15:47:53 -07:00
Marc Remolt
1e1f3574eb Updated nokogiri dependency to 1.5.0 2011-08-12 19:29:05 +02:00
Marc Remolt
0beaa90a3d Trying to make the gem 1.8.7 compatible
* replaced File.realpath in testsuite with File.expand_path
* removed the old YAML hack
2011-08-12 19:15:46 +02:00
Marc Remolt
6188f58706 Trying to test under multiple ruby versions 2011-08-12 18:57:41 +02:00
Marc Remolt
b2700960d8 Updated to latest refinerycms(-blog)
* bundle update
* new migrations
* fixed specs for new behaviour
2011-08-12 18:44:00 +02:00
Marc Remolt
d3352df3a5 Added 1.9.2 as build base to travis config 2011-07-27 14:12:30 +02:00
Marc Remolt
e02aeefeb5 Removed RDoc rake tasks (not needed for now) 2011-07-27 14:07:54 +02:00
Marc Remolt
cdbb1d001a Merge branch 'master' of github.com:mremolt/refinerycms-wordpress-import 2011-07-27 13:56:25 +02:00
Marc Remolt
054e7f3b91 Bundle update 2011-07-27 13:55:52 +02:00
Marc Remolt
ec1162fc4b Merge pull request #4 from wikyd/ruby_1_8_7
Change to make compatible with Ruby 1.8.7
2011-06-21 00:24:05 -07:00
Kyle Wilkinson
368561cfc7 Use File.expand_path instead of File.absolute_path to work with Ruby 1.8.7 2011-06-20 22:53:26 -07:00
45 changed files with 983 additions and 754 deletions

4
.travis.yml Normal file
View File

@ -0,0 +1,4 @@
rvm:
- 1.9.2
- 1.8.7
- ruby-head

10
Gemfile
View File

@ -1,11 +1,15 @@
source "http://rubygems.org" source "http://rubygems.org"
gem "rails", "3.0.7" ruby '2.0.0'
gem "rails", "4.0.0"
#gem "capybara", ">= 1.0.0.beta1" #gem "capybara", ">= 1.0.0.beta1"
gem "sqlite3" gem "sqlite3"
gem "rmagick" gem "rmagick"
gem 'shortcode', "0.1.2"
group :development, :test do group :development, :test do
gem 'byebug'
gem "rspec-rails", ">= 2.6.0" gem "rspec-rails", ">= 2.6.0"
gem "database_cleaner" gem "database_cleaner"
gem 'guard-rspec' gem 'guard-rspec'
@ -19,6 +23,4 @@ end
# gem 'ruby-debug' # gem 'ruby-debug'
# gem 'ruby-debug19' # gem 'ruby-debug19'
gem 'refinerycms' gem 'wordpress-import', :path => './'
gem 'refinerycms-blog'
gem 'refinerycms-wordpress-import', :path => './'

View File

@ -1,197 +1,176 @@
PATH PATH
remote: ./ remote: ./
specs: specs:
refinerycms-wordpress-import (0.3.0) wordpress-import (0.4.4)
bundler (~> 1.0) bundler (~> 1.0)
nokogiri (~> 1.4.4) nokogiri (~> 1.6.0)
refinerycms (~> 1.0.0) shortcode (~> 0.1.1)
refinerycms-blog (~> 1.5.2)
GEM GEM
remote: http://rubygems.org/ remote: http://rubygems.org/
specs: specs:
abstract (1.0.0) actionmailer (4.0.0)
actionmailer (3.0.7) actionpack (= 4.0.0)
actionpack (= 3.0.7) mail (~> 2.5.3)
mail (~> 2.2.15) actionpack (4.0.0)
actionpack (3.0.7) activesupport (= 4.0.0)
activemodel (= 3.0.7) builder (~> 3.1.0)
activesupport (= 3.0.7) erubis (~> 2.7.0)
builder (~> 2.1.2) rack (~> 1.5.2)
erubis (~> 2.6.6) rack-test (~> 0.6.2)
i18n (~> 0.5.0) activemodel (4.0.0)
rack (~> 1.2.1) activesupport (= 4.0.0)
rack-mount (~> 0.6.14) builder (~> 3.1.0)
rack-test (~> 0.5.7) activerecord (4.0.0)
tzinfo (~> 0.3.23) activemodel (= 4.0.0)
activemodel (3.0.7) activerecord-deprecated_finders (~> 1.0.2)
activesupport (= 3.0.7) activesupport (= 4.0.0)
builder (~> 2.1.2) arel (~> 4.0.0)
i18n (~> 0.5.0) activerecord-deprecated_finders (1.0.3)
activerecord (3.0.7) activesupport (4.0.0)
activemodel (= 3.0.7) i18n (~> 0.6, >= 0.6.4)
activesupport (= 3.0.7) minitest (~> 4.2)
arel (~> 2.0.2) multi_json (~> 1.3)
tzinfo (~> 0.3.23) thread_safe (~> 0.1)
activeresource (3.0.7) tzinfo (~> 0.3.37)
activemodel (= 3.0.7) arel (4.0.2)
activesupport (= 3.0.7) atomic (1.1.15)
activesupport (3.0.7) blankslate (2.1.2.4)
acts-as-taggable-on (2.0.6) builder (3.1.4)
acts_as_indexed (0.7.2) byebug (2.7.0)
arel (2.0.10) columnize (~> 0.3)
awesome_nested_set (2.0.0) debugger-linecache (~> 1.2)
activerecord (>= 3.0.0) celluloid (0.15.2)
babosa (0.3.5) timers (~> 1.1.0)
bcrypt-ruby (2.1.4) celluloid-io (0.15.0)
builder (2.1.2) celluloid (>= 0.15.0)
database_cleaner (0.6.7) nio4r (>= 0.5.0)
devise (1.3.4) coderay (1.1.0)
bcrypt-ruby (~> 2.1.2) columnize (0.3.6)
orm_adapter (~> 0.0.3) database_cleaner (1.2.0)
warden (~> 1.0.3) debugger-linecache (1.2.0)
diff-lcs (1.1.2) diff-lcs (1.2.5)
dragonfly (0.9.4) erubis (2.7.0)
rack
erubis (2.6.6)
abstract (>= 1.0.0)
fakeweb (1.3.0) fakeweb (1.3.0)
ffi (1.0.9) ffi (1.9.3)
filters_spam (0.3) formatador (0.2.4)
friendly_id_globalize3 (3.2.1.3) guard (2.5.1)
babosa (~> 0.3.0) formatador (>= 0.2.4)
globalize3 (0.1.0) listen (~> 2.6)
activemodel (>= 3.0.0) lumberjack (~> 1.0)
activerecord (>= 3.0.0) pry (>= 0.9.12)
guard (0.4.2) thor (>= 0.18.1)
thor (~> 0.14.6) guard-bundler (2.0.0)
guard-bundler (0.1.3) bundler (~> 1.0)
bundler (>= 1.0.0) guard (~> 2.2)
guard (>= 0.2.2) guard-rspec (4.2.8)
guard-rspec (0.4.0) guard (~> 2.1)
guard (>= 0.4.0) rspec (>= 2.14, < 4.0)
i18n (0.5.0) haml (4.0.5)
libnotify (0.5.5) tilt
mail (2.2.19) hike (1.2.3)
activesupport (>= 2.3.6) i18n (0.6.9)
i18n (>= 0.4.0) libnotify (0.8.2)
ffi (>= 1.0.11)
listen (2.7.0)
celluloid (>= 0.15.2)
celluloid-io (>= 0.15.0)
rb-fsevent (>= 0.9.3)
rb-inotify (>= 0.9)
lumberjack (1.0.4)
mail (2.5.4)
mime-types (~> 1.16) mime-types (~> 1.16)
treetop (~> 1.4.8) treetop (~> 1.4.8)
mime-types (1.16) method_source (0.8.2)
nokogiri (1.4.4) mime-types (1.25.1)
orm_adapter (0.0.5) mini_portile (0.5.2)
polyglot (0.3.1) minitest (4.7.5)
rack (1.2.3) multi_json (1.9.0)
rack-cache (1.0.2) nio4r (1.0.0)
rack (>= 0.4) nokogiri (1.6.1)
rack-mount (0.6.14) mini_portile (~> 0.5.0)
rack (>= 1.0.0) parslet (1.5.0)
rack-test (0.5.7) blankslate (~> 2.0)
polyglot (0.3.4)
pry (0.9.12.6)
coderay (~> 1.0)
method_source (~> 0.8)
slop (~> 3.4)
rack (1.5.2)
rack-test (0.6.2)
rack (>= 1.0) rack (>= 1.0)
rails (3.0.7) rails (4.0.0)
actionmailer (= 3.0.7) actionmailer (= 4.0.0)
actionpack (= 3.0.7) actionpack (= 4.0.0)
activerecord (= 3.0.7) activerecord (= 4.0.0)
activeresource (= 3.0.7) activesupport (= 4.0.0)
activesupport (= 3.0.7) bundler (>= 1.3.0, < 2.0)
bundler (~> 1.0) railties (= 4.0.0)
railties (= 3.0.7) sprockets-rails (~> 2.0.0)
railties (3.0.7) railties (4.0.0)
actionpack (= 3.0.7) actionpack (= 4.0.0)
activesupport (= 3.0.7) activesupport (= 4.0.0)
rake (>= 0.8.7) rake (>= 0.8.7)
thor (~> 0.14.4) thor (>= 0.18.1, < 2.0)
rake (0.9.2) rake (10.1.1)
refinerycms (1.0.0) rb-fsevent (0.9.4)
bundler (~> 1.0) rb-inotify (0.9.3)
refinerycms-authentication (= 1.0.0) ffi (>= 0.5.0)
refinerycms-base (= 1.0.0) rmagick (2.13.2)
refinerycms-core (= 1.0.0) rspec (2.14.1)
refinerycms-dashboard (= 1.0.0) rspec-core (~> 2.14.0)
refinerycms-images (= 1.0.0) rspec-expectations (~> 2.14.0)
refinerycms-pages (= 1.0.0) rspec-mocks (~> 2.14.0)
refinerycms-resources (= 1.0.0) rspec-core (2.14.8)
refinerycms-settings (= 1.0.0) rspec-expectations (2.14.5)
refinerycms-authentication (1.0.0) diff-lcs (>= 1.1.3, < 2.0)
devise (~> 1.3.0) rspec-mocks (2.14.6)
friendly_id_globalize3 (~> 3.2.1) rspec-rails (2.14.1)
refinerycms-core (= 1.0.0) actionpack (>= 3.0)
refinerycms-base (1.0.0) activemodel (>= 3.0)
refinerycms-blog (1.5.2) activesupport (>= 3.0)
acts-as-taggable-on railties (>= 3.0)
filters_spam (~> 0.2) rspec-core (~> 2.14.0)
refinerycms-core (~> 1.0.0) rspec-expectations (~> 2.14.0)
seo_meta (~> 1.1.0) rspec-mocks (~> 2.14.0)
refinerycms-core (1.0.0) shortcode (0.1.2)
acts_as_indexed (~> 0.7) haml (~> 4.0)
awesome_nested_set (~> 2.0) parslet (= 1.5.0)
friendly_id_globalize3 (~> 3.2.1) slop (3.5.0)
globalize3 (~> 0.1) sprockets (2.12.0)
rails (~> 3.0.7) hike (~> 1.2)
refinerycms-base (= 1.0.0) multi_json (~> 1.0)
refinerycms-generators (~> 1.0) rack (~> 1.0)
refinerycms-settings (= 1.0.0) tilt (~> 1.1, != 1.3.0)
truncate_html (~> 0.5) sprockets-rails (2.0.1)
will_paginate (~> 3.0.pre) actionpack (>= 3.0)
refinerycms-dashboard (1.0.0) activesupport (>= 3.0)
refinerycms-core (= 1.0.0) sprockets (~> 2.8)
refinerycms-generators (1.0.2) sqlite3 (1.3.9)
refinerycms-images (1.0.0) thor (0.18.1)
dragonfly (~> 0.9.0) thread_safe (0.2.0)
rack-cache (>= 0.5.3) atomic (>= 1.1.7, < 2)
refinerycms-core (= 1.0.0) tilt (1.4.1)
refinerycms-pages (1.0.0) timers (1.1.0)
awesome_nested_set (~> 2.0) treetop (1.4.15)
friendly_id_globalize3 (~> 3.2.1) polyglot
globalize3 (~> 0.1)
refinerycms-core (= 1.0.0)
seo_meta (~> 1.1)
refinerycms-resources (1.0.0)
dragonfly (~> 0.9.0)
rack-cache (>= 0.5.3)
refinerycms-core (= 1.0.0)
refinerycms-settings (1.0.0)
refinerycms-base (= 1.0.0)
rmagick (2.13.1)
rspec (2.6.0)
rspec-core (~> 2.6.0)
rspec-expectations (~> 2.6.0)
rspec-mocks (~> 2.6.0)
rspec-core (2.6.4)
rspec-expectations (2.6.0)
diff-lcs (~> 1.1.2)
rspec-mocks (2.6.0)
rspec-rails (2.6.1)
actionpack (~> 3.0)
activesupport (~> 3.0)
railties (~> 3.0)
rspec (~> 2.6.0)
seo_meta (1.1.1)
refinerycms-generators (~> 1.0.1)
sqlite3 (1.3.3)
thor (0.14.6)
treetop (1.4.9)
polyglot (>= 0.3.1) polyglot (>= 0.3.1)
truncate_html (0.5.1) tzinfo (0.3.39)
tzinfo (0.3.27)
warden (1.0.4)
rack (>= 1.0)
will_paginate (3.0.pre2)
PLATFORMS PLATFORMS
ruby ruby
DEPENDENCIES DEPENDENCIES
byebug
database_cleaner database_cleaner
fakeweb fakeweb
ffi ffi
guard-bundler guard-bundler
guard-rspec guard-rspec
libnotify libnotify
rails (= 3.0.7) rails (= 4.0.0)
refinerycms
refinerycms-blog
refinerycms-wordpress-import!
rmagick rmagick
rspec-rails (>= 2.6.0) rspec-rails (>= 2.6.0)
shortcode (= 0.1.2)
sqlite3 sqlite3
wordpress-import!

View File

@ -1,6 +1,12 @@
# A sample Guardfile # A sample Guardfile
# More info at https://github.com/guard/guard#readme # More info at https://github.com/guard/guard#readme
guard 'bundler' do
watch('Gemfile')
# Uncomment next line if Gemfile contain `gemspec' command
watch(/^.+\.gemspec/)
end
guard 'rspec', :version => 2 do guard 'rspec', :version => 2 do
watch(%r{^spec/.+_spec\.rb$}) watch(%r{^spec/.+_spec\.rb$})
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" } watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
@ -8,19 +14,8 @@ guard 'rspec', :version => 2 do
# Rails example # Rails example
watch(%r{^spec/.+_spec\.rb$}) watch(%r{^spec/.+_spec\.rb$})
watch(%r{^app/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" } watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
watch(%r{^app/controllers/(.+)_(controller)\.rb$}) { |m| ["spec/routing/#{m[1]}_routing_spec.rb", "spec/#{m[2]}s/#{m[1]}_#{m[2]}_spec.rb", "spec/acceptance/#{m[1]}_spec.rb"] }
watch(%r{^spec/support/(.+)\.rb$}) { "spec" } watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
watch('spec/spec_helper.rb') { "spec" } watch('spec/spec_helper.rb') { "spec" }
watch('config/routes.rb') { "spec/routing" }
watch('app/controllers/application_controller.rb') { "spec/controllers" }
# Capybara request specs
watch(%r{^app/views/(.+)/.*\.(erb|haml)$}) { |m| "spec/requests/#{m[1]}_spec.rb" }
end end
guard 'bundler' do
watch('Gemfile')
# Uncomment next line if Gemfile contain `gemspec' command
watch(/^.+\.gemspec/)
end

View File

@ -1,4 +1,5 @@
Copyright 2011 YOURNAME Copyright 2014 Will Bradley
portions Copyright 2011 Marc Remolt
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View File

@ -1,31 +1,32 @@
= Refinerycms-wordpress-import = Wordpress-import
This litte project is an importer for WordPress XML dumps into refinerycms(-blog). This little project is an importer for WordPress XML dumps into Rails.
You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import It's been somewhat customized for one particular project; you probably want to fork this and modify it to fit your app's schema.
It's a fork of Marc Remolt's Refinerycms-wordpress-import ( https://github.com/mremolt/refinerycms-wordpress-import )
You can find the source code on github: https://github.com/zyphlar/wordpress-import
Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags. Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags.
If your site (blog) structure uses new urls, the links WILL break! For example, if you used If your site (blog) structure uses new urls, the links WILL break! For example, if you used
the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug". the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug".
So your inner site links will point to the old WP url. So your inner site links will point to the old WP url.
== Prerequisites == Prerequisites
As refinerycms-wordpress-import is an addon for RefineryCMS, is shares the prerequisites with it. TODO
So you'll first need a running installation of refinerycms and refinerycms-blog. Make sure
the site is running, all migrations are run and you created the first refinery user.
== Installation == Installation
Just add the gem to your projects Gemfile: Just add the gem to your projects Gemfile:
gem 'refinerycms-wordpress-import' gem 'wordpress-import'
Or if you want to stay on the bleeding edge: Or if you want to stay on the bleeding edge:
gem 'refinerycms-wordpress-import', :git => 'git://github.com/mremolt/refinerycms-wordpress-import.git' gem 'wordpress-import', :git => 'git://github.com/zyphlar/wordpress-import.git'
and run and run

View File

@ -7,7 +7,7 @@ rescue LoadError
end end
require 'rake' require 'rake'
require 'rdoc/task' #require 'rdoc/task'
require 'rspec/core' require 'rspec/core'
require 'rspec/core/rake_task' require 'rspec/core/rake_task'
@ -16,10 +16,10 @@ RSpec::Core::RakeTask.new(:spec)
task :default => :spec task :default => :spec
Rake::RDocTask.new(:rdoc) do |rdoc| #RDoc::Task.new(:rdoc) do |rdoc|
rdoc.rdoc_dir = 'rdoc' #rdoc.rdoc_dir = 'rdoc'
rdoc.title = 'Refinerycms-wordpress-import' #rdoc.title = 'Refinerycms-wordpress-import'
rdoc.options << '--line-numbers' << '--inline-source' #rdoc.options << '--line-numbers' << '--inline-source'
rdoc.rdoc_files.include('README.rdoc') #rdoc.rdoc_files.include('README.rdoc')
rdoc.rdoc_files.include('lib/**/*.rb') #rdoc.rdoc_files.include('lib/**/*.rb')
end #end

View File

@ -1,7 +0,0 @@
module Refinery
module WordPress
end
end
require 'wordpress'

View File

@ -5,86 +5,137 @@ namespace :wordpress do
task :reset_blog do task :reset_blog do
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
%w(taggings tags blog_comments blog_categories blog_categories_blog_posts %w(posts post_translations taggings tags).each do |table_name|
blog_posts).each do |table_name|
p "Truncating #{table_name} ..." p "Truncating #{table_name} ..."
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}" ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
end end
end end
desc "import blog data from a Refinery::WordPress XML dump" desc "import blog data from a WordPressImport XML dump"
task :import_blog, :file_name do |task, params| task :import_blog, :file_name, :blog_slug do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name]) p "Loading XML from #{params[:file_name]} (using blog #{params[:blog_slug]}) ..."
dump = WordPressImport::Dump.new(params[:file_name])
dump.authors.each(&:to_refinery) p "Importing #{dump.authors.count} authors ..."
dump.authors.each(&:to_rails)
# by default, import all; unless $ONLY_PUBLISHED = "true"
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
dump.posts(only_published).each(&:to_refinery) p "Importing #{dump.posts(only_published).count} posts ..."
Refinery::WordPress::Post.create_blog_page_if_necessary if only_published
p "(only published posts)"
else
p "(export ONLY_PUBLISHED=true to import only published posts)"
end
ENV["MODEL"] = 'BlogPost' dump.posts(only_published).each{|p| p.to_rails(params[:blog_slug]) }
Rake::Task["friendly_id:redo_slugs"].invoke
ENV.delete("MODEL")
end end
desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump" desc "reset blog tables and then import blog data from a WordPressImport XML dump"
task :reset_and_import_blog, :file_name do |task, params| task :reset_and_import_blog, :file_name, :blog_slug do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_blog"].invoke Rake::Task["wordpress:reset_blog"].invoke
Rake::Task["wordpress:import_blog"].invoke(params[:file_name]) Rake::Task["wordpress:import_blog"].invoke(params[:file_name], params[:blog_slug])
end end
desc "Reset the cms relevant tables for a clean import" desc "download images in posts to public folder"
task :reset_pages do task :download_post_images, :host_match do |task, params|
raise "Error: you must specify a host to match for this download (i.e. rake wordpress:download_post_images['mywebsite']" if params[:host_match].blank?
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
%w(page_part_translations page_translations page_parts pages).each do |table_name| # scrape images
p "Truncating #{table_name} ..." @posts = ::Post.all
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}" @posts.each do |post|
doc = Nokogiri::HTML(post.body)
doc.css("img").each do |img|
# find remote file path
remote_file = img.attributes["src"].text
# load uri
begin
remote_uri = URI(remote_file)
rescue => error
puts "Error parsing URL #{remote_file}: #{error.message}"
end
# only download if the image is a LFA-hosted image
if remote_uri && remote_uri.host.match(params[:host_match]) != nil
# find a local path for it
local_file = File.expand_path(File.join(Rails.public_path,remote_uri.path))
# only download if not already there or if it's zero bytes
unless File.size?(local_file)
# create local folders if necessary
dirname = File.dirname(local_file)
unless File.directory?(dirname)
FileUtils.mkdir_p(dirname)
end
# save remote file to local
begin
remote_file_io = open(remote_file)
File.open(local_file,'wb'){ |f| f.write(remote_file_io.read) }
puts "Saved file: #{local_file}"
rescue OpenURI::HTTPError => error
puts "Error saving file #{remote_file}: #{error.message}"
end
end
end
end
end end
puts "Finished downloding images from #{@posts.count} posts"
end end
desc "import cms data from a WordPress XML dump" # desc "Reset the cms relevant tables for a clean import"
task :import_pages, :file_name do |task, params| # task :reset_pages do
Rake::Task["environment"].invoke # Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name])
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false # %w(page_part_translations page_translations page_parts pages).each do |table_name|
dump.pages(only_published).each(&:to_refinery) # p "Truncating #{table_name} ..."
# ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
# end
# end
# After all pages are persisted we can now create the parent - child # desc "import cms data from a WordPress XML dump"
# relationships. This is necessary, as WordPress doesn't dump the pages in # task :import_pages, :file_name do |task, params|
# a correct order. # Rake::Task["environment"].invoke
dump.pages(only_published).each do |dump_page| # dump = WordPressImport::Dump.new(params[:file_name])
page = ::Page.find(dump_page.post_id)
page.parent_id = dump_page.parent_id
page.save!
end
Refinery::WordPress::Post.create_blog_page_if_necessary # only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
# dump.pages(only_published).each(&:to_rails)
ENV["MODEL"] = 'Page' # # After all pages are persisted we can now create the parent - child
Rake::Task["friendly_id:redo_slugs"].invoke # # relationships. This is necessary, as WordPress doesn't dump the pages in
ENV.delete("MODEL") # # a correct order.
end # dump.pages(only_published).each do |dump_page|
# page = ::Page.find(dump_page.post_id)
# page.parent_id = dump_page.parent_id
# page.save!
# end
desc "reset cms tables and then import cms data from a WordPress XML dump" # WordPressImport::Post.create_blog_page_if_necessary
task :reset_and_import_pages, :file_name do |task, params|
Rake::Task["environment"].invoke # ENV["MODEL"] = 'Page'
Rake::Task["wordpress:reset_pages"].invoke # Rake::Task["friendly_id:redo_slugs"].invoke
Rake::Task["wordpress:import_pages"].invoke(params[:file_name]) # ENV.delete("MODEL")
end # end
# desc "reset cms tables and then import cms data from a WordPress XML dump"
# task :reset_and_import_pages, :file_name do |task, params|
# Rake::Task["environment"].invoke
# Rake::Task["wordpress:reset_pages"].invoke
# Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
# end
desc "Reset the media relevant tables for a clean import" desc "Reset the media relevant tables for a clean import"
task :reset_media do task :reset_media do
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
%w(images resources).each do |table_name| %w(rich_rich_files).each do |table_name|
p "Truncating #{table_name} ..." p "Truncating #{table_name} ..."
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}" ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
end end
@ -93,14 +144,27 @@ namespace :wordpress do
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts" desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
task :import_and_replace_media, :file_name do |task, params| task :import_and_replace_media, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name]) dump = WordPressImport::Dump.new(params[:file_name])
attachments = dump.attachments.each(&:to_refinery) p "Importing #{dump.attachments.each_slice(200).first.count} attachments ..."
attachments = dump.attachments.each_slice(200).first.each(&:to_rails)
unless $ATTACHMENT_EXCEPTIONS.blank?
p "----------------------------------------------------------"
p "ERRORS WERE ENCOUNTERED IMPORTING ATTACHMENTS:"
$ATTACHMENT_EXCEPTIONS.each{|exception| puts exception}
p "----------------------------------------------------------"
end
# parse all created BlogPost and Page bodys and replace the old wordpress media uls # parse all created Post and Page bodys and replace the old wordpress media urls
# with the newly created ones # with the newly created ones
attachments.each do |attachment| p "Replacing attachment URLs found in posts/pages ..."
attachment.replace_url attachments.each(&:replace_url)
unless $REPLACEMENT_EXCEPTIONS.blank?
p "----------------------------------------------------------"
p "ERRORS WERE ENCOUNTERED REPLACING ATTACHMENTS:"
$REPLACEMENT_EXCEPTIONS.each{|exception| puts exception}
p "----------------------------------------------------------"
end end
end end
@ -112,10 +176,12 @@ namespace :wordpress do
end end
desc "reset and import all data (see the other tasks)" desc "reset and import all data (see the other tasks)"
task :full_import, :file_name do |task, params| task :full_import, :file_name, :blog_slug do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name]) Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name],params[:blog_slug])
Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name]) #Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name]) #Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
end end
end end

5
lib/wordpress-import.rb Normal file
View File

@ -0,0 +1,5 @@
module WordPressImport
end
require 'wordpress'

View File

@ -1,16 +1,15 @@
require 'nokogiri' require 'nokogiri'
require "wordpress/railtie" require "wordpress/railtie"
require 'shortcode'
module Refinery module WordPressImport
module WordPress autoload :Author, 'wordpress/author'
autoload :Author, 'wordpress/author' autoload :Tag, 'wordpress/tag'
autoload :Tag, 'wordpress/tag' autoload :Category, 'wordpress/category'
autoload :Category, 'wordpress/category' autoload :Page, 'wordpress/page'
autoload :Page, 'wordpress/page' autoload :Post, 'wordpress/post'
autoload :Post, 'wordpress/post' autoload :Comment, 'wordpress/comment'
autoload :Comment, 'wordpress/comment' autoload :Dump, 'wordpress/dump'
autoload :Dump, 'wordpress/dump' autoload :Attachment, 'wordpress/attachment'
autoload :Attachment, 'wordpress/attachment'
end
end end

View File

@ -1,130 +1,179 @@
module Refinery module WordPressImport
module WordPress class Attachment
class Attachment attr_reader :node
attr_reader :node attr_reader :paperclip_image
attr_reader :refinery_image attr_reader :paperclip_file
attr_reader :refinery_resource
def initialize(node) def initialize(node)
@node = node @node = node
end end
def title def title
node.xpath("title").text node.xpath("title").text
end end
def description def description
node.xpath("description").text node.xpath("description").text
end end
def file_name def file_name
url.split('/').last url.split('/').last
end end
def post_date def post_date
DateTime.parse node.xpath("wp:post_date").text DateTime.parse node.xpath("wp:post_date").text
end end
def url def url
node.xpath("wp:attachment_url").text node.xpath("wp:attachment_url").text
end end
def url_pattern def url_pattern
url_parts = url.split('.') url_parts = url.split('.')
extension = url_parts.pop extension = url_parts.pop
url_without_extension = url_parts.join('.') url_without_extension = url_parts.join('.')
/#{url_without_extension}(-\d+x\d+)?\.#{extension}/ /#{url_without_extension}(-\d+x\d+)?\.#{extension}/
end end
def image? def image?
url.match /\.(png|jpg|jpeg|gif)$/ url.match /\.(png|jpg|jpeg|gif)$/
end end
def to_refinery def to_rails
begin
if image? if image?
to_image to_image
else else
to_resource to_file
end end
rescue StandardError => ex
message = "ERROR saving attachment #{url} -- #{ex.message}"
p message
$ATTACHMENT_EXCEPTIONS = [] if $ATTACHMENT_EXCEPTIONS.blank?
$ATTACHMENT_EXCEPTIONS << message
return nil
end end
end
def replace_url def replace_url
begin
@occurrance_count = 0
if image? if image?
replace_image_url replace_image_url
else else
replace_resource_url replace_resource_url
end end
p "Replaced #{@occurrance_count} occurrances of #{url}"
rescue StandardError => ex
message = "ERROR replacing URL #{url} -- #{ex.message}"
p message
$REPLACEMENT_EXCEPTIONS = [] if $REPLACEMENT_EXCEPTIONS.blank?
$REPLACEMENT_EXCEPTIONS << message
return nil
end end
end
private private
def to_image def rich_file_clean_file_name(full_file_name)
image = ::Image.new extension = File.extname(full_file_name).gsub(/^\.+/, '')
filename = full_file_name.gsub(/\.#{extension}$/, '')
filename = CGI::unescape(filename)
filename = CGI::unescape(filename)
extension = extension.downcase
filename = filename.downcase.gsub(/[^a-z0-9]+/i, '-')
"#{filename}.#{extension}"
end
def to_image
# avoid duplicates; use our storage system's filename cleaner for lookup
image = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))
if image.rich_file.instance.id.blank?
p "Importing image #{file_name}"
image.simplified_type = "image"
image.created_at = post_date image.created_at = post_date
image.image_url = url image.rich_file = URI.parse(url)
image.save! image.save!
else
@refinery_image = image p "image #{file_name} already exists..."
image
end end
def to_resource @paperclip_image = image
resource = ::Resource.new image
resource.created_at = post_date end
resource.file_url = url
resource.save!
@refinery_resource = resource def to_file
resource # avoid duplicates; use our storage system's filename cleaner for lookup
file = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))
if file.rich_file.instance.id.blank?
p "Importing file #{file_name}"
file.created_at = post_date
file.rich_file = URI.parse(url) if file.rich_file.blank?
file.save!
else
p "file #{file_name} already exists..."
end end
def replace_image_url @paperclip_file = file
replace_image_url_in_blog_posts file
replace_image_url_in_pages end
end
def replace_resource_url
replace_resource_url_in_blog_posts
replace_resource_url_in_pages
end
def replace_image_url_in_blog_posts def replace_image_url
replace_url_in_blog_posts(refinery_image.image.url) replace_image_url_in_blog_posts
end replace_image_url_in_pages
end
def replace_image_url_in_pages def replace_resource_url
replace_url_in_pages(refinery_image.image.url) replace_resource_url_in_blog_posts
end replace_resource_url_in_pages
end
def replace_resource_url_in_blog_posts def replace_image_url_in_blog_posts
replace_url_in_blog_posts(refinery_resource.file.url) replace_url_in_blog_posts(paperclip_image.rich_file.url)
end end
def replace_resource_url_in_pages def replace_image_url_in_pages
replace_url_in_pages(refinery_resource.file.url) replace_url_in_pages(paperclip_image.rich_file.url)
end end
def replace_url_in_blog_posts(new_url) def replace_resource_url_in_blog_posts
::BlogPost.all.each do |post| replace_url_in_blog_posts(paperclip_file.rich_file.url)
if (! post.body.empty?) && post.body.include?(url) end
post.body = post.body.gsub(url_pattern, new_url)
post.save! def replace_resource_url_in_pages
end replace_url_in_pages(paperclip_file.rich_file.url)
end
def replace_url_in_blog_posts(new_url)
::Post.all.each do |post|
if (! post.body.empty?) && post.body.include?(url)
@occurrance_count += 1
post.body = post.body.gsub(url_pattern, new_url)
post.save!
end end
end end
end
def replace_url_in_pages(new_url) def replace_url_in_pages(new_url)
::Page.all.each do |page| ::Page.all.each do |page|
page.parts.each do |part| page.translations.each do |translation|
if (! part.body.to_s.blank?) && part.body.include?(url) translation.parts.each do |part|
part.body = part.body.gsub(url_pattern, new_url) if (! part.content.to_s.blank?) && part.content.include?(url)
@occurrance_count += 1
part.content = part.content.gsub(url_pattern, new_url)
part.save! part.save!
end end
end end
end end
end end
end end
end end
end end

View File

@ -1,37 +1,47 @@
module Refinery module WordPressImport
module WordPress class Author
class Author attr_reader :author_node
attr_reader :author_node
def initialize(author_node) def initialize(author_node)
@author_node = author_node @author_node = author_node
end end
def login def name
author_node.xpath("wp:author_login").text name = author_node.xpath("wp:author_display_name").text
end name = author_node.xpath("wp:author_first_name").text + " " + author_node.xpath("wp:author_first_name").text if name.blank?
name
end
def email def login
author_node.xpath("wp:author_email").text author_node.xpath("wp:author_login").text
end end
def ==(other) def email
login == other.login author_node.xpath("wp:author_email").text
end end
def inspect def ==(other)
"WordPress::Author: #{login} <#{email}>" login == other.login
end end
def to_refinery def inspect
user = User.find_or_initialize_by_username_and_email(login, email) "WordPress::Author: #{login} <#{email}>"
unless user.persisted? end
user.password = 'password'
user.password_confirmation = 'password' def to_rails
user.save user = ::User.find_or_initialize_by_email(email)
end user.wp_username = login
user
unless user.persisted?
user.name = name
user.password = 'password'
user.password_confirmation = 'password'
end end
user.save
puts "User #{login} imported."
user
end end
end end
end end

View File

@ -1,19 +1,17 @@
module Refinery module WordPressImport
module WordPress class Category
class Category attr_accessor :name
attr_accessor :name
def initialize(text) def initialize(text)
@name = text @name = text
end end
def ==(other) def ==(other)
name == other.name name == other.name
end end
def to_refinery def to_rails
BlogCategory.find_or_create_by_title(name) Tag.find_or_create_by_title(name)
end
end end
end end
end end

View File

@ -1,48 +1,46 @@
module Refinery module WordPressImport
module WordPress class Comment
class Comment attr_reader :node
attr_reader :node
def initialize(node) def initialize(node)
@node = node @node = node
end end
def author def author
node.xpath('wp:comment_author').text node.xpath('wp:comment_author').text
end end
def email def email
node.xpath('wp:comment_author_email').text node.xpath('wp:comment_author_email').text
end end
def url def url
node.xpath('wp:comment_author_url').text node.xpath('wp:comment_author_url').text
end end
def date def date
DateTime.parse node.xpath("wp:comment_date").text DateTime.parse node.xpath("wp:comment_date").text
end end
def content def content
node.xpath('wp:comment_content').text node.xpath('wp:comment_content').text
end end
def approved? def approved?
node.xpath('wp:comment_approved').text.to_i == 1 node.xpath('wp:comment_approved').text.to_i == 1
end end
def ==(other) def ==(other)
(email == other.email) && (date == other.date) && (content == other.content) (email == other.email) && (date == other.date) && (content == other.content)
end end
def to_refinery def to_refinery
comment = BlogComment.new :name => author, :email => email comment = BlogComment.new :name => author, :email => email
comment.body = content comment.body = content
comment.created_at = date comment.created_at = date
comment.state = approved? ? 'approved' : 'rejected' comment.state = approved? ? 'approved' : 'rejected'
comment comment
end
end end
end end
end end

View File

@ -1,57 +1,62 @@
module Refinery module WordPressImport
module WordPress class Dump
class Dump attr_reader :doc
attr_reader :doc
def initialize(file_name) def initialize(file_name)
file_name = File.absolute_path(file_name) begin
file_name = File.expand_path(file_name)
raise "Given file '#{file_name}' no file or not readable." \ raise "error" unless File.file?(file_name) && File.readable?(file_name)
unless File.file?(file_name) && File.readable?(file_name) rescue
raise "Given file '#{file_name}' is not a file or not readable. Rake tasks take filename arguments like this: rake wordpress:full_import['/path/to/my_file']"
file = File.open(file_name)
@doc = Nokogiri::XML(file)
end end
def authors file = File.open(file_name)
doc.xpath("//wp:author").collect do |author|
Author.new(author) if file.size >= 10485760 # 10MB
end puts "WARNING: LibXML by default supports 10MB max file size. On some systems your file will be silently truncated; on others, an error will be raised. Consider splitting your file into smaller chunks and running rake tasks individually (authors, then blog/pages, then media), and double-check the import results."
end end
def pages(only_published=false) @doc = Nokogiri::XML(file.read().gsub("\u0004", "")) # get rid of all EOT characters
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page| end
Page.new(page)
end
pages = pages.select(&:published?) if only_published def authors
pages doc.xpath("//wp:author").collect do |author|
Author.new(author)
end
end
def pages(only_published=false)
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
Page.new(page)
end end
def posts(only_published=false) pages = pages.select(&:published?) if only_published
posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post| pages
Post.new(post) end
end
posts = posts.select(&:published?) if only_published
posts
end
def tags def posts(only_published=false)
doc.xpath("//wp:tag/wp:tag_slug").collect do |tag| posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post|
Tag.new(tag.text) Post.new(post)
end
end end
posts = posts.select(&:published?) if only_published
posts
end
def categories def tags
doc.xpath("//wp:category/wp:cat_name").collect do |category| doc.xpath("//wp:tag/wp:tag_slug").collect do |tag|
Category.new(category.text) Tag.new(tag.text)
end
end end
end
def attachments def categories
doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment| doc.xpath("//wp:category/wp:cat_name").collect do |category|
Attachment.new(attachment) Category.new(category.text)
end end
end
def attachments
doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
Attachment.new(attachment)
end end
end end
end end

View File

@ -1,107 +1,144 @@
module Refinery module WordPressImport
module WordPress class Page
class Page include ::ActionView::Helpers::TagHelper
include ::ActionView::Helpers::TagHelper include ::ActionView::Helpers::TextHelper
include ::ActionView::Helpers::TextHelper
attr_reader :node attr_reader :node
def initialize(node) def initialize(node)
@node = node @node = node
end
# Short human-readable label for this page: its WordPress post id followed
# by its title.
def inspect
  id = post_id
  heading = title
  "WordPress::Page(#{id}): #{heading}"
end
# Text of the node(s) matched by the "link" XPath on this item.
def link
  link_nodes = node.xpath("link")
  link_nodes.text
end
# Text of the node(s) matched by the "title" XPath on this item.
def title
  title_nodes = node.xpath("title")
  title_nodes.text
end
# Raw (unformatted) body text, read from the "content:encoded" element.
def content
  encoded = node.xpath("content:encoded")
  encoded.text
end
def content_formatted
formatted = format_shortcodes(format_syntax_highlighter(format_paragraphs(content)))
# remove all tags inside <pre> that simple_format created
# TODO: replace format_paragraphs with a method, that ignores pre-tags
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
"#{$1}#{strip_tags($2)}#{$3}"
end end
def inspect formatted
"WordPress::Page(#{post_id}): #{title}" end
# Text of the "dc:creator" element for this item.
def creator
  creator_nodes = node.xpath("dc:creator")
  creator_nodes.text
end
# The "wp:post_date" element parsed into a Time.
# Time.parse raises if the text is not a parseable date.
def post_date
  raw_date = node.xpath("wp:post_date").text
  Time.parse(raw_date)
end
# The "pubDate" element parsed into a Time.
# Time.parse raises if the text is not a parseable date.
def publish_date
  raw_date = node.xpath("pubDate").text
  Time.parse(raw_date)
end
# Text of the "wp:post_name" element (used as the slug by to_rails).
def post_name
  name_nodes = node.xpath("wp:post_name")
  name_nodes.text
end
# The "wp:post_id" element as an Integer (String#to_i, so non-numeric or
# empty text yields 0).
def post_id
  raw_id = node.xpath("wp:post_id").text
  raw_id.to_i
end
# Id of the parent item taken from "wp:post_parent", or nil when that
# element is 0 (or not numeric, since String#to_i then yields 0).
def parent_id
  parent = node.xpath("wp:post_parent").text.to_i
  return nil if parent == 0

  parent
end
# Text of the "wp:status" element (compared against 'publish' by draft?).
def status
  status_nodes = node.xpath("wp:status")
  status_nodes.text
end
# True for every status other than 'publish'.
def draft?
  !(status == 'publish')
end
# Logical opposite of draft?.
def published?
  return false if draft?

  true
end
# Two pages are equal when their WordPress post ids match; `other` only
# needs to respond to post_id.
def ==(other)
  own_id = post_id
  own_id == other.post_id
end
# Creates the ::Page record for this WordPress page and returns it.
#
# Field mapping (dump element -> ::Page attribute):
#   wp:post_id      -> :id
#   title           -> :title
#   wp:post_date    -> :created_at
#   wp:post_name    -> :slug
#   pubDate         -> :published_at
#   content:encoded -> :body (via content_formatted)
#
# Not mapped yet: dc:creator -> user_id.
# NOTE(review): create! is used, so a failed save presumably raises rather
# than returning an unsaved record — confirm against the ::Page model.
def to_rails
  attributes = {
    :id           => post_id,
    :title        => title,
    :created_at   => post_date,
    :slug         => post_name,
    :published_at => publish_date,
    :body         => content_formatted
  }
  ::Page.create!(attributes)
end
private
# Wraps plain WordPress content in <p> tags, in the spirit of the Rails
# simple_format helper (WordPress does not export <p> tags). The content is
# trusted, so nothing is sanitized.
#
# text         - the raw content String; nil is treated as empty. The
#                argument itself is never modified, so frozen or shared
#                strings are safe to pass.
# html_options - attributes for every opening <p> tag (passed to `tag`).
#
# Runs of two or more line breaks (LF or CRLF) become paragraph boundaries;
# each remaining single line break becomes "<br/>\n".
#
# Returns the html_safe result.
def format_paragraphs(text, html_options={})
  start_tag = tag('p', html_options, true)

  # Paragraph detection must run before the <br/> substitution, and must
  # recognise CRLF pairs too, otherwise "\r\n\r\n" ends up as <br/><br/>.
  body = text.to_s.gsub(/(?:\r?\n){2,}/, "</p>#{start_tag}")
  body = body.gsub(/\r?\n/, "<br/>\n")

  "#{start_tag}#{body}".html_safe.safe_concat("</p>")
end
# Converts SyntaxHighlighter-style bracket blocks into <pre> elements
# (see http://alexgorbatchev.com/SyntaxHighlighter/).
#
# Every [lang]...[/lang] pair - same word in both brackets, non-empty
# content, possibly spanning several lines - is rewritten to
# <pre class="brush: lang">...</pre>.
#
#   format_syntax_highlighter('[ruby]p "Hello World"[/ruby]')
#   # => '<pre class="brush: ruby">p "Hello World"</pre>'
#
# Returns a new string; the argument is not modified.
def format_syntax_highlighter(text)
  bracket_block = /\[(\w+)\](.+?)\[\/\1\]/m
  pre_element   = '<pre class="brush: \1">\2</pre>'
  text.gsub(bracket_block, pre_element)
end
# Replace Wordpress shortcodes with formatted HTML (see shortcode gem and support/templates folder)
def format_shortcodes(text)
Shortcode.setup do |config|
# the template parser to use
config.template_parser = :haml # :erb or :haml supported, :haml is default
# location of the template files
config.template_path = ::File.join(::File.dirname(__FILE__), "..", "..","support/templates/haml")
# a list of block tags to support e.g. [quote]Hello World[/quote]
config.block_tags = [:caption, :column]
# a list of self closing tags to support e.g. [youtube id="12345"]
config.self_closing_tags = [:end_columns, "google-map-v3"]
end end
def title Shortcode.process(text)
node.xpath("title").text
end
def content
node.xpath("content:encoded").text
end
def content_formatted
formatted = format_syntax_highlighter(format_paragraphs(content))
# remove all tags inside <pre> that simple_format created
# TODO: replace format_paragraphs with a method, that ignores pre-tags
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
"#{$1}#{strip_tags($2)}#{$3}"
end
formatted
end
def creator
node.xpath("dc:creator").text
end
def post_date
DateTime.parse node.xpath("wp:post_date").text
end
def post_id
node.xpath("wp:post_id").text.to_i
end
def parent_id
dump_id = node.xpath("wp:post_parent").text.to_i
dump_id == 0 ? nil : dump_id
end
def status
node.xpath("wp:status").text
end
def draft?
status != 'publish'
end
def published?
! draft?
end
def ==(other)
post_id == other.post_id
end
def to_refinery
page = ::Page.create!(:id => post_id, :title => title,
:created_at => post_date, :draft => draft?)
page.parts.create(:title => 'Body', :body => content_formatted)
page
end
private
def format_paragraphs(text, html_options={})
# WordPress doesn't export <p>-Tags, so let's run a simple_format over
# the content. As we trust ourselves, no sanatize. This code is heavily
# inspired by the simple_format rails helper
text = ''.html_safe if text.nil?
start_tag = tag('p', html_options, true)
text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n
text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}") # 2+ newline -> paragraph
text.insert 0, start_tag
text.html_safe.safe_concat("</p>")
end
def format_syntax_highlighter(text)
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
# In WordPress you can (via a plugin) enclose code in [lang][/lang]
# blocks, which are converted to a <pre>-tag with a class corresponding
# to the language.
#
# Example:
# [ruby]p "Hello World"[/ruby]
# -> <pre class="brush: ruby">p "Hello world"</pre>
text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
end
end end
end end
end end

View File

@ -1,85 +1,67 @@
module Refinery module WordPressImport
module WordPress class Post < Page
class Post < Page def tags
def tags # xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
# xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0 path = if node.xpath("category[@domain='post_tag']").count > 0
path = if node.xpath("category[@domain='post_tag']").count > 0 "category[@domain='post_tag']"
"category[@domain='post_tag']" else
else "category[@domain='tag']"
"category[@domain='tag']"
end
node.xpath(path).collect do |tag_node|
Tag.new(tag_node.text)
end
end end
def tag_list node.xpath(path).collect do |tag_node|
tags.collect(&:name).join(',') Tag.new(tag_node.text)
end end
def categories
node.xpath("category[@domain='category']").collect do |cat|
Category.new(cat.text)
end
end
def comments
node.xpath("wp:comment").collect do |comment_node|
Comment.new(comment_node)
end
end
def to_refinery
user = ::User.find_by_username(creator) || ::User.first
raise "Referenced User doesn't exist! Make sure the authors are imported first." \
unless user
begin
post = ::BlogPost.new :title => title, :body => content_formatted,
:draft => draft?, :published_at => post_date, :created_at => post_date,
:author => user, :tag_list => tag_list
post.save!
::BlogPost.transaction do
categories.each do |category|
post.categories << category.to_refinery
end
comments.each do |comment|
comment = comment.to_refinery
comment.post = post
comment.save
end
end
rescue ActiveRecord::RecordInvalid
# if the title has already been taken (WP allows duplicates here,
# refinery doesn't) append the post_id to it, making it unique
post.title = "#{title}-#{post_id}"
post.save
end
post
end
def self.create_blog_page_if_necessary
# refinerycms wants a page at /blog, so let's make sure there is one
# taken from the original db seeds from refinery-blog
unless ::Page.where("link_url = ?", '/blog').exists?
page = ::Page.create(
:title => "Blog",
:link_url => "/blog",
:deletable => false,
:position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
:menu_match => "^/blogs?(\/|\/.+?|)$"
)
::Page.default_parts.each do |default_page_part|
page.parts.create(:title => default_page_part, :body => nil)
end
end
end
end end
# Names of all tags on this post, joined by commas (no spaces).
def tag_list
  tag_names = tags.map { |tag| tag.name }
  tag_names.join(',')
end
# One Category per <category domain="category"> element on this post,
# each built from the element's text.
def categories
  category_nodes = node.xpath("category[@domain='category']")
  category_nodes.map { |category_node| Category.new(category_node.text) }
end
# One Comment wrapper per "wp:comment" element on this post.
def comments
  comment_nodes = node.xpath("wp:comment")
  comment_nodes.map { |comment_node| Comment.new(comment_node) }
end
# Creates the ::Post record (with one "en" translation) for this
# WordPress post.
#
# blog_slug - identifies which blog this import came from; stored in
#             :wp_blog.
#
# The author is looked up by wp_username using the post's creator. The
# translation's category list is the union of the post's category names
# and tag names.
#
# Returns the reloaded ::Post when it was created without errors.
# Raises a RuntimeError when no user matches the creator, or - after
# printing the record - when the created post has errors.
def to_rails(blog_slug)
  author = ::User.find_by_wp_username(creator)
  raise "User with wp_username #{creator} not found" if author.nil?

  attributes = {
    :wp_post_id => post_id,
    :slug => post_name,
    :user_id => author.id,
    :title => title,
    :created_at => post_date,
    :published_at => publish_date,
    :wp_link => link,
    :wp_blog => blog_slug,
    :translations_attributes => {
      "0" => {
        :locale => "en",
        :title => title,
        :body => content_formatted,
        # merge the translation's category list with the wordpress post's
        :category_list => categories.collect(&:name) | tags.collect(&:name)
      }
    }
  }
  record = ::Post.create(attributes)

  unless record.errors.blank?
    puts record.inspect
    raise record.errors.full_messages.to_s
  end

  puts "Post #{post_name} imported."
  record.reload
end
end end
end end

View File

@ -1,9 +1,7 @@
module Refinery module WordPressImport
module WordPress class Railtie < Rails::Railtie
class Railtie < Rails::Railtie rake_tasks do
rake_tasks do load "tasks/wordpress.rake"
load "tasks/wordpress.rake"
end
end end
end end
end end

View File

@ -1,20 +1,18 @@
module Refinery module WordPressImport
module WordPress class Tag
class Tag attr_accessor :name
attr_accessor :name
def initialize(text)
@name = text
end
def ==(other)
name == other.name
end
def to_refinery
::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
end
def initialize(text)
@name = text
end end
# Two tags are equal when their names match; `other` only needs to
# respond to name.
def ==(other)
  own_name = name
  own_name == other.name
end
# Looks up the ActsAsTaggableOn tag with this name through
# find_or_create_by_name and returns whatever that call returns.
def to_refinery
  tag_model = ::ActsAsTaggableOn::Tag
  tag_model.find_or_create_by_name(name)
end
end end
end end

View File

@ -1,23 +0,0 @@
# Provide a simple gemspec so you can easily use your enginex
# project in your rails apps through git.
Gem::Specification.new do |s|
s.name = "refinerycms-wordpress-import"
s.summary = "Import WordPress XML dumps into refinerycms(-blog)."
s.description = "This gem imports a WordPress XML dump into refinerycms (Page, User) and refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)"
s.version = "0.3.0"
s.date = "2011-06-13"
s.authors = ['Marc Remolt']
s.email = 'marc.remolt@googlemail.com'
s.homepage = 'https://github.com/mremolt/refinerycms-wordpress-import'
s.add_dependency 'bundler', '~> 1.0'
s.add_dependency 'refinerycms', '~> 1.0.0'
s.add_dependency 'refinerycms-blog', '~> 1.5.2'
s.add_dependency 'nokogiri', '~> 1.4.4'
s.add_development_dependency 'rspec-rails'
s.add_development_dependency 'database_cleaner'
s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
end

View File

@ -1,6 +1,3 @@
require 'yaml'
YAML::ENGINE.yamler= 'syck'
require 'rubygems' require 'rubygems'
gemfile = File.expand_path('../../../../Gemfile', __FILE__) gemfile = File.expand_path('../../../../Gemfile', __FILE__)

View File

@ -24,4 +24,6 @@ Dummy::Application.configure do
config.action_dispatch.best_standards_support = :builtin config.action_dispatch.best_standards_support = :builtin
end end
Refinery.rescue_not_found = false Refinery.rescue_not_found = false# When true will use Amazon's Simple Storage Service on your production machine
# instead of the default file system for resources and images
Refinery.s3_backend = !(ENV['S3_KEY'].nil? || ENV['S3_SECRET'].nil?)

View File

@ -47,4 +47,6 @@ Dummy::Application.configure do
# Send deprecation notices to registered listeners # Send deprecation notices to registered listeners
config.active_support.deprecation = :notify config.active_support.deprecation = :notify
end end
Refinery.rescue_not_found = true Refinery.rescue_not_found = true# When true will use Amazon's Simple Storage Service on your production machine
# instead of the default file system for resources and images
Refinery.s3_backend = !(ENV['S3_KEY'].nil? || ENV['S3_SECRET'].nil?)

View File

@ -33,4 +33,6 @@ Dummy::Application.configure do
# Print deprecation notices to the stderr # Print deprecation notices to the stderr
config.active_support.deprecation = :stderr config.active_support.deprecation = :stderr
end end
Refinery.rescue_not_found = false Refinery.rescue_not_found = false# When true will use Amazon's Simple Storage Service on your production machine
# instead of the default file system for resources and images
Refinery.s3_backend = !(ENV['S3_KEY'].nil? || ENV['S3_SECRET'].nil?)

View File

@ -0,0 +1,11 @@
# Adds a string column named cached_slug to the blog_categories and
# blog_posts tables; rolling back removes it from both again.
class AddCachedSlugs < ActiveRecord::Migration
  def self.up
    [:blog_categories, :blog_posts].each do |table|
      add_column table, :cached_slug, :string
    end
  end

  def self.down
    [:blog_categories, :blog_posts].each do |table|
      remove_column table, :cached_slug
    end
  end
end

View File

@ -0,0 +1,9 @@
# Adds a string column named custom_url to blog_posts; rolling back
# removes it.
class AddCustomUrlFieldToBlogPosts < ActiveRecord::Migration
  class << self
    def up
      add_column(:blog_posts, :custom_url, :string)
    end

    def down
      remove_column(:blog_posts, :custom_url)
    end
  end
end

View File

@ -0,0 +1,10 @@
# Adds a text column named custom_teaser to blog_posts; rolling back
# removes it.
class AddCustomTeaserFieldToBlogPosts < ActiveRecord::Migration
  class << self
    def up
      add_column(:blog_posts, :custom_teaser, :text)
    end

    def down
      remove_column(:blog_posts, :custom_teaser)
    end
  end
end

View File

@ -10,12 +10,13 @@
# #
# It's strongly recommended to check this file into your version control system. # It's strongly recommended to check this file into your version control system.
ActiveRecord::Schema.define(:version => 20110602094445) do ActiveRecord::Schema.define(:version => 20110812162204) do
create_table "blog_categories", :force => true do |t| create_table "blog_categories", :force => true do |t|
t.string "title" t.string "title"
t.datetime "created_at" t.datetime "created_at"
t.datetime "updated_at" t.datetime "updated_at"
t.string "cached_slug"
end end
add_index "blog_categories", ["id"], :name => "index_blog_categories_on_id" add_index "blog_categories", ["id"], :name => "index_blog_categories_on_id"
@ -48,6 +49,9 @@ ActiveRecord::Schema.define(:version => 20110602094445) do
t.datetime "created_at" t.datetime "created_at"
t.datetime "updated_at" t.datetime "updated_at"
t.integer "user_id" t.integer "user_id"
t.string "cached_slug"
t.string "custom_url"
t.text "custom_teaser"
end end
add_index "blog_posts", ["id"], :name => "index_blog_posts_on_id" add_index "blog_posts", ["id"], :name => "index_blog_posts_on_id"

View File

@ -1,3 +1,12 @@
::Page.reset_column_information

# When SeoMeta is loaded, check whether every one of its attribute keys is
# already an instance method of the page translation class. If any is
# missing, apply is_seo_meta to the translation class so that all columns
# are accessible.
if defined?(::SeoMeta)
  columns_applied = ::SeoMeta.attributes.keys.all? { |key|
    ::Page.translation_class.instance_methods.include?(key)
  }
  ::Page.translation_class.send :is_seo_meta unless columns_applied
end
page_position = -1 page_position = -1
home_page = Page.create(:title => "Home", home_page = Page.create(:title => "Home",

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Attachment, :type => :model do describe WordPressImport::Attachment, :type => :model do
context "an image attchment" do context "an image attchment" do
let(:attachment) { test_dump.attachments.first } let(:attachment) { test_dump.attachments.first }

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Author, :type => :model do describe WordPressImport::Author, :type => :model do
let(:author) { test_dump.authors.first } let(:author) { test_dump.authors.first }
specify { author.login.should == 'admin' } specify { author.login.should == 'admin' }

View File

@ -1,15 +1,15 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Category, :type => :model do describe WordPressImport::Category, :type => :model do
let(:category) { Refinery::WordPress::Category.new('Rant') } let(:category) { WordPressImport::Category.new('Rant') }
describe "#name" do describe "#name" do
specify { category.name.should == 'Rant' } specify { category.name.should == 'Rant' }
end end
describe "#==" do describe "#==" do
specify { category.should == Refinery::WordPress::Category.new('Rant') } specify { category.should == WordPressImport::Category.new('Rant') }
specify { category.should_not == Refinery::WordPress::Category.new('Tutorials') } specify { category.should_not == WordPressImport::Category.new('Tutorials') }
end end
describe "#to_refinery" do describe "#to_refinery" do

View File

@ -1,10 +1,10 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Dump, :type => :model do describe WordPressImport::Dump, :type => :model do
let(:dump) { test_dump } let(:dump) { test_dump }
it "should create a Dump object given a xml file" do it "should create a Dump object given a xml file" do
dump.should be_a Refinery::WordPress::Dump dump.should be_a WordPressImport::Dump
end end
it "should include a Nokogiri::XML object" do it "should include a Nokogiri::XML object" do
@ -13,12 +13,12 @@ describe Refinery::WordPress::Dump, :type => :model do
describe "#tags" do describe "#tags" do
let(:tags) do let(:tags) do
[ Refinery::WordPress::Tag.new('css'), Refinery::WordPress::Tag.new('html'), [ WordPressImport::Tag.new('css'), WordPressImport::Tag.new('html'),
Refinery::WordPress::Tag.new('php'), Refinery::WordPress::Tag.new('ruby')] WordPressImport::Tag.new('php'), WordPressImport::Tag.new('ruby')]
end end
specify { dump.tags.count == 4 } specify { dump.tags.count == 4 }
specify { dump.tags.first.should be_a(Refinery::WordPress::Tag) } specify { dump.tags.first.should be_a(WordPressImport::Tag) }
it "should return all included tags" do it "should return all included tags" do
tags.each do |tag| tags.each do |tag|
@ -29,12 +29,12 @@ describe Refinery::WordPress::Dump, :type => :model do
describe "#categories" do describe "#categories" do
let(:categories) do let(:categories) do
[ Refinery::WordPress::Category.new('Rant'), Refinery::WordPress::Category.new('Tutorials'), [ WordPressImport::Category.new('Rant'), WordPressImport::Category.new('Tutorials'),
Refinery::WordPress::Category.new('Uncategorized') ] WordPressImport::Category.new('Uncategorized') ]
end end
specify { dump.categories.count == 4 } specify { dump.categories.count == 4 }
specify { dump.categories.first.should be_a(Refinery::WordPress::Category) } specify { dump.categories.first.should be_a(WordPressImport::Category) }
it "should return all included categories" do it "should return all included categories" do
categories.each do |cat| categories.each do |cat|
@ -48,7 +48,7 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.pages.should have(3).pages dump.pages.should have(3).pages
end end
specify { dump.pages.first.should be_a(Refinery::WordPress::Page) } specify { dump.pages.first.should be_a(WordPressImport::Page) }
it "should return only published pages with only_published=true" do it "should return only published pages with only_published=true" do
dump.pages(true).should have(2).pages dump.pages(true).should have(2).pages
@ -60,7 +60,7 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.authors.should have(1).author dump.authors.should have(1).author
end end
specify { dump.authors.first.should be_a(Refinery::WordPress::Author) } specify { dump.authors.first.should be_a(WordPressImport::Author) }
end end
describe "#posts" do describe "#posts" do
@ -68,7 +68,7 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.posts.should have(3).posts dump.posts.should have(3).posts
end end
specify { dump.posts.first.should be_a(Refinery::WordPress::Post) } specify { dump.posts.first.should be_a(WordPressImport::Post) }
it "should return only published posts with only_published=true" do it "should return only published posts with only_published=true" do
dump.posts(true).should have(2).posts dump.posts(true).should have(2).posts
@ -80,6 +80,6 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.attachments.should have(2).attachments dump.attachments.should have(2).attachments
end end
specify { dump.attachments.first.should be_a(Refinery::WordPress::Attachment) } specify { dump.attachments.first.should be_a(WordPressImport::Attachment) }
end end
end end

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Page, :type => :model do describe WordPressImport::Page, :type => :model do
let(:dump) { test_dump } let(:dump) { test_dump }
let(:page) { test_dump.pages.last } let(:page) { test_dump.pages.last }

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Post, :type => :model do describe WordPressImport::Post, :type => :model do
let(:post) { test_dump.posts.last } let(:post) { test_dump.posts.last }
specify { post.title.should == 'Third blog post' } specify { post.title.should == 'Third blog post' }
@ -17,15 +17,15 @@ describe Refinery::WordPress::Post, :type => :model do
describe "#categories" do describe "#categories" do
specify { post.categories.should have(1).category } specify { post.categories.should have(1).category }
specify { post.categories.first.should == Refinery::WordPress::Category.new('Rant') } specify { post.categories.first.should == WordPressImport::Category.new('Rant') }
end end
describe "#tags" do describe "#tags" do
specify { post.tags.should have(3).tags } specify { post.tags.should have(3).tags }
specify { post.tags.should include(Refinery::WordPress::Tag.new('css')) } specify { post.tags.should include(WordPressImport::Tag.new('css')) }
specify { post.tags.should include(Refinery::WordPress::Tag.new('html')) } specify { post.tags.should include(WordPressImport::Tag.new('html')) }
specify { post.tags.should include(Refinery::WordPress::Tag.new('php')) } specify { post.tags.should include(WordPressImport::Tag.new('php')) }
end end
specify { post.tag_list.should == 'css,html,php' } specify { post.tag_list.should == 'css,html,php' }
@ -63,6 +63,7 @@ describe Refinery::WordPress::Post, :type => :model do
@comment.body.should == comment.content @comment.body.should == comment.content
@comment.state.should == 'approved' @comment.state.should == 'approved'
@comment.created_at.should == comment.date @comment.created_at.should == comment.date
@comment.created_at.should == comment.date
end end
end end
end end
@ -79,16 +80,13 @@ describe Refinery::WordPress::Post, :type => :model do
@post = post.to_refinery @post = post.to_refinery
end end
specify { BlogPost.should have(1).record } specify { BlogPost.should have(1).record }
it "should copy the attributes from Refinery::WordPress::Post" do specify { @post.title.should == post.title }
@post.title.should == post.title specify { @post.body.should == post.content_formatted }
@post.body.should == post.content_formatted specify { @post.draft.should == post.draft? }
@post.draft.should == post.draft? specify { @post.published_at.should == post.post_date }
@post.published_at.should == post.post_date specify { @post.author.username.should == post.creator }
@post.created_at.should == post.post_date
@post.author.username.should == post.creator
end
it "should assign a category for each Refinery::WordPress::Category" do it "should assign a category for each Refinery::WordPress::Category" do
@post.categories.should have(post.categories.count).records @post.categories.should have(post.categories.count).records

View File

@ -1,15 +1,15 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Tag, :type => :model do describe WordPressImport::Tag, :type => :model do
let(:tag) { Refinery::WordPress::Tag.new('ruby') } let(:tag) { WordPressImport::Tag.new('ruby') }
describe "#name" do describe "#name" do
specify { tag.name.should == 'ruby' } specify { tag.name.should == 'ruby' }
end end
describe "#==" do describe "#==" do
specify { tag.should == Refinery::WordPress::Tag.new('ruby') } specify { tag.should == WordPressImport::Tag.new('ruby') }
specify { tag.should_not == Refinery::WordPress::Tag.new('php') } specify { tag.should_not == WordPressImport::Tag.new('php') }
end end
describe "#to_refinery" do describe "#to_refinery" do

View File

@ -1,7 +0,0 @@
require 'spec_helper'
describe Refinery::WordPress do
it "should be valid" do
Refinery::WordPress.should be_a(Module)
end
end

View File

@ -1,11 +1,11 @@
module Refinery::WordPress::SpecHelpers module WordPressImport::SpecHelpers
def test_dump def test_dump
file_name = File.realpath(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml')) file_name = File.expand_path(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml'))
Refinery::WordPress::Dump.new(file_name) WordPressImport::Dump.new(file_name)
end end
end end
RSpec.configure do |config| RSpec.configure do |config|
config.include Refinery::WordPress::SpecHelpers config.include WordPressImport::SpecHelpers
end end

View File

@ -0,0 +1,7 @@
require 'spec_helper'
# Smoke test: the gem's top-level namespace constant is a Module.
describe WordPressImport do
  it("should be valid") { WordPressImport.should be_a(Module) }
end

View File

@ -0,0 +1,24 @@
-# disabling style_hash for now; setting the width = width+10 doesn't seem to actually be a good thing
-# style_hash = {:style => "width: #{@attributes[:width].to_i+10}px"} unless @attributes[:width].blank?
- figure_hash = {:class => @attributes[:align], :id => @attributes[:id]}
-# figure_hash = figure_hash.merge(style_hash) if style_hash
- content_image = Nokogiri::HTML(@content).css("img").to_html
- content_caption = Nokogiri::HTML(@content).text + " #{@attributes[:caption]}"
- @content = "#{content_image} <figcaption>#{content_caption}</figcaption>" unless content_image.blank?
%figure{figure_hash}= @content
-# sample wordpress-y css to go along with this html:
figure {
background: #f1f1f1;
margin-bottom: 20px;
padding: 4px;
text-align: center;
}
figure img {
margin: 5px 5px 0;
}
figure figcaption {
color: #777;
font-size: 12px;
margin: 5px 5px 24px;
}

View File

@ -0,0 +1,4 @@
.post_column_1{:style =>"width:#{@attributes[:width]}; float: left; padding: #{@attributes[:padding]} 6% #{@attributes[:padding]} #{@attributes[:padding]}; display: inline;"}= @content
-#[column width="47%" padding="0"] foo [/column]
-# <div class="post_column_1">

View File

@ -0,0 +1 @@
<div style="clear: both;"></div>

View File

@ -0,0 +1,37 @@
-# Renders a google-map-v3 shortcode as an embedded Google Maps iframe plus a "larger map" link.
-# addmarkerlist is split on "{}": the first segment is used as the address, the last as the name.
- marker_parts = @attributes[:addmarkerlist].to_s.split("{}")
-# to_s on each segment keeps a missing or empty addmarkerlist from raising during encoding.
- address = marker_parts.first.to_s
- name = marker_parts.last.to_s
-# encode_www_form_component also escapes "&", "=" and "+", which would otherwise corrupt the q= parameter.
- uri_name = URI.encode_www_form_component(name)
- uri_address = URI.encode_www_form_component(address)
-# google will open up the business page if we pass it a name
- uri_address = "#{uri_name},%20#{uri_address}" unless name.blank? || uri_name == uri_address
- gmaps_url = "https://maps.google.com/maps/?q=#{uri_address}&amp;ie=UTF8&amp;t=m"
<iframe src="#{gmaps_url}&amp;output=embed" frameborder="0" marginwidth="0" marginheight="0" scrolling="no" width="#{@attributes[:width]}" height="#{@attributes[:height]}"></iframe>
<small>View <a href="#{gmaps_url}">#{name}</a> in a larger map</small>
-# [google-map-v3 width="425"
height="350"
zoom="12"
maptype="roadmap"
mapalign="left"
directionhint="false"
language="default"
poweredby="false"
maptypecontrol="true"
pancontrol="true"
zoomcontrol="true"
scalecontrol="true"
streetviewcontrol="true"
scrollwheelcontrol="false"
draggable="true"
tiltfourtyfive="false"
addmarkermashupbubble="false"
addmarkermashupbubble="false"
addmarkerlist="#1 Dream Manor Dr Globe, AZ{}1-default.png{}Dream Manor Inn"
bubbleautopan="true"
showbike="false"
showtraffic="false"
showpanoramio="false"]

22
wordpress-import.gemspec Normal file
View File

@ -0,0 +1,22 @@
# Gem specification for wordpress-import, kept simple so the gem can be
# used in Rails apps directly through git.
Gem::Specification.new do |s|
  s.name        = "wordpress-import"
  s.summary     = "Import WordPress XML dumps into your Ruby on Rails app."
  s.description = "This gem imports a WordPress XML dump into Rails (Page, User, BlogPost, BlogCategory, Tag, BlogComment)"
  s.version     = "0.4.4"
  s.date        = "2014-03-17"
  s.authors     = ['Will Bradley']
  s.email       = 'bradley.will@gmail.com'
  s.homepage    = 'https://github.com/zyphlar/wordpress-import'
  # The MIT-LICENSE file is packaged below; declare the license in the metadata as well
  # so RubyGems tooling can see it.
  s.license     = 'MIT'

  # Runtime dependencies.
  s.add_dependency 'bundler', '~> 1.0'
  s.add_dependency 'nokogiri', '~> 1.6.0'
  s.add_dependency 'shortcode', '~> 0.1.1'

  # Needed only to run the spec suite.
  s.add_development_dependency 'rspec-rails'
  s.add_development_dependency 'database_cleaner'

  # Everything under app/, lib/ and config/, plus the top-level project files.
  s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
end