Compare commits

...

38 Commits

Author SHA1 Message Date
05bf79536d Updating shortcode gem version 2014-03-18 14:09:38 -07:00
249de047b2 Updating gem version 2014-03-17 23:28:50 -07:00
9aa403612e Adding wordpress shortcode support 2014-03-17 23:27:02 -07:00
201233f89f Filter EOT characters from import 2014-03-12 18:59:31 -07:00
ede3baae64 Version 2014-03-12 15:57:41 -07:00
6382ceb7dc Minor filename tweak 2014-03-12 15:57:20 -07:00
10df4f5494 Tweaking error handling and reporting 2014-03-12 15:14:14 -07:00
3807af7ff3 Finishing rake task for image scraping/download 2014-03-12 12:56:25 -07:00
84ba9eac1c Version increment 2014-03-12 11:12:30 -07:00
0031b7046f Adding download post images task 2014-03-12 11:11:07 -07:00
5d96f3a336 Updating with more rake tweaks and some post customizations 2014-03-11 17:57:28 -07:00
39914c83c9 Adding categories/tags 2014-03-11 15:17:46 -07:00
5f564bdbff Starting categories 2014-03-11 12:03:51 -07:00
ce3e947b83 Todo update 2014-03-06 18:23:22 -07:00
c06bc8142d Fixing occurrance count and error reporting in attachments 2014-03-06 18:20:09 -07:00
4b36eaeccf Not sure what the diff is here, but it seems to work now 2014-03-06 17:37:38 -07:00
d4b2457787 Changing attachment to use paperclip 2014-03-06 17:25:39 -07:00
bd4cccd91a Refining post/author/dump behavior 2014-03-05 21:27:57 -07:00
60ce62ad1b Beginning to remove refinery stuff 2014-03-04 15:47:53 -07:00
Marc Remolt
1e1f3574eb Updated nokogiri dependency to 1.5.0 2011-08-12 19:29:05 +02:00
Marc Remolt
0beaa90a3d Trying to make the gem 1.8.7 compatible
* replaced File.realpath in testsuite with File.expand_path
* removed the old YAML hack
2011-08-12 19:15:46 +02:00
Marc Remolt
6188f58706 Trying to test under multiple ruby versions 2011-08-12 18:57:41 +02:00
Marc Remolt
b2700960d8 Updated to latest refinerycms(-blog)
* bundle update
* new migrations
* fixed specs for new behaviour
2011-08-12 18:44:00 +02:00
Marc Remolt
d3352df3a5 Added 1.9.2 as build base to travis config 2011-07-27 14:12:30 +02:00
Marc Remolt
e02aeefeb5 Removed RDoc rake tasks (not needed for now) 2011-07-27 14:07:54 +02:00
Marc Remolt
cdbb1d001a Merge branch 'master' of github.com:mremolt/refinerycms-wordpress-import 2011-07-27 13:56:25 +02:00
Marc Remolt
054e7f3b91 Bundle update 2011-07-27 13:55:52 +02:00
Marc Remolt
ec1162fc4b Merge pull request #4 from wikyd/ruby_1_8_7
Change to make compatible with Ruby 1.8.7
2011-06-21 00:24:05 -07:00
Kyle Wilkinson
368561cfc7 Use File.expand_path instead of File.absolute_path to work with Ruby 1.8.7 2011-06-20 22:53:26 -07:00
Marc Remolt
32bb8a528a Typo 2011-06-13 19:03:52 +02:00
Marc Remolt
309ec660b9 Preparing for release 0.3.0 2011-06-13 19:02:25 +02:00
Marc Remolt
90665484c7 Resource import working
* added rake tasks
2011-06-13 18:48:17 +02:00
Marc Remolt
a24ea686fa Image import working
* bundle update
* replacement of image URLs now considers wordpress thumbnails (like -150x150)
* added Guard for autotesting
2011-06-13 15:02:40 +02:00
Marc Remolt
7265d31f62 Some specs for file Attachment 2011-06-06 19:45:34 +02:00
Marc Remolt
2581895c7c Updated test dump and adapted specs
* We now have a second attachment, a simple text file
2011-06-06 19:39:27 +02:00
Marc Remolt
7550dfa164 First shot at attachment import
* only images for now
* only blog posts for now
* thumbnailed images are not yet replaced in blog posts body
2011-06-05 19:14:52 +02:00
Marc Remolt
9a2b5acef6 Spec improvements
* replaced it with specify on {...} blocks
2011-06-05 17:55:00 +02:00
Marc Remolt
772cfdc25b Started on attachment import
* created Attachment
* created Dump#attachments
* specs
* replaced require with autoload
2011-06-05 17:43:30 +02:00
51 changed files with 1403 additions and 687 deletions

3
.gitignore vendored
View File

@ -4,3 +4,6 @@ pkg/
spec/dummy/db/*.sqlite3
spec/dummy/log/*.log
spec/dummy/tmp/
*.un~
refinerycms-wordpress-import-*.gem
*.swp

4
.travis.yml Normal file
View File

@ -0,0 +1,4 @@
rvm:
- 1.9.2
- 1.8.7
- ruby-head

22
Gemfile
View File

@ -1,16 +1,26 @@
source "http://rubygems.org"
gem "rails", "3.0.7"
ruby '2.0.0'
gem "rails", "4.0.0"
#gem "capybara", ">= 1.0.0.beta1"
gem "sqlite3"
gem "rmagick"
gem 'shortcode', "0.1.2"
gem "rspec-rails", ">= 2.6.0"
gem "database_cleaner"
group :development, :test do
gem 'byebug'
gem "rspec-rails", ">= 2.6.0"
gem "database_cleaner"
gem 'guard-rspec'
gem 'ffi'
gem 'guard-bundler'
gem 'libnotify' if RUBY_PLATFORM =~ /linux/i
gem 'fakeweb'
end
# To use debugger (ruby-debug for Ruby 1.8.7+, ruby-debug19 for Ruby 1.9.2+)
# gem 'ruby-debug'
# gem 'ruby-debug19'
gem 'refinerycms'
gem 'refinerycms-blog'
gem 'refinerycms-wordpress-import', :path => './'
gem 'wordpress-import', :path => './'

View File

@ -1,180 +1,176 @@
PATH
remote: ./
specs:
refinerycms-wordpress-import (0.1.0)
wordpress-import (0.4.4)
bundler (~> 1.0)
nokogiri (~> 1.4.4)
refinerycms (~> 1.0.0)
refinerycms-blog (~> 1.5.2)
nokogiri (~> 1.6.0)
shortcode (~> 0.1.1)
GEM
remote: http://rubygems.org/
specs:
abstract (1.0.0)
actionmailer (3.0.7)
actionpack (= 3.0.7)
mail (~> 2.2.15)
actionpack (3.0.7)
activemodel (= 3.0.7)
activesupport (= 3.0.7)
builder (~> 2.1.2)
erubis (~> 2.6.6)
i18n (~> 0.5.0)
rack (~> 1.2.1)
rack-mount (~> 0.6.14)
rack-test (~> 0.5.7)
tzinfo (~> 0.3.23)
activemodel (3.0.7)
activesupport (= 3.0.7)
builder (~> 2.1.2)
i18n (~> 0.5.0)
activerecord (3.0.7)
activemodel (= 3.0.7)
activesupport (= 3.0.7)
arel (~> 2.0.2)
tzinfo (~> 0.3.23)
activeresource (3.0.7)
activemodel (= 3.0.7)
activesupport (= 3.0.7)
activesupport (3.0.7)
acts-as-taggable-on (2.0.6)
acts_as_indexed (0.7.2)
arel (2.0.10)
awesome_nested_set (2.0.0)
activerecord (>= 3.0.0)
babosa (0.3.4)
bcrypt-ruby (2.1.4)
builder (2.1.2)
database_cleaner (0.6.7)
devise (1.3.4)
bcrypt-ruby (~> 2.1.2)
orm_adapter (~> 0.0.3)
warden (~> 1.0.3)
diff-lcs (1.1.2)
dragonfly (0.9.3)
rack
erubis (2.6.6)
abstract (>= 1.0.0)
filters_spam (0.3)
friendly_id_globalize3 (3.2.1.3)
babosa (~> 0.3.0)
globalize3 (0.1.0)
activemodel (>= 3.0.0)
activerecord (>= 3.0.0)
i18n (0.5.0)
mail (2.2.19)
activesupport (>= 2.3.6)
i18n (>= 0.4.0)
actionmailer (4.0.0)
actionpack (= 4.0.0)
mail (~> 2.5.3)
actionpack (4.0.0)
activesupport (= 4.0.0)
builder (~> 3.1.0)
erubis (~> 2.7.0)
rack (~> 1.5.2)
rack-test (~> 0.6.2)
activemodel (4.0.0)
activesupport (= 4.0.0)
builder (~> 3.1.0)
activerecord (4.0.0)
activemodel (= 4.0.0)
activerecord-deprecated_finders (~> 1.0.2)
activesupport (= 4.0.0)
arel (~> 4.0.0)
activerecord-deprecated_finders (1.0.3)
activesupport (4.0.0)
i18n (~> 0.6, >= 0.6.4)
minitest (~> 4.2)
multi_json (~> 1.3)
thread_safe (~> 0.1)
tzinfo (~> 0.3.37)
arel (4.0.2)
atomic (1.1.15)
blankslate (2.1.2.4)
builder (3.1.4)
byebug (2.7.0)
columnize (~> 0.3)
debugger-linecache (~> 1.2)
celluloid (0.15.2)
timers (~> 1.1.0)
celluloid-io (0.15.0)
celluloid (>= 0.15.0)
nio4r (>= 0.5.0)
coderay (1.1.0)
columnize (0.3.6)
database_cleaner (1.2.0)
debugger-linecache (1.2.0)
diff-lcs (1.2.5)
erubis (2.7.0)
fakeweb (1.3.0)
ffi (1.9.3)
formatador (0.2.4)
guard (2.5.1)
formatador (>= 0.2.4)
listen (~> 2.6)
lumberjack (~> 1.0)
pry (>= 0.9.12)
thor (>= 0.18.1)
guard-bundler (2.0.0)
bundler (~> 1.0)
guard (~> 2.2)
guard-rspec (4.2.8)
guard (~> 2.1)
rspec (>= 2.14, < 4.0)
haml (4.0.5)
tilt
hike (1.2.3)
i18n (0.6.9)
libnotify (0.8.2)
ffi (>= 1.0.11)
listen (2.7.0)
celluloid (>= 0.15.2)
celluloid-io (>= 0.15.0)
rb-fsevent (>= 0.9.3)
rb-inotify (>= 0.9)
lumberjack (1.0.4)
mail (2.5.4)
mime-types (~> 1.16)
treetop (~> 1.4.8)
mime-types (1.16)
nokogiri (1.4.4)
orm_adapter (0.0.5)
polyglot (0.3.1)
rack (1.2.3)
rack-cache (1.0.2)
rack (>= 0.4)
rack-mount (0.6.14)
rack (>= 1.0.0)
rack-test (0.5.7)
method_source (0.8.2)
mime-types (1.25.1)
mini_portile (0.5.2)
minitest (4.7.5)
multi_json (1.9.0)
nio4r (1.0.0)
nokogiri (1.6.1)
mini_portile (~> 0.5.0)
parslet (1.5.0)
blankslate (~> 2.0)
polyglot (0.3.4)
pry (0.9.12.6)
coderay (~> 1.0)
method_source (~> 0.8)
slop (~> 3.4)
rack (1.5.2)
rack-test (0.6.2)
rack (>= 1.0)
rails (3.0.7)
actionmailer (= 3.0.7)
actionpack (= 3.0.7)
activerecord (= 3.0.7)
activeresource (= 3.0.7)
activesupport (= 3.0.7)
bundler (~> 1.0)
railties (= 3.0.7)
railties (3.0.7)
actionpack (= 3.0.7)
activesupport (= 3.0.7)
rails (4.0.0)
actionmailer (= 4.0.0)
actionpack (= 4.0.0)
activerecord (= 4.0.0)
activesupport (= 4.0.0)
bundler (>= 1.3.0, < 2.0)
railties (= 4.0.0)
sprockets-rails (~> 2.0.0)
railties (4.0.0)
actionpack (= 4.0.0)
activesupport (= 4.0.0)
rake (>= 0.8.7)
thor (~> 0.14.4)
rake (0.9.1)
refinerycms (1.0.0)
bundler (~> 1.0)
refinerycms-authentication (= 1.0.0)
refinerycms-base (= 1.0.0)
refinerycms-core (= 1.0.0)
refinerycms-dashboard (= 1.0.0)
refinerycms-images (= 1.0.0)
refinerycms-pages (= 1.0.0)
refinerycms-resources (= 1.0.0)
refinerycms-settings (= 1.0.0)
refinerycms-authentication (1.0.0)
devise (~> 1.3.0)
friendly_id_globalize3 (~> 3.2.1)
refinerycms-core (= 1.0.0)
refinerycms-base (1.0.0)
refinerycms-blog (1.5.2)
acts-as-taggable-on
filters_spam (~> 0.2)
refinerycms-core (~> 1.0.0)
seo_meta (~> 1.1.0)
refinerycms-core (1.0.0)
acts_as_indexed (~> 0.7)
awesome_nested_set (~> 2.0)
friendly_id_globalize3 (~> 3.2.1)
globalize3 (~> 0.1)
rails (~> 3.0.7)
refinerycms-base (= 1.0.0)
refinerycms-generators (~> 1.0)
refinerycms-settings (= 1.0.0)
truncate_html (~> 0.5)
will_paginate (~> 3.0.pre)
refinerycms-dashboard (1.0.0)
refinerycms-core (= 1.0.0)
refinerycms-generators (1.0.2)
refinerycms-images (1.0.0)
dragonfly (~> 0.9.0)
rack-cache (>= 0.5.3)
refinerycms-core (= 1.0.0)
refinerycms-pages (1.0.0)
awesome_nested_set (~> 2.0)
friendly_id_globalize3 (~> 3.2.1)
globalize3 (~> 0.1)
refinerycms-core (= 1.0.0)
seo_meta (~> 1.1)
refinerycms-resources (1.0.0)
dragonfly (~> 0.9.0)
rack-cache (>= 0.5.3)
refinerycms-core (= 1.0.0)
refinerycms-settings (1.0.0)
refinerycms-base (= 1.0.0)
rspec (2.6.0)
rspec-core (~> 2.6.0)
rspec-expectations (~> 2.6.0)
rspec-mocks (~> 2.6.0)
rspec-core (2.6.3)
rspec-expectations (2.6.0)
diff-lcs (~> 1.1.2)
rspec-mocks (2.6.0)
rspec-rails (2.6.1)
actionpack (~> 3.0)
activesupport (~> 3.0)
railties (~> 3.0)
rspec (~> 2.6.0)
seo_meta (1.1.1)
refinerycms-generators (~> 1.0.1)
sqlite3 (1.3.3)
thor (0.14.6)
treetop (1.4.9)
thor (>= 0.18.1, < 2.0)
rake (10.1.1)
rb-fsevent (0.9.4)
rb-inotify (0.9.3)
ffi (>= 0.5.0)
rmagick (2.13.2)
rspec (2.14.1)
rspec-core (~> 2.14.0)
rspec-expectations (~> 2.14.0)
rspec-mocks (~> 2.14.0)
rspec-core (2.14.8)
rspec-expectations (2.14.5)
diff-lcs (>= 1.1.3, < 2.0)
rspec-mocks (2.14.6)
rspec-rails (2.14.1)
actionpack (>= 3.0)
activemodel (>= 3.0)
activesupport (>= 3.0)
railties (>= 3.0)
rspec-core (~> 2.14.0)
rspec-expectations (~> 2.14.0)
rspec-mocks (~> 2.14.0)
shortcode (0.1.2)
haml (~> 4.0)
parslet (= 1.5.0)
slop (3.5.0)
sprockets (2.12.0)
hike (~> 1.2)
multi_json (~> 1.0)
rack (~> 1.0)
tilt (~> 1.1, != 1.3.0)
sprockets-rails (2.0.1)
actionpack (>= 3.0)
activesupport (>= 3.0)
sprockets (~> 2.8)
sqlite3 (1.3.9)
thor (0.18.1)
thread_safe (0.2.0)
atomic (>= 1.1.7, < 2)
tilt (1.4.1)
timers (1.1.0)
treetop (1.4.15)
polyglot
polyglot (>= 0.3.1)
truncate_html (0.5.1)
tzinfo (0.3.27)
warden (1.0.4)
rack (>= 1.0)
will_paginate (3.0.pre2)
tzinfo (0.3.39)
PLATFORMS
ruby
DEPENDENCIES
byebug
database_cleaner
rails (= 3.0.7)
refinerycms
refinerycms-blog
refinerycms-wordpress-import!
fakeweb
ffi
guard-bundler
guard-rspec
libnotify
rails (= 4.0.0)
rmagick
rspec-rails (>= 2.6.0)
shortcode (= 0.1.2)
sqlite3
wordpress-import!

21
Guardfile Normal file
View File

@ -0,0 +1,21 @@
# A sample Guardfile
# More info at https://github.com/guard/guard#readme

# 'bundler' guard: watches the Gemfile and any gemspec file.
guard 'bundler' do
  watch('Gemfile')
  # Also watch gemspec files (relevant when the Gemfile uses the `gemspec' command)
  watch(/^.+\.gemspec/)
end

# 'rspec' guard: each watcher is registered exactly once.
guard 'rspec', :version => 2 do
  # A changed spec file is passed through as-is.
  watch(%r{^spec/.+_spec\.rb$})
  # A changed file under lib/ maps to the spec of the same name under spec/lib/.
  watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
  # Changes to shared support code or the spec helper map to the whole spec directory.
  watch(%r{^spec/support/(.+)\.rb$}) { "spec" }
  watch('spec/spec_helper.rb') { "spec" }
end

View File

@ -1,4 +1,5 @@
Copyright 2011 YOURNAME
Copyright 2014 Will Bradley
portions Copyright 2011 Marc Remolt
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View File

@ -1,34 +1,32 @@
= Refinerycms-wordpress-import
= Wordpress-import
This litte project is an importer for WordPress XML dumps into refinerycms(-blog).
This little project is an importer for WordPress XML dumps into Rails.
You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
It's been somewhat customized for one particular project; you probably want to fork this and modify it to fit your app's schema.
Keep in mind, this gem imports blog posts and pages, NOT the media files, as they are not
part of the XML dump! You have to manually readd them to Refinery.
It's a fork of Marc Remolt's Refinerycms-wordpress-import ( https://github.com/mremolt/refinerycms-wordpress-import )
The same goes for links to other pages on your site. WordPress exports them just as <a>-Tags.
You can find the source code on github: https://github.com/zyphlar/wordpress-import
Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags.
If your site (blog) structure uses new urls, the links WILL break! For example, if you used
the popular WP blog url structure "YYYY-MM/slug", be warned that Refinery just uses "blog/slug".
So your inner site links will point to the old WP url.
== Prerequisites
As refinerycms-wordpress-import is an addon for RefineryCMS, is shares the prerequisites with it.
So you'll first need a running installation of refinerycms and refinerycms-blog. Make sure
the site is running, all migrations are run and you created the first refinery user.
TODO
== Installation
Just add the gem to your projects Gemfile:
gem 'refinerycms-wordpress-import'
gem 'wordpress-import'
Or if you want to stay on the bleeding edge:
gem 'refinerycms-wordpress-import', :git => 'git://github.com/mremolt/refinerycms-wordpress-import.git'
gem 'wordpress-import', :git => 'git://github.com/zyphlar/wordpress-import.git'
and run
@ -84,6 +82,39 @@ If you want to clean the tables and import in one task:
rake wordpress:reset_and_import_pages[file_name]
Finally, if you want to reset and import all data including media (see below):
rake wordpress:full_import[file_name]
== Importing media files
The WP XML dump contains absolute links to media files linked inside posts, like:
www.mysite.com/wordpress/wp-content/uploads/2011/05/cv.txt
The dump does NOT contain the files themselves! To get them imported, this gem downloads the files
from the given URL and imports them to refinery. So for a working media import the old site with
the media URLs must still be online.
After importing the files, this gem replaces the old links in pages and blog posts with the
new generated ones. It parses all existing records searching for the right pattern. That
means, you have to import pages and posts FIRST to get the URLs replaced.
Now to the rake tasks for media import:
rake wordpress:reset_media
This task deletes all data from the media tables (images and resources), ensuring a clean import.
rake wordpress:import_and_replace_media[file_name]
This task imports all the WordPress media into Refinery. After the import it parses all
pages and blog posts, replacing the legacy links with the current refinery ones.
If you want to clean the tables and import in one task:
rake wordpress:reset_import_and_replace_media[file_name]
== Usage on ZSH

View File

@ -7,7 +7,7 @@ rescue LoadError
end
require 'rake'
require 'rdoc/task'
#require 'rdoc/task'
require 'rspec/core'
require 'rspec/core/rake_task'
@ -16,10 +16,10 @@ RSpec::Core::RakeTask.new(:spec)
task :default => :spec
Rake::RDocTask.new(:rdoc) do |rdoc|
rdoc.rdoc_dir = 'rdoc'
rdoc.title = 'Refinerycms-wordpress-import'
rdoc.options << '--line-numbers' << '--inline-source'
rdoc.rdoc_files.include('README.rdoc')
rdoc.rdoc_files.include('lib/**/*.rb')
end
#RDoc::Task.new(:rdoc) do |rdoc|
#rdoc.rdoc_dir = 'rdoc'
#rdoc.title = 'Refinerycms-wordpress-import'
#rdoc.options << '--line-numbers' << '--inline-source'
#rdoc.rdoc_files.include('README.rdoc')
#rdoc.rdoc_files.include('lib/**/*.rb')
#end

View File

@ -1,7 +0,0 @@
module Refinery
module WordPress
end
end
require 'wordpress'

View File

@ -5,77 +5,183 @@ namespace :wordpress do
task :reset_blog do
Rake::Task["environment"].invoke
%w(taggings tags blog_comments blog_categories blog_categories_blog_posts
blog_posts).each do |table_name|
%w(posts post_translations taggings tags).each do |table_name|
p "Truncating #{table_name} ..."
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
end
end
desc "import blog data from a Refinery::WordPress XML dump"
task :import_blog, :file_name do |task, params|
desc "import blog data from a WordPressImport XML dump"
task :import_blog, :file_name, :blog_slug do |task, params|
Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name])
p "Loading XML from #{params[:file_name]} (using blog #{params[:blog_slug]}) ..."
dump = WordPressImport::Dump.new(params[:file_name])
dump.authors.each(&:to_refinery)
p "Importing #{dump.authors.count} authors ..."
dump.authors.each(&:to_rails)
# by default, import all; unless $ONLY_PUBLISHED = "true"
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
dump.posts(only_published).each(&:to_refinery)
p "Importing #{dump.posts(only_published).count} posts ..."
Refinery::WordPress::Post.create_blog_page_if_necessary
if only_published
p "(only published posts)"
else
p "(export ONLY_PUBLISHED=true to import only published posts)"
end
ENV["MODEL"] = 'BlogPost'
Rake::Task["friendly_id:redo_slugs"].invoke
ENV.delete("MODEL")
dump.posts(only_published).each{|p| p.to_rails(params[:blog_slug]) }
end
desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump"
task :reset_and_import_blog, :file_name do |task, params|
desc "reset blog tables and then import blog data from a WordPressImport XML dump"
task :reset_and_import_blog, :file_name, :blog_slug do |task, params|
Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_blog"].invoke
Rake::Task["wordpress:import_blog"].invoke(params[:file_name])
Rake::Task["wordpress:import_blog"].invoke(params[:file_name], params[:blog_slug])
end
desc "Reset the cms relevant tables for a clean import"
task :reset_pages do
desc "download images in posts to public folder"
task :download_post_images, :host_match do |task, params|
raise "Error: you must specify a host to match for this download (i.e. rake wordpress:download_post_images['mywebsite']" if params[:host_match].blank?
Rake::Task["environment"].invoke
%w(page_part_translations page_translations page_parts pages).each do |table_name|
# scrape images
@posts = ::Post.all
@posts.each do |post|
doc = Nokogiri::HTML(post.body)
doc.css("img").each do |img|
# find remote file path
remote_file = img.attributes["src"].text
# load uri
begin
remote_uri = URI(remote_file)
rescue => error
puts "Error parsing URL #{remote_file}: #{error.message}"
end
# only download if the image is a LFA-hosted image
if remote_uri && remote_uri.host.match(params[:host_match]) != nil
# find a local path for it
local_file = File.expand_path(File.join(Rails.public_path,remote_uri.path))
# only download if not already there or if it's zero bytes
unless File.size?(local_file)
# create local folders if necessary
dirname = File.dirname(local_file)
unless File.directory?(dirname)
FileUtils.mkdir_p(dirname)
end
# save remote file to local
begin
remote_file_io = open(remote_file)
File.open(local_file,'wb'){ |f| f.write(remote_file_io.read) }
puts "Saved file: #{local_file}"
rescue OpenURI::HTTPError => error
puts "Error saving file #{remote_file}: #{error.message}"
end
end
end
end
end
puts "Finished downloding images from #{@posts.count} posts"
end
# desc "Reset the cms relevant tables for a clean import"
# task :reset_pages do
# Rake::Task["environment"].invoke
# %w(page_part_translations page_translations page_parts pages).each do |table_name|
# p "Truncating #{table_name} ..."
# ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
# end
# end
# desc "import cms data from a WordPress XML dump"
# task :import_pages, :file_name do |task, params|
# Rake::Task["environment"].invoke
# dump = WordPressImport::Dump.new(params[:file_name])
# only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
# dump.pages(only_published).each(&:to_rails)
# # After all pages are persisted we can now create the parent - child
# # relationships. This is necessary, as WordPress doesn't dump the pages in
# # a correct order.
# dump.pages(only_published).each do |dump_page|
# page = ::Page.find(dump_page.post_id)
# page.parent_id = dump_page.parent_id
# page.save!
# end
# WordPressImport::Post.create_blog_page_if_necessary
# ENV["MODEL"] = 'Page'
# Rake::Task["friendly_id:redo_slugs"].invoke
# ENV.delete("MODEL")
# end
# desc "reset cms tables and then import cms data from a WordPress XML dump"
# task :reset_and_import_pages, :file_name do |task, params|
# Rake::Task["environment"].invoke
# Rake::Task["wordpress:reset_pages"].invoke
# Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
# end
desc "Reset the media relevant tables for a clean import"
task :reset_media do
Rake::Task["environment"].invoke
%w(rich_rich_files).each do |table_name|
p "Truncating #{table_name} ..."
ActiveRecord::Base.connection.execute "DELETE FROM #{table_name}"
end
end
desc "import cms data from a Refinery::WordPress XML dump"
task :import_pages, :file_name do |task, params|
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
task :import_and_replace_media, :file_name do |task, params|
Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name])
dump = WordPressImport::Dump.new(params[:file_name])
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
dump.pages(only_published).each(&:to_refinery)
# After all pages are persisted we can now create the parent - child
# relationships. This is necessary, as WordPress doesn't dump the pages in
# a correct order.
dump.pages(only_published).each do |dump_page|
page = ::Page.find(dump_page.post_id)
page.parent_id = dump_page.parent_id
page.save!
p "Importing #{dump.attachments.each_slice(200).first.count} attachments ..."
attachments = dump.attachments.each_slice(200).first.each(&:to_rails)
unless $ATTACHMENT_EXCEPTIONS.blank?
p "----------------------------------------------------------"
p "ERRORS WERE ENCOUNTERED IMPORTING ATTACHMENTS:"
$ATTACHMENT_EXCEPTIONS.each{|exception| puts exception}
p "----------------------------------------------------------"
end
Refinery::WordPress::Post.create_blog_page_if_necessary
# parse all created Post and Page bodys and replace the old wordpress media urls
# with the newly created ones
p "Replacing attachment URLs found in posts/pages ..."
attachments.each(&:replace_url)
ENV["MODEL"] = 'Page'
Rake::Task["friendly_id:redo_slugs"].invoke
ENV.delete("MODEL")
unless $REPLACEMENT_EXCEPTIONS.blank?
p "----------------------------------------------------------"
p "ERRORS WERE ENCOUNTERED REPLACING ATTACHMENTS:"
$REPLACEMENT_EXCEPTIONS.each{|exception| puts exception}
p "----------------------------------------------------------"
end
end
desc "reset cms tables and then import cms data from a Refinery::WordPress XML dump"
task :reset_and_import_pages, :file_name do |task, params|
desc "reset media tables and then import media data from a WordPress XML dump"
task :reset_import_and_replace_media, :file_name do |task, params|
Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_pages"].invoke
Rake::Task["wordpress:import_pages"].invoke(params[:file_name])
Rake::Task["wordpress:reset_media"].invoke
Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
end
desc "reset and import all data (see the other tasks)"
task :full_import, :file_name, :blog_slug do |task, params|
Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_and_import_blog"].invoke(params[:file_name],params[:blog_slug])
#Rake::Task["wordpress:reset_and_import_pages"].invoke(params[:file_name])
#Rake::Task["wordpress:reset_import_and_replace_media"].invoke(params[:file_name])
Rake::Task["wordpress:import_and_replace_media"].invoke(params[:file_name])
end
end

5
lib/wordpress-import.rb Normal file
View File

@ -0,0 +1,5 @@
# Entry point of the gem: declares the (empty) WordPressImport namespace
# module and then requires 'wordpress'.
module WordPressImport
end
# NOTE(review): presumably 'wordpress' is the file that registers the importer
# classes inside this namespace -- confirm against lib/wordpress.rb.
require 'wordpress'

View File

@ -1,15 +1,15 @@
module Refinery
module WordPress
end
require 'nokogiri'
require "wordpress/railtie"
require 'shortcode'
module WordPressImport
autoload :Author, 'wordpress/author'
autoload :Tag, 'wordpress/tag'
autoload :Category, 'wordpress/category'
autoload :Page, 'wordpress/page'
autoload :Post, 'wordpress/post'
autoload :Comment, 'wordpress/comment'
autoload :Dump, 'wordpress/dump'
autoload :Attachment, 'wordpress/attachment'
end
require 'nokogiri'
require 'wordpress/author'
require 'wordpress/tag'
require 'wordpress/category'
require 'wordpress/page'
require 'wordpress/post'
require 'wordpress/comment'
require 'wordpress/dump'
require "wordpress/railtie"

179
lib/wordpress/attachment.rb Normal file
View File

@ -0,0 +1,179 @@
module WordPressImport
  # Wraps a single attachment node of a WordPress XML dump.
  #
  # An instance can import the referenced file into a ::Rich::RichFile record
  # (#to_rails) and afterwards rewrite references to the old WordPress URL in
  # ::Post bodies and ::Page parts (#replace_url).
  #
  # Failures in either step are not raised to the caller; they are printed and
  # appended to the globals $ATTACHMENT_EXCEPTIONS / $REPLACEMENT_EXCEPTIONS.
  class Attachment
    # The XML node this attachment was built from (must respond to #xpath).
    attr_reader :node
    # Record produced by the last image import (nil until #to_rails succeeded).
    attr_reader :paperclip_image
    # Record produced by the last non-image import (nil until #to_rails succeeded).
    attr_reader :paperclip_file

    # @param node [#xpath] the attachment node of the dump
    def initialize(node)
      @node = node
    end

    # @return [String] text of the node's "title" element
    def title
      node.xpath("title").text
    end

    # @return [String] text of the node's "description" element
    def description
      node.xpath("description").text
    end

    # @return [String, nil] last path segment of #url (nil for an empty URL)
    def file_name
      url.split('/').last
    end

    # @return [DateTime] parsed "wp:post_date" of the node
    def post_date
      DateTime.parse node.xpath("wp:post_date").text
    end

    # @return [String] text of the node's "wp:attachment_url" element
    def url
      node.xpath("wp:attachment_url").text
    end

    # Builds a regexp matching the attachment URL and any variant that carries
    # a "-<width>x<height>" suffix before the extension (e.g. "-150x150").
    #
    # Both URL halves are escaped so that characters such as ".", "+", "?" or
    # "(" are matched literally instead of being read as regexp syntax.
    #
    # @return [Regexp]
    def url_pattern
      url_parts = url.split('.')
      # to_s: an empty URL yields no parts, and Regexp.escape rejects nil.
      extension = url_parts.pop.to_s
      url_without_extension = url_parts.join('.')

      /#{Regexp.escape(url_without_extension)}(-\d+x\d+)?\.#{Regexp.escape(extension)}/
    end

    # Whether the URL ends in a known image extension. The check ignores case
    # so that uploads such as "IMG_0001.JPG" are treated as images too.
    #
    # @return [MatchData, nil] truthy when the URL points to an image
    def image?
      url.match(/\.(png|jpg|jpeg|gif)$/i)
    end

    # Imports the attachment as an image or as a plain file.
    #
    # @return [Object, nil] the record, or nil when the import failed (the
    #   failure message is appended to $ATTACHMENT_EXCEPTIONS)
    def to_rails
      image? ? to_image : to_file
    rescue StandardError => ex
      message = "ERROR saving attachment #{url} -- #{ex.message}"
      p message
      ($ATTACHMENT_EXCEPTIONS ||= []) << message
      nil
    end

    # Replaces references to the old URL with the URL of the imported record.
    # Requires a prior successful #to_rails; otherwise the resulting error is
    # recorded in $REPLACEMENT_EXCEPTIONS and nil is returned.
    def replace_url
      @occurrance_count = 0

      if image?
        replace_image_url
      else
        replace_resource_url
      end

      p "Replaced #{@occurrance_count} occurrances of #{url}"
    rescue StandardError => ex
      message = "ERROR replacing URL #{url} -- #{ex.message}"
      p message
      ($REPLACEMENT_EXCEPTIONS ||= []) << message
      nil
    end

    private

    # Normalises a file name for the duplicate lookup in #to_image / #to_file.
    # The steps are kept exactly as they were because the result has to agree
    # with the file name cleaner of the storage system.
    # NOTE(review): the name is unescaped twice -- presumably to cope with
    # doubly encoded names; confirm before simplifying.
    def rich_file_clean_file_name(full_file_name)
      extension = File.extname(full_file_name).gsub(/^\.+/, '')
      filename = full_file_name.gsub(/\.#{extension}$/, '')
      filename = CGI::unescape(filename)
      filename = CGI::unescape(filename)

      extension = extension.downcase
      filename = filename.downcase.gsub(/[^a-z0-9]+/i, '-')

      "#{filename}.#{extension}"
    end

    # Finds or creates the record for an image attachment and remembers it in
    # @paperclip_image.
    def to_image
      # avoid duplicates; use our storage system's filename cleaner for lookup
      image = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))

      if image.rich_file.instance.id.blank?
        p "Importing image #{file_name}"
        image.simplified_type = "image"
        image.created_at = post_date
        image.rich_file = URI.parse(url)
        image.save!
      else
        p "image #{file_name} already exists..."
      end

      @paperclip_image = image
      image
    end

    # Finds or creates the record for a non-image attachment and remembers it
    # in @paperclip_file.
    def to_file
      # avoid duplicates; use our storage system's filename cleaner for lookup
      file = ::Rich::RichFile.find_or_initialize_by(rich_file_file_name: rich_file_clean_file_name(file_name))

      if file.rich_file.instance.id.blank?
        p "Importing file #{file_name}"
        file.created_at = post_date
        file.rich_file = URI.parse(url) if file.rich_file.blank?
        file.save!
      else
        p "file #{file_name} already exists..."
      end

      @paperclip_file = file
      file
    end

    def replace_image_url
      replace_image_url_in_blog_posts
      replace_image_url_in_pages
    end

    def replace_resource_url
      replace_resource_url_in_blog_posts
      replace_resource_url_in_pages
    end

    def replace_image_url_in_blog_posts
      replace_url_in_blog_posts(paperclip_image.rich_file.url)
    end

    def replace_image_url_in_pages
      replace_url_in_pages(paperclip_image.rich_file.url)
    end

    def replace_resource_url_in_blog_posts
      replace_url_in_blog_posts(paperclip_file.rich_file.url)
    end

    def replace_resource_url_in_pages
      replace_url_in_pages(paperclip_file.rich_file.url)
    end

    # Rewrites every ::Post whose body references the attachment. Matching is
    # done with #url_pattern, so bodies that only link a sized variant are
    # rewritten as well. @occurrance_count is incremented once per changed post.
    def replace_url_in_blog_posts(new_url)
      pattern = url_pattern

      ::Post.all.each do |post|
        body = post.body.to_s # a post without a body must not abort the run
        next unless body =~ pattern

        @occurrance_count += 1
        # Block form: new_url is inserted literally, without backreference expansion.
        post.body = body.gsub(pattern) { new_url }
        post.save!
      end
    end

    # Rewrites every page part whose content references the attachment, using
    # the same matching rules as #replace_url_in_blog_posts.
    def replace_url_in_pages(new_url)
      pattern = url_pattern

      ::Page.all.each do |page|
        page.translations.each do |translation|
          translation.parts.each do |part|
            content = part.content.to_s
            next unless content =~ pattern

            @occurrance_count += 1
            part.content = content.gsub(pattern) { new_url }
            part.save!
          end
        end
      end
    end
  end
end

View File

@ -1,37 +1,47 @@
module Refinery
module WordPress
class Author
attr_reader :author_node
module WordPressImport
class Author
attr_reader :author_node
def initialize(author_node)
@author_node = author_node
end
def initialize(author_node)
@author_node = author_node
end
def login
author_node.xpath("wp:author_login").text
end
def name
name = author_node.xpath("wp:author_display_name").text
name = author_node.xpath("wp:author_first_name").text + " " + author_node.xpath("wp:author_first_name").text if name.blank?
name
end
def email
author_node.xpath("wp:author_email").text
end
def login
author_node.xpath("wp:author_login").text
end
def ==(other)
login == other.login
end
def email
author_node.xpath("wp:author_email").text
end
def inspect
"WordPress::Author: #{login} <#{email}>"
end
def ==(other)
login == other.login
end
def to_refinery
user = User.find_or_initialize_by_username_and_email(login, email)
unless user.persisted?
user.password = 'password'
user.password_confirmation = 'password'
user.save
end
user
def inspect
"WordPress::Author: #{login} <#{email}>"
end
def to_rails
user = ::User.find_or_initialize_by_email(email)
user.wp_username = login
unless user.persisted?
user.name = name
user.password = 'password'
user.password_confirmation = 'password'
end
user.save
puts "User #{login} imported."
user
end
end
end

View File

@ -1,19 +1,17 @@
module Refinery
module WordPress
class Category
attr_accessor :name
module WordPressImport
class Category
attr_accessor :name
def initialize(text)
@name = text
end
def initialize(text)
@name = text
end
def ==(other)
name == other.name
end
def ==(other)
name == other.name
end
def to_refinery
BlogCategory.find_or_create_by_title(name)
end
def to_rails
Tag.find_or_create_by_title(name)
end
end
end

View File

@ -1,48 +1,46 @@
module Refinery
module WordPress
class Comment
attr_reader :node
module WordPressImport
class Comment
attr_reader :node
def initialize(node)
@node = node
end
def initialize(node)
@node = node
end
def author
node.xpath('wp:comment_author').text
end
def author
node.xpath('wp:comment_author').text
end
def email
node.xpath('wp:comment_author_email').text
end
def email
node.xpath('wp:comment_author_email').text
end
def url
node.xpath('wp:comment_author_url').text
end
def url
node.xpath('wp:comment_author_url').text
end
def date
DateTime.parse node.xpath("wp:comment_date").text
end
def date
DateTime.parse node.xpath("wp:comment_date").text
end
def content
node.xpath('wp:comment_content').text
end
def content
node.xpath('wp:comment_content').text
end
def approved?
node.xpath('wp:comment_approved').text.to_i == 1
end
def approved?
node.xpath('wp:comment_approved').text.to_i == 1
end
def ==(other)
(email == other.email) && (date == other.date) && (content == other.content)
end
def ==(other)
(email == other.email) && (date == other.date) && (content == other.content)
end
def to_refinery
comment = BlogComment.new :name => author, :email => email
def to_refinery
comment = BlogComment.new :name => author, :email => email
comment.body = content
comment.created_at = date
comment.state = approved? ? 'approved' : 'rejected'
comment
end
comment.body = content
comment.created_at = date
comment.state = approved? ? 'approved' : 'rejected'
comment
end
end
end

View File

@ -1,51 +1,62 @@
module Refinery
module WordPress
class Dump
attr_reader :doc
module WordPressImport
class Dump
attr_reader :doc
def initialize(file_name)
file_name = File.absolute_path(file_name)
raise "Given file '#{file_name}' no file or not readable." \
unless File.file?(file_name) && File.readable?(file_name)
file = File.open(file_name)
@doc = Nokogiri::XML(file)
def initialize(file_name)
begin
file_name = File.expand_path(file_name)
raise "error" unless File.file?(file_name) && File.readable?(file_name)
rescue
raise "Given file '#{file_name}' is not a file or not readable. Rake tasks take filename arguments like this: rake wordpress:full_import['/path/to/my_file']"
end
def authors
doc.xpath("//wp:author").collect do |author|
Author.new(author)
end
file = File.open(file_name)
if file.size >= 10485760 # 10MB
puts "WARNING: LibXML by default supports 10MB max file size. On some systems your file will be silently truncated; on others, an error will be raised. Consider splitting your file into smaller chunks and running rake tasks individually (authors, then blog/pages, then media), and double-check the import results."
end
def pages(only_published=false)
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
Page.new(page)
end
@doc = Nokogiri::XML(file.read().gsub("\u0004", "")) # get rid of all EOT characters
end
pages = pages.select(&:published?) if only_published
pages
# Wraps every <wp:author> element found in the dump in an Author object and
# returns them as an array.
def authors
  author_nodes = doc.xpath("//wp:author")
  author_nodes.map { |author_node| Author.new(author_node) }
end
def pages(only_published=false)
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
Page.new(page)
end
def posts(only_published=false)
posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post|
Post.new(post)
end
posts = posts.select(&:published?) if only_published
posts
end
pages = pages.select(&:published?) if only_published
pages
end
def tags
doc.xpath("//wp:tag/wp:tag_slug").collect do |tag|
Tag.new(tag.text)
end
# Returns every item of post type 'post' wrapped in a Post object.
#
# only_published - when truthy, only posts answering true to published? are
#                  returned.
def posts(only_published=false)
  wrapped = doc.xpath("//item[wp:post_type = 'post']").map { |item| Post.new(item) }
  return wrapped unless only_published

  wrapped.select { |entry| entry.published? }
end
def categories
doc.xpath("//wp:category/wp:cat_name").collect do |category|
Category.new(category.text)
end
# Builds one Tag per <wp:tag_slug> element nested in a <wp:tag>, using the
# element's text as the tag name.
def tags
  slug_nodes = doc.xpath("//wp:tag/wp:tag_slug")
  slug_nodes.map { |slug_node| Tag.new(slug_node.text) }
end
# Builds one Category per <wp:cat_name> element nested in a <wp:category>,
# using the element's text as the category name.
def categories
  name_nodes = doc.xpath("//wp:category/wp:cat_name")
  name_nodes.map { |name_node| Category.new(name_node.text) }
end
# Returns every item of post type 'attachment' wrapped in an Attachment
# object.
def attachments
  attachment_nodes = doc.xpath("//item[wp:post_type = 'attachment']")
  attachment_nodes.map { |attachment_node| Attachment.new(attachment_node) }
end
end

View File

@ -1,107 +1,144 @@
module Refinery
module WordPress
class Page
include ::ActionView::Helpers::TagHelper
include ::ActionView::Helpers::TextHelper
module WordPressImport
class Page
include ::ActionView::Helpers::TagHelper
include ::ActionView::Helpers::TextHelper
attr_reader :node
attr_reader :node
def initialize(node)
@node = node
def initialize(node)
@node = node
end
def inspect
"WordPress::Page(#{post_id}): #{title}"
end
def link
node.xpath("link").text
end
def title
node.xpath("title").text
end
def content
node.xpath("content:encoded").text
end
def content_formatted
formatted = format_shortcodes(format_syntax_highlighter(format_paragraphs(content)))
# remove all tags inside <pre> that simple_format created
# TODO: replace format_paragraphs with a method, that ignores pre-tags
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
"#{$1}#{strip_tags($2)}#{$3}"
end
def inspect
"WordPress::Page(#{post_id}): #{title}"
formatted
end
def creator
node.xpath("dc:creator").text
end
def post_date
Time.parse node.xpath("wp:post_date").text
end
def publish_date
Time.parse node.xpath("pubDate").text
end
def post_name
node.xpath("wp:post_name").text
end
def post_id
node.xpath("wp:post_id").text.to_i
end
# Returns the integer value of <wp:post_parent>, or nil when that value is 0
# (which is also what an empty or non-numeric text converts to).
def parent_id
  raw_parent = node.xpath("wp:post_parent").text
  parsed = raw_parent.to_i
  return nil if parsed == 0

  parsed
end
def status
node.xpath("wp:status").text
end
def draft?
status != 'publish'
end
def published?
! draft?
end
def ==(other)
post_id == other.post_id
end
#NEED:
# creator -> "user_id"
# wp:post_name -> "slug"
# pubDate -> "published_at"
#OK:
# title -> "title"
# content:encoded -> "body"
# wp:post_date_gmt -> "created_at"
# Creates a ::Page record from this WordPress page via create! and returns
# whatever create! returns.
#
# The creator is not mapped yet (:user_id => creator is still pending).
def to_rails
  attributes = {
    :id => post_id, :title => title,
    :created_at => post_date, :slug => post_name,
    :published_at => publish_date, :body => content_formatted
  }
  ::Page.create!(attributes)
end
private
# Wraps WordPress content in <p> tags, since the export does not contain them.
# Heavily inspired by the simple_format Rails helper; the content is trusted,
# so nothing is sanitized.
#
# text         - raw post content; nil is treated as an empty string. The
#                argument is never modified, so frozen strings are accepted.
# html_options - attributes passed to the tag helper for every opening <p>.
#
# Runs of two or more newlines become paragraph breaks, each remaining
# newline becomes "<br/>\n". Windows line endings are normalized first, so a
# blank line written as "\r\n\r\n" is recognised as a paragraph break too.
#
# Returns the result of html_safe.safe_concat on the wrapped markup.
def format_paragraphs(text, html_options={})
  start_tag = tag('p', html_options, true)

  body = text.nil? ? '' : text.to_s
  body = body.gsub(/\r\n/, "\n")                    # \r\n -> \n, so blank CRLF lines are seen below
  body = body.gsub(/\n\n+/, "</p>#{start_tag}")     # 2+ newlines -> paragraph
  body = body.gsub(/\n/, "<br/>\n")                 # remaining newlines -> line break (after paragraphs to avoid <br/><br/>)

  "#{start_tag}#{body}".html_safe.safe_concat("</p>")
end
# Converts SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/)
# blocks into <pre> tags.
#
# A WordPress plugin lets authors wrap code in [lang]...[/lang]; every such
# block becomes a <pre> whose class names the language:
#
#   [ruby]p "Hello World"[/ruby]
#   -> <pre class="brush: ruby">p "Hello World"</pre>
#
# Returns a new string; text without matching blocks comes back unchanged.
def format_syntax_highlighter(text)
  # Opening [word], non-empty body (may span lines), closing tag with the same word.
  bracket_block = /\[(\w+)\](.+?)\[\/\1\]/m
  pre_template = '<pre class="brush: \1">\2</pre>'
  text.gsub(bracket_block, pre_template)
end
# Replace Wordpress shortcodes with formatted HTML (see shortcode gem and support/templates folder)
def format_shortcodes(text)
Shortcode.setup do |config|
# the template parser to use
config.template_parser = :haml # :erb or :haml supported, :haml is default
# location of the template files
config.template_path = ::File.join(::File.dirname(__FILE__), "..", "..","support/templates/haml")
# a list of block tags to support e.g. [quote]Hello World[/quote]
config.block_tags = [:caption, :column]
# a list of self closing tags to support e.g. [youtube id="12345"]
config.self_closing_tags = [:end_columns, "google-map-v3"]
end
def title
node.xpath("title").text
end
def content
node.xpath("content:encoded").text
end
def content_formatted
formatted = format_syntax_highlighter(format_paragraphs(content))
# remove all tags inside <pre> that simple_format created
# TODO: replace format_paragraphs with a method, that ignores pre-tags
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
"#{$1}#{strip_tags($2)}#{$3}"
end
formatted
end
def creator
node.xpath("dc:creator").text
end
def post_date
DateTime.parse node.xpath("wp:post_date").text
end
def post_id
node.xpath("wp:post_id").text.to_i
end
def parent_id
dump_id = node.xpath("wp:post_parent").text.to_i
dump_id == 0 ? nil : dump_id
end
def status
node.xpath("wp:status").text
end
def draft?
status != 'publish'
end
def published?
! draft?
end
def ==(other)
post_id == other.post_id
end
def to_refinery
page = ::Page.create!(:id => post_id, :title => title,
:created_at => post_date, :draft => draft?)
page.parts.create(:title => 'Body', :body => content_formatted)
page
end
private
def format_paragraphs(text, html_options={})
# WordPress doesn't export <p>-Tags, so let's run a simple_format over
# the content. As we trust ourselves, no sanatize. This code is heavily
# inspired by the simple_format rails helper
text = ''.html_safe if text.nil?
start_tag = tag('p', html_options, true)
text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n
text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}") # 2+ newline -> paragraph
text.insert 0, start_tag
text.html_safe.safe_concat("</p>")
end
def format_syntax_highlighter(text)
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
# In WordPress you can (via a plugin) enclose code in [lang][/lang]
# blocks, which are converted to a <pre>-tag with a class corresponding
# to the language.
#
# Example:
# [ruby]p "Hello World"[/ruby]
# -> <pre class="brush: ruby">p "Hello world"</pre>
text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
end
Shortcode.process(text)
end
end
end

View File

@ -1,85 +1,67 @@
module Refinery
module WordPress
class Post < Page
def tags
# xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
path = if node.xpath("category[@domain='post_tag']").count > 0
"category[@domain='post_tag']"
else
"category[@domain='tag']"
end
node.xpath(path).collect do |tag_node|
Tag.new(tag_node.text)
end
module WordPressImport
class Post < Page
def tags
# xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
path = if node.xpath("category[@domain='post_tag']").count > 0
"category[@domain='post_tag']"
else
"category[@domain='tag']"
end
def tag_list
tags.collect(&:name).join(',')
node.xpath(path).collect do |tag_node|
Tag.new(tag_node.text)
end
def categories
node.xpath("category[@domain='category']").collect do |cat|
Category.new(cat.text)
end
end
def comments
node.xpath("wp:comment").collect do |comment_node|
Comment.new(comment_node)
end
end
def to_refinery
user = ::User.find_by_username(creator) || ::User.first
raise "Referenced User doesn't exist! Make sure the authors are imported first." \
unless user
begin
post = ::BlogPost.new :title => title, :body => content_formatted,
:draft => draft?, :published_at => post_date, :created_at => post_date,
:author => user, :tag_list => tag_list
post.save!
::BlogPost.transaction do
categories.each do |category|
post.categories << category.to_refinery
end
comments.each do |comment|
comment = comment.to_refinery
comment.post = post
comment.save
end
end
rescue ActiveRecord::RecordInvalid
# if the title has already been taken (WP allows duplicates here,
# refinery doesn't) append the post_id to it, making it unique
post.title = "#{title}-#{post_id}"
post.save
end
post
end
def self.create_blog_page_if_necessary
# refinerycms wants a page at /blog, so let's make sure there is one
# taken from the original db seeds from refinery-blog
unless ::Page.where("link_url = ?", '/blog').exists?
page = ::Page.create(
:title => "Blog",
:link_url => "/blog",
:deletable => false,
:position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
:menu_match => "^/blogs?(\/|\/.+?|)$"
)
::Page.default_parts.each do |default_page_part|
page.parts.create(:title => default_page_part, :body => nil)
end
end
end
end
# Returns the names of all tags of this post joined by commas (no spaces).
def tag_list
  tag_names = tags.map { |tag| tag.name }
  tag_names.join(',')
end
# Builds one Category per <category domain="category"> child of this post,
# named after the element's text.
def categories
  category_nodes = node.xpath("category[@domain='category']")
  category_nodes.map { |category_node| Category.new(category_node.text) }
end
# Wraps every <wp:comment> child of this post in a Comment object.
def comments
  comment_nodes = node.xpath("wp:comment")
  comment_nodes.map { |wp_comment| Comment.new(wp_comment) }
end
# Creates a ::Post record (with one English translation) from this WordPress
# post.
#
# blog_slug - identifies which blog this import comes from; stored as wp_blog.
#
# The author is looked up through ::User.find_by_wp_username(creator); a
# RuntimeError is raised when no such user exists. When the created post
# carries errors it is printed and a RuntimeError with the full messages is
# raised. Otherwise a confirmation is printed and the reloaded post returned.
def to_rails(blog_slug)
  author = ::User.find_by_wp_username(creator)
  raise "User with wp_username #{creator} not found" if author.nil?

  record = ::Post.create({
    :wp_post_id => post_id, :slug => post_name,
    :user_id => author.id, :title => title,
    :created_at => post_date,
    :published_at => publish_date,
    :wp_link => link,
    :wp_blog => blog_slug,
    :translations_attributes => { "0" => {
      :locale => "en",
      :title => title,
      :body => content_formatted,
      # union of the post's category names and its tag names
      :category_list => categories.collect(&:name) | tags.collect(&:name)
    }}
  })

  unless record.errors.blank?
    puts record.inspect
    raise record.errors.full_messages.to_s
  end

  puts "Post #{post_name} imported."
  record.reload
end
end
end

View File

@ -1,9 +1,7 @@
module Refinery
module WordPress
class Railtie < Rails::Railtie
rake_tasks do
load "tasks/wordpress.rake"
end
module WordPressImport
  # Railtie that loads the importer's rake task file ("tasks/wordpress.rake")
  # when the host application loads its rake tasks.
  class Railtie < Rails::Railtie
    rake_tasks { load "tasks/wordpress.rake" }
  end
end

View File

@ -1,20 +1,18 @@
module Refinery
module WordPress
class Tag
attr_accessor :name
def initialize(text)
@name = text
end
def ==(other)
name == other.name
end
def to_refinery
::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
end
module WordPressImport
  # Value object for a WordPress tag; a tag is identified by its name only.
  class Tag
    attr_accessor :name

    # text - the tag name as read from the dump.
    def initialize(text)
      @name = text
    end

    # Two tags are equal when their names match. Any object exposing a
    # matching #name compares equal (duck typing, as before); objects without
    # a #name, such as nil, are unequal instead of raising NoMethodError.
    def ==(other)
      other.respond_to?(:name) && name == other.name
    end

    # Finds or creates the ActsAsTaggableOn tag carrying this name.
    def to_refinery
      ::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
    end
  end
end

View File

@ -1,23 +0,0 @@
# Provide a simple gemspec so you can easily use your enginex
# project in your rails apps through git.
Gem::Specification.new do |s|
s.name = "refinerycms-wordpress-import"
s.summary = "Import WordPress XML dumps into refinerycms(-blog)."
s.description = "This gem imports a WordPress XML dump into refinerycms (Page, User) and refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)"
s.version = "0.2.0"
s.date = "2011-06-05"
s.authors = ['Marc Remolt']
s.email = 'marc.remolt@googlemail.com'
s.homepage = 'https://github.com/mremolt/refinerycms-wordpress-import'
s.add_dependency 'bundler', '~> 1.0'
s.add_dependency 'refinerycms', '~> 1.0.0'
s.add_dependency 'refinerycms-blog', '~> 1.5.2'
s.add_dependency 'nokogiri', '~> 1.4.4'
s.add_development_dependency 'rspec-rails'
s.add_development_dependency 'database_cleaner'
s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
end

View File

@ -1,6 +1,3 @@
require 'yaml'
YAML::ENGINE.yamler= 'syck'
require 'rubygems'
gemfile = File.expand_path('../../../../Gemfile', __FILE__)

View File

@ -24,4 +24,6 @@ Dummy::Application.configure do
config.action_dispatch.best_standards_support = :builtin
end
Refinery.rescue_not_found = false
Refinery.rescue_not_found = false# When true will use Amazon's Simple Storage Service on your production machine
# instead of the default file system for resources and images
Refinery.s3_backend = !(ENV['S3_KEY'].nil? || ENV['S3_SECRET'].nil?)

View File

@ -47,4 +47,6 @@ Dummy::Application.configure do
# Send deprecation notices to registered listeners
config.active_support.deprecation = :notify
end
Refinery.rescue_not_found = true
Refinery.rescue_not_found = true# When true will use Amazon's Simple Storage Service on your production machine
# instead of the default file system for resources and images
Refinery.s3_backend = !(ENV['S3_KEY'].nil? || ENV['S3_SECRET'].nil?)

View File

@ -33,4 +33,6 @@ Dummy::Application.configure do
# Print deprecation notices to the stderr
config.active_support.deprecation = :stderr
end
Refinery.rescue_not_found = false
Refinery.rescue_not_found = false# When true will use Amazon's Simple Storage Service on your production machine
# instead of the default file system for resources and images
Refinery.s3_backend = !(ENV['S3_KEY'].nil? || ENV['S3_SECRET'].nil?)

View File

@ -0,0 +1,25 @@
# Creates the SeoMetum table (and its indexes) unless it already exists.
class CreateSeoMetaForBlog < ActiveRecord::Migration
  class << self
    def up
      # Nothing to do when the table is already there.
      return if ::SeoMetum.table_exists?

      seo_table = ::SeoMetum.table_name

      create_table seo_table do |table|
        table.integer :seo_meta_id
        table.string :seo_meta_type
        table.string :browser_title
        table.string :meta_keywords
        table.text :meta_description
        table.timestamps
      end

      add_index seo_table, :id
      add_index seo_table, [:seo_meta_id, :seo_meta_type]
    end

    def down
      # can't drop the table because someone else might be using it.
    end
  end
end

View File

@ -0,0 +1,11 @@
# Adds a string cached_slug column to blog_categories and blog_posts.
class AddCachedSlugs < ActiveRecord::Migration
  class << self
    def up
      add_column :blog_categories, :cached_slug, :string
      add_column :blog_posts, :cached_slug, :string
    end

    # Removes both cached_slug columns again.
    def down
      remove_column :blog_categories, :cached_slug
      remove_column :blog_posts, :cached_slug
    end
  end
end

View File

@ -0,0 +1,9 @@
# Adds a string custom_url column to blog_posts.
class AddCustomUrlFieldToBlogPosts < ActiveRecord::Migration
  class << self
    def up
      add_column :blog_posts, :custom_url, :string
    end

    # Removes the custom_url column again.
    def down
      remove_column :blog_posts, :custom_url
    end
  end
end

View File

@ -0,0 +1,10 @@
# Adds a text custom_teaser column to blog_posts.
class AddCustomTeaserFieldToBlogPosts < ActiveRecord::Migration
  class << self
    def up
      add_column :blog_posts, :custom_teaser, :text
    end

    # Removes the custom_teaser column again.
    def down
      remove_column :blog_posts, :custom_teaser
    end
  end
end

View File

@ -10,12 +10,13 @@
#
# It's strongly recommended to check this file into your version control system.
ActiveRecord::Schema.define(:version => 20110602094445) do
ActiveRecord::Schema.define(:version => 20110812162204) do
create_table "blog_categories", :force => true do |t|
t.string "title"
t.datetime "created_at"
t.datetime "updated_at"
t.string "cached_slug"
end
add_index "blog_categories", ["id"], :name => "index_blog_categories_on_id"
@ -48,6 +49,9 @@ ActiveRecord::Schema.define(:version => 20110602094445) do
t.datetime "created_at"
t.datetime "updated_at"
t.integer "user_id"
t.string "cached_slug"
t.string "custom_url"
t.text "custom_teaser"
end
add_index "blog_posts", ["id"], :name => "index_blog_posts_on_id"

View File

@ -1,3 +1,12 @@
::Page.reset_column_information
# Check whether all columns are applied yet by seo_meta.
unless !defined?(::SeoMeta) || ::SeoMeta.attributes.keys.all? { |k|
::Page.translation_class.instance_methods.include?(k)
}
# Make pages model seo_meta because not all columns are accessible.
::Page.translation_class.send :is_seo_meta
end
page_position = -1
home_page = Page.create(:title => "Home",

View File

@ -1,16 +1,20 @@
User.find(:all).each do |user|
user.plugins.create(:name => "refinerycms_blog",
:position => (user.plugins.maximum(:position) || -1) +1)
end
::User.find(:all).each do |user|
if user.plugins.where(:name => 'refinerycms_blog').blank?
user.plugins.create(:name => "refinerycms_blog",
:position => (user.plugins.maximum(:position) || -1) +1)
end
end if defined?(::User)
page = Page.create(
:title => "Blog",
:link_url => "/blog",
:deletable => false,
:position => ((Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
:menu_match => "^/blogs?(\/|\/.+?|)$"
)
if defined?(::Page)
page = ::Page.create(
:title => "Blog",
:link_url => "/blog",
:deletable => false,
:position => ((Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
:menu_match => "^/blogs?(\/|\/.+?|)$"
)
Page.default_parts.each do |default_page_part|
page.parts.create(:title => default_page_part, :body => nil)
::Page.default_parts.each do |default_page_part|
page.parts.create(:title => default_page_part, :body => nil)
end
end

BIN
spec/fixtures/200px-Tux.svg_.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

View File

@ -16,7 +16,7 @@
<!-- 7. WordPress will then import each of the posts, pages, comments, categories, etc. -->
<!-- contained in this file into your site. -->
<!-- generator="WordPress/3.1.2" created="2011-05-21 12:27" -->
<!-- generator="WordPress/3.1.2" created="2011-06-06 18:42" -->
<rss version="2.0"
xmlns:excerpt="http://wordpress.org/export/1.1/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
@ -29,7 +29,7 @@
<title>My test blog</title>
<link>http://localhost/wordpress</link>
<description>Just another WordPress site</description>
<pubDate>Sat, 21 May 2011 12:27:19 +0000</pubDate>
<pubDate>Mon, 06 Jun 2011 18:42:09 +0000</pubDate>
<language>en</language>
<wp:wxr_version>1.1</wp:wxr_version>
<wp:base_site_url>http://localhost/wordpress</wp:base_site_url>
@ -69,6 +69,10 @@
<wp:post_password></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
<category domain="category" nicename="uncategorized"><![CDATA[Uncategorized]]></category>
<wp:postmeta>
<wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[1]]></wp:meta_value>
</wp:postmeta>
<wp:comment>
<wp:comment_id>1</wp:comment_id>
<wp:comment_author><![CDATA[Mr WordPress]]></wp:comment_author>
@ -92,13 +96,9 @@
<guid isPermaLink="false">http://localhost/wordpress/?page_id=2</guid>
<description></description>
<content:encoded><![CDATA[This is an example page. It's different from a blog post because it will stay in one place and will show up in your site navigation (in most themes). Most people start with an About page that introduces them to potential site visitors. It might say something like this:
<blockquote>Hi there! I'm a bike messenger by day, aspiring actor by night, and this is my blog. I live in Los Angeles, have a great dog named Jack, and I like pi&#241;a coladas. (And gettin' caught in the rain.)</blockquote>
<blockquote>Hi there! I'm a bike messenger by day, aspiring actor by night, and this is my blog. I live in Los Angeles, have a great dog named Jack, and I like piña coladas. (And gettin' caught in the rain.)</blockquote>
...or something like this:
<blockquote>The XYZ Doohickey Company was founded in 1971, and has been providing quality doohickies to the public ever since. Located in Gotham City, XYZ employs over 2,000 people and does all kinds of awesome things for the Gotham community.</blockquote>
As a new WordPress user, you should go to <a href="http://localhost/wordpress/wp-admin/">your dashboard</a> to delete this page and create new pages for your content. Have fun!]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>2</wp:post_id>
@ -117,6 +117,10 @@ As a new WordPress user, you should go to <a href="http://localhost/wordpress/wp
<wp:meta_key>_wp_page_template</wp:meta_key>
<wp:meta_value><![CDATA[default]]></wp:meta_value>
</wp:postmeta>
<wp:postmeta>
<wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[1]]></wp:meta_value>
</wp:postmeta>
</item>
<item>
<title>Second blog post </title>
@ -159,8 +163,12 @@ In hac habitasse platea dictumst. Nunc quis tortor sed libero hendrerit dapibu
<description></description>
<content:encoded><![CDATA[This is just a standard text page example. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin metus dolor, hendrerit sit amet, aliquet nec, posuere sed, purus. Nullam et velit iaculis odio sagittis placerat. Duis metus tellus, pellentesque ut, luctus id, egestas a, lorem. Praesent vitae mauris. Aliquam sed nulla. Sed id nunc vitae leo suscipit viverra. Proin at leo ut lacus consequat rhoncus. In hac habitasse platea dictumst. Nunc quis tortor sed libero hendrerit dapibus.
<a href="http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png"><img class="alignnone size-thumbnail wp-image-13" title="200px-Tux.svg" src="http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_-150x150.png" alt="Tux, the Linux mascot" width="150" height="150" /></a>
Integer interdum purus id erat. Duis nec velit vitae dolor mattis euismod. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse pellentesque dignissim lacus.
<a href="http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png"><img class="alignnone size-full wp-image-13" title="200px-Tux.svg" src="http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png" alt="" width="200" height="235" /></a>
Nulla semper euismod arcu. Suspendisse egestas, erat a consectetur dapibus, felis orci cursus eros, et sollicitudin purus urna et metus. Integer eget est sed nunc euismod vestibulum. Integer nulla dui, tristique in, euismod et, interdum imperdiet, enim. Mauris at lectus. Sed egestas tortor nec mi.]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>6</wp:post_id>
@ -182,12 +190,12 @@ Nulla semper euismod arcu. Suspendisse egestas, erat a consectetur dapibus, fel
<wp:postmeta>
<wp:meta_key>_edit_last</wp:meta_key>
<wp:meta_value><![CDATA[1]]></wp:meta_value>
</wp:postmeta>
</wp:postmeta>
<wp:comment>
<wp:comment_id>2</wp:comment_id>
<wp:comment_author><![CDATA[admin]]></wp:comment_author>
<wp:comment_author_email>admin@example.com</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_url>http://www.example.com/</wp:comment_author_url>
<wp:comment_author_IP>127.0.0.1</wp:comment_author_IP>
<wp:comment_date>2011-05-21 12:26:24</wp:comment_date>
<wp:comment_date_gmt>2011-05-21 12:26:24</wp:comment_date_gmt>
@ -201,7 +209,7 @@ Nulla semper euismod arcu. Suspendisse egestas, erat a consectetur dapibus, fel
<wp:comment_id>3</wp:comment_id>
<wp:comment_author><![CDATA[admin]]></wp:comment_author>
<wp:comment_author_email>admin@example.com</wp:comment_author_email>
<wp:comment_author_url>http://www.example.com/</wp:comment_author_url>
<wp:comment_author_url>http://www.example.com/</wp:comment_author_url>
<wp:comment_author_IP>127.0.0.1</wp:comment_author_IP>
<wp:comment_date>2011-05-21 12:26:30</wp:comment_date>
<wp:comment_date_gmt>2011-05-21 12:26:30</wp:comment_date_gmt>
@ -253,6 +261,8 @@ In hac habitasse platea dictumst. Nunc quis tortor sed libero hendrerit dapibu
<description></description>
<content:encoded><![CDATA[This is just a standard text page example. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin metus dolor, hendrerit sit amet, aliquet nec, posuere sed, purus. Nullam et velit iaculis odio sagittis placerat.
<a href="http://localhost/wordpress/wp-content/uploads/2011/05/cv.txt">This is my CV</a>, please download!
Duis metus tellus, pellentesque ut, luctus id, egestas a, lorem. Praesent vitae mauris. Aliquam sed nulla. Sed id nunc vitae leo suscipit <strong>viverra</strong>. Proin at leo ut lacus consequat rhoncus. In hac habitasse platea dictumst. Nunc quis tortor sed libero hendrerit dapibus. Integer interdum purus id erat. Duis nec velit vitae dolor mattis euismod.
Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Suspendisse pellentesque dignissim lacus. Nulla semper euismod arcu. Suspendisse egestas, erat a consectetur dapibus, felis orci cursus eros, et sollicitudin purus urna et metus. Integer eget est sed nunc euismod vestibulum. Integer nulla dui, tristique in, euismod et, interdum imperdiet, enim. Mauris at lectus. Sed egestas tortor nec mi.]]></content:encoded>
@ -278,5 +288,71 @@ Class aptent taciti sociosqu ad litora torquent per conubia nostra, per incepto
<wp:meta_value><![CDATA[default]]></wp:meta_value>
</wp:postmeta>
</item>
<item>
<title>200px-Tux.svg</title>
<link>http://localhost/wordpress/?attachment_id=13</link>
<pubDate>Sun, 05 Jun 2011 15:26:51 +0000</pubDate>
<dc:creator>admin</dc:creator>
<guid isPermaLink="false">http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png</guid>
<description></description>
<content:encoded><![CDATA[]]></content:encoded>
<excerpt:encoded><![CDATA[]]></excerpt:encoded>
<wp:post_id>13</wp:post_id>
<wp:post_date>2011-06-05 15:26:51</wp:post_date>
<wp:post_date_gmt>2011-06-05 15:26:51</wp:post_date_gmt>
<wp:comment_status>open</wp:comment_status>
<wp:ping_status>open</wp:ping_status>
<wp:post_name>200px-tux-svg</wp:post_name>
<wp:status>inherit</wp:status>
<wp:post_parent>6</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type>attachment</wp:post_type>
<wp:post_password></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
<wp:attachment_url>http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png</wp:attachment_url>
<wp:postmeta>
<wp:meta_key>_wp_attached_file</wp:meta_key>
<wp:meta_value><![CDATA[2011/05/200px-Tux.svg_.png]]></wp:meta_value>
</wp:postmeta>
<wp:postmeta>
<wp:meta_key>_wp_attachment_metadata</wp:meta_key>
<wp:meta_value><![CDATA[a:6:{s:5:"width";s:3:"200";s:6:"height";s:3:"235";s:14:"hwstring_small";s:22:"height='96' width='81'";s:4:"file";s:26:"2011/05/200px-Tux.svg_.png";s:5:"sizes";a:2:{s:9:"thumbnail";a:3:{s:4:"file";s:26:"200px-Tux.svg_-150x150.png";s:5:"width";s:3:"150";s:6:"height";s:3:"150";}s:14:"post-thumbnail";a:3:{s:4:"file";s:26:"200px-Tux.svg_-200x198.png";s:5:"width";s:3:"200";s:6:"height";s:3:"198";}}s:10:"image_meta";a:10:{s:8:"aperture";s:1:"0";s:6:"credit";s:0:"";s:6:"camera";s:0:"";s:7:"caption";s:0:"";s:17:"created_timestamp";s:1:"0";s:9:"copyright";s:0:"";s:12:"focal_length";s:1:"0";s:3:"iso";s:1:"0";s:13:"shutter_speed";s:1:"0";s:5:"title";s:0:"";}}]]></wp:meta_value>
</wp:postmeta>
<wp:postmeta>
<wp:meta_key>_wp_attachment_image_alt</wp:meta_key>
<wp:meta_value><![CDATA[Tux, the Linux mascot]]></wp:meta_value>
</wp:postmeta>
</item>
<item>
<title>cv</title>
<link>http://localhost/wordpress/?attachment_id=18</link>
<pubDate>Mon, 06 Jun 2011 17:27:50 +0000</pubDate>
<dc:creator>admin</dc:creator>
<guid isPermaLink="false">http://localhost/wordpress/wp-content/uploads/2011/05/cv.txt</guid>
<description></description>
<content:encoded><![CDATA[]]></content:encoded>
<excerpt:encoded><![CDATA[My CV]]></excerpt:encoded>
<wp:post_id>18</wp:post_id>
<wp:post_date>2011-06-06 17:27:50</wp:post_date>
<wp:post_date_gmt>2011-06-06 17:27:50</wp:post_date_gmt>
<wp:comment_status>open</wp:comment_status>
<wp:ping_status>open</wp:ping_status>
<wp:post_name>cv</wp:post_name>
<wp:status>inherit</wp:status>
<wp:post_parent>10</wp:post_parent>
<wp:menu_order>0</wp:menu_order>
<wp:post_type>attachment</wp:post_type>
<wp:post_password></wp:post_password>
<wp:is_sticky>0</wp:is_sticky>
<wp:attachment_url>http://localhost/wordpress/wp-content/uploads/2011/05/cv.txt</wp:attachment_url>
<wp:postmeta>
<wp:meta_key>_wp_attached_file</wp:meta_key>
<wp:meta_value><![CDATA[2011/05/cv.txt]]></wp:meta_value>
</wp:postmeta>
<wp:postmeta>
<wp:meta_key>_wp_attachment_metadata</wp:meta_key>
<wp:meta_value><![CDATA[a:0:{}]]></wp:meta_value>
</wp:postmeta>
</item>
</channel>
</rss>

View File

@ -0,0 +1,93 @@
require 'spec_helper'
describe WordPressImport::Attachment, :type => :model do
context "an image attchment" do
let(:attachment) { test_dump.attachments.first }
specify { attachment.title.should == '200px-Tux.svg' }
# doesn't get exported atm. for some reason
specify { attachment.description.should == '' }
specify { attachment.url.should == 'http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png' }
specify { attachment.file_name.should == '200px-Tux.svg_.png' }
specify { attachment.post_date.should == DateTime.new(2011, 6, 5, 15, 26, 51) }
specify { attachment.should be_an_image }
describe "#to_refinery" do
before do
@image = attachment.to_refinery
end
it "should create an Image from the Attachment" do
@image.should be_a(Image)
end
it "should copy the attributes from Attachment" do
@image.created_at.should == attachment.post_date
@image.image.url.end_with?(attachment.file_name).should be_true
end
end
describe "#replace_url" do
let(:post) { BlogPost.first }
before do
test_dump.authors.each(&:to_refinery)
test_dump.posts.each(&:to_refinery)
@image = attachment.to_refinery
attachment.replace_url
end
specify { post.body.should_not include attachment.url }
specify { post.body.should_not include '200px-Tux.svg_-150x150.png' }
specify { post.body.should_not include 'wp-content' }
it "should replace attachment urls in the generated BlogPosts" do
post.body.should include(@image.image.url)
end
end
end
context "a file attachment" do
let(:attachment) { test_dump.attachments.last }
specify { attachment.title.should == 'cv' }
specify { attachment.url.should == 'http://localhost/wordpress/wp-content/uploads/2011/05/cv.txt' }
specify { attachment.file_name.should == 'cv.txt' }
specify { attachment.post_date.should == DateTime.new(2011, 6, 6, 17, 27, 50) }
specify { attachment.should_not be_an_image }
describe '#to_refinery' do
before do
@resource = attachment.to_refinery
end
specify { Resource.should have(1).record }
specify { @resource.should be_a(Resource) }
it "should copy the attributes from Attachment" do
@resource.created_at.should == attachment.post_date
@resource.file.url.end_with?(attachment.file_name).should be_true
end
end
describe '#replace_resource_url' do
let(:page_part) { Page.last.parts.first }
before do
test_dump.pages.each(&:to_refinery)
@resource = attachment.to_refinery
attachment.replace_url
end
specify { page_part.body.should_not include attachment.url }
specify { page_part.body.should_not include 'wp-content' }
it "should replace attachment urls in the generated BlogPosts" do
page_part.body.should include(@resource.file.url)
end
end
end
end

View File

@ -1,10 +1,10 @@
require 'spec_helper'
describe Refinery::WordPress::Author, :type => :model do
describe WordPressImport::Author, :type => :model do
let(:author) { test_dump.authors.first }
it { author.login.should == 'admin' }
it { author.email.should == 'admin@example.com' }
specify { author.login.should == 'admin' }
specify { author.email.should == 'admin@example.com' }
describe "#to_refinery" do
before do

View File

@ -1,15 +1,15 @@
require 'spec_helper'
describe Refinery::WordPress::Category, :type => :model do
let(:category) { Refinery::WordPress::Category.new('Rant') }
describe WordPressImport::Category, :type => :model do
let(:category) { WordPressImport::Category.new('Rant') }
describe "#name" do
specify { category.name.should == 'Rant' }
end
describe "#==" do
specify { category.should == Refinery::WordPress::Category.new('Rant') }
specify { category.should_not == Refinery::WordPress::Category.new('Tutorials') }
specify { category.should == WordPressImport::Category.new('Rant') }
specify { category.should_not == WordPressImport::Category.new('Tutorials') }
end
describe "#to_refinery" do

View File

@ -1,10 +1,10 @@
require 'spec_helper'
describe Refinery::WordPress::Dump, :type => :model do
describe WordPressImport::Dump, :type => :model do
let(:dump) { test_dump }
it "should create a Dump object given a xml file" do
dump.should be_a Refinery::WordPress::Dump
dump.should be_a WordPressImport::Dump
end
it "should include a Nokogiri::XML object" do
@ -13,11 +13,12 @@ describe Refinery::WordPress::Dump, :type => :model do
describe "#tags" do
let(:tags) do
[ Refinery::WordPress::Tag.new('css'), Refinery::WordPress::Tag.new('html'),
Refinery::WordPress::Tag.new('php'), Refinery::WordPress::Tag.new('ruby')]
[ WordPressImport::Tag.new('css'), WordPressImport::Tag.new('html'),
WordPressImport::Tag.new('php'), WordPressImport::Tag.new('ruby')]
end
# Use an expectation: a bare `==` only computes a boolean that RSpec ignores,
# so the example would pass whatever the count is.
specify { dump.tags.count.should == 4 }
specify { dump.tags.first.should be_a(WordPressImport::Tag) }
it "should return all included tags" do
tags.each do |tag|
@ -28,11 +29,12 @@ describe Refinery::WordPress::Dump, :type => :model do
describe "#categories" do
let(:categories) do
[ Refinery::WordPress::Category.new('Rant'), Refinery::WordPress::Category.new('Tutorials'),
Refinery::WordPress::Category.new('Uncategorized') ]
[ WordPressImport::Category.new('Rant'), WordPressImport::Category.new('Tutorials'),
WordPressImport::Category.new('Uncategorized') ]
end
# NOTE(review): this comparison is never asserted (there is no `should`), so
# the example always passes. Before turning it into an expectation, confirm
# the expected count against the fixture; the `categories` list in this
# describe block names only three categories. TODO confirm.
specify { dump.categories.count == 4 }
specify { dump.categories.first.should be_a(WordPressImport::Category) }
it "should return all included categories" do
categories.each do |cat|
@ -46,6 +48,8 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.pages.should have(3).pages
end
specify { dump.pages.first.should be_a(WordPressImport::Page) }
it "should return only published pages with only_published=true" do
dump.pages(true).should have(2).pages
end
@ -55,6 +59,8 @@ describe Refinery::WordPress::Dump, :type => :model do
it "should return all authors" do
dump.authors.should have(1).author
end
specify { dump.authors.first.should be_a(WordPressImport::Author) }
end
describe "#posts" do
@ -62,8 +68,18 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.posts.should have(3).posts
end
specify { dump.posts.first.should be_a(WordPressImport::Post) }
it "should return only published posts with only_published=true" do
dump.posts(true).should have(2).posts
end
end
describe "#attachments" do
it "should return all attachments" do
dump.attachments.should have(2).attachments
end
specify { dump.attachments.first.should be_a(WordPressImport::Attachment) }
end
end

View File

@ -1,19 +1,19 @@
require 'spec_helper'
describe Refinery::WordPress::Page, :type => :model do
describe WordPressImport::Page, :type => :model do
let(:dump) { test_dump }
let(:page) { test_dump.pages.last }
it { page.title.should == 'About me' }
it { page.content.should include('Lorem ipsum dolor sit') }
it { page.creator.should == 'admin' }
it { page.post_date.should == DateTime.new(2011, 5, 21, 12, 25, 42) }
it { page.post_id.should == 10 }
it { page.parent_id.should == 8 }
specify { page.title.should == 'About me' }
specify { page.content.should include('Lorem ipsum dolor sit') }
specify { page.creator.should == 'admin' }
specify { page.post_date.should == DateTime.new(2011, 5, 21, 12, 25, 42) }
specify { page.post_id.should == 10 }
specify { page.parent_id.should == 8 }
it { page.should == dump.pages.last }
it { page.should_not == dump.pages.first }
specify { page.should == dump.pages.last }
specify { page.should_not == dump.pages.first }
describe "#to_refinery" do
include ::ActionView::Helpers::TagHelper

View File

@ -1,34 +1,34 @@
require 'spec_helper'
describe Refinery::WordPress::Post, :type => :model do
describe WordPressImport::Post, :type => :model do
let(:post) { test_dump.posts.last }
it { post.title.should == 'Third blog post' }
it { post.content.should include('Lorem ipsum dolor sit') }
it { post.content_formatted.should include('Lorem ipsum dolor sit') }
it { post.creator.should == 'admin' }
it { post.post_date.should == DateTime.new(2011, 5, 21, 12, 24, 45) }
it { post.post_id.should == 6 }
it { post.parent_id.should == nil }
it { post.status.should == 'publish' }
specify { post.title.should == 'Third blog post' }
specify { post.content.should include('Lorem ipsum dolor sit') }
specify { post.content_formatted.should include('Lorem ipsum dolor sit') }
specify { post.creator.should == 'admin' }
specify { post.post_date.should == DateTime.new(2011, 5, 21, 12, 24, 45) }
specify { post.post_id.should == 6 }
specify { post.parent_id.should == nil }
specify { post.status.should == 'publish' }
it { post.should == test_dump.posts.last }
it { post.should_not == test_dump.posts.first }
specify { post.should == test_dump.posts.last }
specify { post.should_not == test_dump.posts.first }
describe "#categories" do
it { post.categories.should have(1).category }
it { post.categories.first.should == Refinery::WordPress::Category.new('Rant') }
specify { post.categories.should have(1).category }
specify { post.categories.first.should == WordPressImport::Category.new('Rant') }
end
describe "#tags" do
it { post.tags.should have(3).tags }
specify { post.tags.should have(3).tags }
it { post.tags.should include(Refinery::WordPress::Tag.new('css')) }
it { post.tags.should include(Refinery::WordPress::Tag.new('html')) }
it { post.tags.should include(Refinery::WordPress::Tag.new('php')) }
specify { post.tags.should include(WordPressImport::Tag.new('css')) }
specify { post.tags.should include(WordPressImport::Tag.new('html')) }
specify { post.tags.should include(WordPressImport::Tag.new('php')) }
end
it { post.tag_list.should == 'css,html,php' }
specify { post.tag_list.should == 'css,html,php' }
describe "#comments" do
it "should return all attached comments" do
@ -38,14 +38,14 @@ describe Refinery::WordPress::Post, :type => :model do
context "the last comment" do
let(:comment) { post.comments.last }
it { comment.author.should == 'admin' }
it { comment.email.should == 'admin@example.com' }
it { comment.url.should == 'http://www.example.com/' }
it { comment.date.should == DateTime.new(2011, 5, 21, 12, 26, 30) }
it { comment.content.should include('Another one!') }
it { comment.should be_approved }
specify { comment.author.should == 'admin' }
specify { comment.email.should == 'admin@example.com' }
specify { comment.url.should == 'http://www.example.com/' }
specify { comment.date.should == DateTime.new(2011, 5, 21, 12, 26, 30) }
specify { comment.content.should include('Another one!') }
specify { comment.should be_approved }
it { comment.should == post.comments.last }
specify { comment.should == post.comments.last }
describe "#to_refinery" do
before do
@ -63,6 +63,7 @@ describe Refinery::WordPress::Post, :type => :model do
@comment.body.should == comment.content
@comment.state.should == 'approved'
@comment.created_at.should == comment.date
@comment.created_at.should == comment.date
end
end
end
@ -79,16 +80,13 @@ describe Refinery::WordPress::Post, :type => :model do
@post = post.to_refinery
end
it { BlogPost.should have(1).record }
specify { BlogPost.should have(1).record }
it "should copy the attributes from Refinery::WordPress::Post" do
@post.title.should == post.title
@post.body.should == post.content_formatted
@post.draft.should == post.draft?
@post.published_at.should == post.post_date
@post.created_at.should == post.post_date
@post.author.username.should == post.creator
end
specify { @post.title.should == post.title }
specify { @post.body.should == post.content_formatted }
specify { @post.draft.should == post.draft? }
specify { @post.published_at.should == post.post_date }
specify { @post.author.username.should == post.creator }
it "should assign a category for each Refinery::WordPress::Category" do
@post.categories.should have(post.categories.count).records
@ -106,7 +104,7 @@ describe Refinery::WordPress::Post, :type => :model do
end
it { BlogPost.should have(2).records }
specify { BlogPost.should have(2).records }
it "should create the BlogPost with #post_id attached" do
@post.title.should == "#{post.title}-#{post.post_id}"

View File

@ -1,15 +1,15 @@
require 'spec_helper'
describe Refinery::WordPress::Tag, :type => :model do
let(:tag) { Refinery::WordPress::Tag.new('ruby') }
describe WordPressImport::Tag, :type => :model do
let(:tag) { WordPressImport::Tag.new('ruby') }
describe "#name" do
specify { tag.name.should == 'ruby' }
end
describe "#==" do
specify { tag.should == Refinery::WordPress::Tag.new('ruby') }
specify { tag.should_not == Refinery::WordPress::Tag.new('php') }
specify { tag.should == WordPressImport::Tag.new('ruby') }
specify { tag.should_not == WordPressImport::Tag.new('php') }
end
describe "#to_refinery" do

View File

@ -1,7 +0,0 @@
require 'spec_helper'
describe Refinery::WordPress do
it "should be valid" do
Refinery::WordPress.should be_a(Module)
end
end

12
spec/support/fakeweb.rb Normal file
View File

@ -0,0 +1,12 @@
require "fakeweb"
# Refuse every real HTTP request; only the URIs registered below are answered.
FakeWeb.allow_net_connect = false

# Simulating download of wordpress file attachments. The dump expects the files
# to be at the given URLs
FakeWeb.register_uri(:get,
  "http://localhost/wordpress/wp-content/uploads/2011/05/200px-Tux.svg_.png",
  # File.binread returns the raw bytes and closes the file itself, whereas
  # File.new(...).read left the handle open and read the PNG in text mode.
  :body => File.binread('spec/fixtures/200px-Tux.svg_.png'),
  :content_type => "image/png")
FakeWeb.register_uri(:get, "http://localhost/wordpress/wp-content/uploads/2011/05/cv.txt", :body => "Hello World!", :content_type => "text/plain")

View File

@ -1,11 +1,11 @@
module Refinery::WordPress::SpecHelpers
module WordPressImport::SpecHelpers
def test_dump
file_name = File.realpath(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml'))
Refinery::WordPress::Dump.new(file_name)
file_name = File.expand_path(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml'))
WordPressImport::Dump.new(file_name)
end
end
RSpec.configure do |config|
config.include Refinery::WordPress::SpecHelpers
config.include WordPressImport::SpecHelpers
end

View File

@ -0,0 +1,7 @@
require 'spec_helper'
# Smoke test: the top-level namespace constant must resolve to a Module.
describe WordPressImport do
  specify("should be valid") { WordPressImport.should be_a_kind_of(Module) }
end

View File

@ -0,0 +1,24 @@
-# Wraps @content in a <figure> whose class and id come from @attributes[:align] and @attributes[:id].
-# When @content contains an <img>, it is rebuilt as that image followed by a <figcaption>
-# made of the content's text plus @attributes[:caption].
-# disabling style_hash for now; setting the width = width+10 doesn't seem to actually be a good thing
-# style_hash = {:style => "width: #{@attributes[:width].to_i+10}px"} unless @attributes[:width].blank?
- figure_hash = {:class => @attributes[:align], :id => @attributes[:id]}
-# figure_hash = figure_hash.merge(style_hash) if style_hash
-# Parse the content once and reuse the document for both the image markup and the caption text.
- content_doc = Nokogiri::HTML(@content)
- content_image = content_doc.css("img").to_html
- content_caption = content_doc.text + " #{@attributes[:caption]}"
- @content = "#{content_image} <figcaption>#{content_caption}</figcaption>" unless content_image.blank?
%figure{figure_hash}= @content
-# sample wordpress-y css to go along with this html:
figure {
background: #f1f1f1;
margin-bottom: 20px;
padding: 4px;
text-align: center;
}
figure img {
margin: 5px 5px 0;
}
figure figcaption {
color: #777;
font-size: 12px;
margin: 5px 5px 24px;
}

View File

@ -0,0 +1,4 @@
-# Renders @content in a left-floated div; width and padding are taken from @attributes.
-# The right-hand padding is fixed at 6%, the other three sides use @attributes[:padding].
.post_column_1{:style =>"width:#{@attributes[:width]}; float: left; padding: #{@attributes[:padding]} 6% #{@attributes[:padding]} #{@attributes[:padding]}; display: inline;"}= @content
-# Example input: [column width="47%" padding="0"] foo [/column]
-# Resulting wrapper: <div class="post_column_1">

View File

@ -0,0 +1 @@
<div style="clear: both;"></div>

View File

@ -0,0 +1,37 @@
-# Builds an embedded Google Maps iframe and a "larger map" link from @attributes[:addmarkerlist].
-# The value is split on "{}": the first segment is used as the address, the last as the display name.
-# A missing or empty attribute splits to [], so both segments fall back to "" rather than
-# handing nil to URI::encode.
- marker_parts = @attributes[:addmarkerlist].to_s.split("{}")
- name = marker_parts.last.to_s
- uri_name = URI::encode(name)
- uri_address = URI::encode(marker_parts.first.to_s)
-# google will open up the business page if we pass it a name
- uri_address = "#{uri_name},%20#{uri_address}" unless (uri_name == uri_address || name.blank?)
- gmaps_url = "https://maps.google.com/maps/?q=#{uri_address}&amp;ie=UTF8&amp;t=m"
<iframe src="#{gmaps_url}&amp;output=embed" frameborder="0" marginwidth="0" marginheight="0" scrolling="no" width="#{@attributes[:width]}" height="#{@attributes[:height]}"></iframe>
<small>View <a href="#{gmaps_url}">#{name}</a> in a larger map</small>
-# [google-map-v3 width="425"
height="350"
zoom="12"
maptype="roadmap"
mapalign="left"
directionhint="false"
language="default"
poweredby="false"
maptypecontrol="true"
pancontrol="true"
zoomcontrol="true"
scalecontrol="true"
streetviewcontrol="true"
scrollwheelcontrol="false"
draggable="true"
tiltfourtyfive="false"
addmarkermashupbubble="false"
addmarkermashupbubble="false"
addmarkerlist="#1 Dream Manor Dr Globe, AZ{}1-default.png{}Dream Manor Inn"
bubbleautopan="true"
showbike="false"
showtraffic="false"
showpanoramio="false"]

22
wordpress-import.gemspec Normal file
View File

@ -0,0 +1,22 @@
# Provide a simple gemspec so you can easily use your enginex
# project in your rails apps through git.
Gem::Specification.new do |s|
  # Identity and release metadata.
  s.name = "wordpress-import"
  s.summary = "Import WordPress XML dumps into your Ruby on Rails app."
  s.description = "This gem imports a WordPress XML dump into Rails (Page, User, BlogPost, BlogCategory, Tag, BlogComment)"
  s.version = "0.4.4"
  s.date = "2014-03-17"
  s.authors = ['Will Bradley']
  s.email = 'bradley.will@gmail.com'
  s.homepage = 'https://github.com/zyphlar/wordpress-import'
  # Declared to match the MIT-LICENSE file packaged below; without it the
  # published gem carries no license metadata.
  s.license = 'MIT'

  # Runtime dependencies.
  s.add_dependency 'bundler', '~> 1.0'
  s.add_dependency 'nokogiri', '~> 1.6.0'
  s.add_dependency 'shortcode', '~> 0.1.1'

  # Needed only to run the spec suite.
  s.add_development_dependency 'rspec-rails'
  s.add_development_dependency 'database_cleaner'

  # Package everything under app/, lib/ and config/ plus the top-level project files.
  s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
end