Beginning to remove refinery stuff

This commit is contained in:
Will Bradley 2014-03-04 15:47:53 -07:00
parent 1e1f3574eb
commit 60ce62ad1b
29 changed files with 530 additions and 744 deletions

View File

@ -1,6 +1,8 @@
source "http://rubygems.org" source "http://rubygems.org"
gem "rails", "3.0.9" ruby '2.0.0'
gem "rails", "4.0.0"
#gem "capybara", ">= 1.0.0.beta1" #gem "capybara", ">= 1.0.0.beta1"
gem "sqlite3" gem "sqlite3"
gem "rmagick" gem "rmagick"
@ -19,6 +21,4 @@ end
# gem 'ruby-debug' # gem 'ruby-debug'
# gem 'ruby-debug19' # gem 'ruby-debug19'
gem 'refinerycms' gem 'wordpress-import', :path => './'
gem 'refinerycms-blog'
gem 'refinerycms-wordpress-import', :path => './'

View File

@ -1,200 +0,0 @@
PATH
remote: ./
specs:
refinerycms-wordpress-import (0.4.0)
bundler (~> 1.0)
nokogiri (~> 1.5.0)
refinerycms (~> 1.0.0)
refinerycms-blog (~> 1.6.2)
GEM
remote: http://rubygems.org/
specs:
abstract (1.0.0)
actionmailer (3.0.9)
actionpack (= 3.0.9)
mail (~> 2.2.19)
actionpack (3.0.9)
activemodel (= 3.0.9)
activesupport (= 3.0.9)
builder (~> 2.1.2)
erubis (~> 2.6.6)
i18n (~> 0.5.0)
rack (~> 1.2.1)
rack-mount (~> 0.6.14)
rack-test (~> 0.5.7)
tzinfo (~> 0.3.23)
activemodel (3.0.9)
activesupport (= 3.0.9)
builder (~> 2.1.2)
i18n (~> 0.5.0)
activerecord (3.0.9)
activemodel (= 3.0.9)
activesupport (= 3.0.9)
arel (~> 2.0.10)
tzinfo (~> 0.3.23)
activeresource (3.0.9)
activemodel (= 3.0.9)
activesupport (= 3.0.9)
activesupport (3.0.9)
acts-as-taggable-on (2.0.6)
acts_as_indexed (0.7.3)
arel (2.0.10)
awesome_nested_set (2.0.1)
activerecord (>= 3.0.0)
babosa (0.3.5)
bcrypt-ruby (2.1.4)
builder (2.1.2)
database_cleaner (0.6.7)
devise (1.3.4)
bcrypt-ruby (~> 2.1.2)
orm_adapter (~> 0.0.3)
warden (~> 1.0.3)
diff-lcs (1.1.2)
dragonfly (0.9.5)
rack
erubis (2.6.6)
abstract (>= 1.0.0)
fakeweb (1.3.0)
ffi (1.0.9)
filters_spam (0.3)
friendly_id_globalize3 (3.2.1.4)
babosa (~> 0.3.0)
globalize3 (0.1.0)
activemodel (>= 3.0.0)
activerecord (>= 3.0.0)
guard (0.5.1)
thor (~> 0.14.6)
guard-bundler (0.1.3)
bundler (>= 1.0.0)
guard (>= 0.2.2)
guard-rspec (0.4.1)
guard (>= 0.4.0)
i18n (0.5.0)
libnotify (0.5.7)
mail (2.2.19)
activesupport (>= 2.3.6)
i18n (>= 0.4.0)
mime-types (~> 1.16)
treetop (~> 1.4.8)
mime-types (1.16)
nokogiri (1.5.0)
orm_adapter (0.0.5)
polyglot (0.3.2)
rack (1.2.3)
rack-cache (1.0.2)
rack (>= 0.4)
rack-mount (0.6.14)
rack (>= 1.0.0)
rack-test (0.5.7)
rack (>= 1.0)
rails (3.0.9)
actionmailer (= 3.0.9)
actionpack (= 3.0.9)
activerecord (= 3.0.9)
activeresource (= 3.0.9)
activesupport (= 3.0.9)
bundler (~> 1.0)
railties (= 3.0.9)
railties (3.0.9)
actionpack (= 3.0.9)
activesupport (= 3.0.9)
rake (>= 0.8.7)
rdoc (~> 3.4)
thor (~> 0.14.4)
rake (0.9.2)
rdoc (3.9.2)
refinerycms (1.0.4)
bundler (~> 1.0)
refinerycms-authentication (= 1.0.4)
refinerycms-base (= 1.0.4)
refinerycms-core (= 1.0.4)
refinerycms-dashboard (= 1.0.4)
refinerycms-images (= 1.0.4)
refinerycms-pages (= 1.0.4)
refinerycms-resources (= 1.0.4)
refinerycms-settings (= 1.0.4)
refinerycms-authentication (1.0.4)
devise (~> 1.3.0)
friendly_id_globalize3 (~> 3.2.1)
refinerycms-core (= 1.0.4)
refinerycms-base (1.0.4)
refinerycms-blog (1.6.2)
acts-as-taggable-on
filters_spam (~> 0.2)
refinerycms-core (~> 1.0.3)
seo_meta (~> 1.1.0)
refinerycms-core (1.0.4)
acts_as_indexed (~> 0.7)
awesome_nested_set (~> 2.0)
friendly_id_globalize3 (~> 3.2.1)
globalize3 (~> 0.1)
rails (~> 3.0.9)
refinerycms-base (= 1.0.4)
refinerycms-generators (~> 1.0)
refinerycms-settings (= 1.0.4)
truncate_html (~> 0.5)
will_paginate (= 3.0.pre2)
refinerycms-dashboard (1.0.4)
refinerycms-core (= 1.0.4)
refinerycms-generators (1.0.3)
refinerycms-images (1.0.4)
dragonfly (~> 0.9.0)
rack-cache (>= 0.5.3)
refinerycms-core (= 1.0.4)
refinerycms-pages (1.0.4)
awesome_nested_set (~> 2.0)
friendly_id_globalize3 (~> 3.2.1)
globalize3 (~> 0.1)
refinerycms-core (= 1.0.4)
seo_meta (~> 1.1)
refinerycms-resources (1.0.4)
dragonfly (~> 0.9.0)
rack-cache (>= 0.5.3)
refinerycms-core (= 1.0.4)
refinerycms-settings (1.0.4)
refinerycms-base (= 1.0.4)
rmagick (2.13.1)
rspec (2.6.0)
rspec-core (~> 2.6.0)
rspec-expectations (~> 2.6.0)
rspec-mocks (~> 2.6.0)
rspec-core (2.6.4)
rspec-expectations (2.6.0)
diff-lcs (~> 1.1.2)
rspec-mocks (2.6.0)
rspec-rails (2.6.1)
actionpack (~> 3.0)
activesupport (~> 3.0)
railties (~> 3.0)
rspec (~> 2.6.0)
seo_meta (1.1.1)
refinerycms-generators (~> 1.0.1)
sqlite3 (1.3.4)
thor (0.14.6)
treetop (1.4.10)
polyglot
polyglot (>= 0.3.1)
truncate_html (0.5.1)
tzinfo (0.3.29)
warden (1.0.5)
rack (>= 1.0)
will_paginate (3.0.pre2)
PLATFORMS
ruby
DEPENDENCIES
database_cleaner
fakeweb
ffi
guard-bundler
guard-rspec
libnotify
rails (= 3.0.9)
refinerycms
refinerycms-blog
refinerycms-wordpress-import!
rmagick
rspec-rails (>= 2.6.0)
sqlite3

View File

@ -1,4 +1,5 @@
Copyright 2011 YOURNAME Copyright 2014 Will Bradley
portions Copyright 2011 Marc Remolt
Permission is hereby granted, free of charge, to any person obtaining Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the a copy of this software and associated documentation files (the

View File

@ -1,8 +1,10 @@
= Refinerycms-wordpress-import = Wordpress-import
This little project is an importer for WordPress XML dumps into refinerycms(-blog). Fork of Marc Remolt's Refinerycms-wordpress-import ( https://github.com/mremolt/refinerycms-wordpress-import )
You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import This little project is an importer for WordPress XML dumps into Rails
You can find the source code on github: https://github.com/zyphlar/wordpress-import
Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags. Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags.
If your site (blog) structure uses new urls, the links WILL break! For example, if you used If your site (blog) structure uses new urls, the links WILL break! For example, if you used
@ -12,20 +14,18 @@ So your inner site links will point to the old WP url.
== Prerequisites == Prerequisites
As refinerycms-wordpress-import is an addon for RefineryCMS, it shares the prerequisites with it. TODO
So you'll first need a running installation of refinerycms and refinerycms-blog. Make sure
the site is running, all migrations are run and you created the first refinery user.
== Installation == Installation
Just add the gem to your projects Gemfile: Just add the gem to your projects Gemfile:
gem 'refinerycms-wordpress-import' gem 'wordpress-import'
Or if you want to stay on the bleeding edge: Or if you want to stay on the bleeding edge:
gem 'refinerycms-wordpress-import', :git => 'git://github.com/mremolt/refinerycms-wordpress-import.git' gem 'wordpress-import', :git => 'git://github.com/zyphlar/wordpress-import.git'
and run and run

View File

@ -1,7 +0,0 @@
module Refinery
module WordPress
end
end
require 'wordpress'

View File

@ -13,24 +13,24 @@ namespace :wordpress do
end end
desc "import blog data from a Refinery::WordPress XML dump" desc "import blog data from a WordPressImport XML dump"
task :import_blog, :file_name do |task, params| task :import_blog, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name]) dump = WordPressImport::Dump.new(params[:file_name])
dump.authors.each(&:to_refinery) dump.authors.each(&:to_refinery)
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
dump.posts(only_published).each(&:to_refinery) dump.posts(only_published).each(&:to_refinery)
Refinery::WordPress::Post.create_blog_page_if_necessary WordPressImport::Post.create_blog_page_if_necessary
ENV["MODEL"] = 'BlogPost' ENV["MODEL"] = 'BlogPost'
Rake::Task["friendly_id:redo_slugs"].invoke Rake::Task["friendly_id:redo_slugs"].invoke
ENV.delete("MODEL") ENV.delete("MODEL")
end end
desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump" desc "reset blog tables and then import blog data from a WordPressImport XML dump"
task :reset_and_import_blog, :file_name do |task, params| task :reset_and_import_blog, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_blog"].invoke Rake::Task["wordpress:reset_blog"].invoke
@ -51,7 +51,7 @@ namespace :wordpress do
desc "import cms data from a WordPress XML dump" desc "import cms data from a WordPress XML dump"
task :import_pages, :file_name do |task, params| task :import_pages, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name]) dump = WordPressImport::Dump.new(params[:file_name])
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
dump.pages(only_published).each(&:to_refinery) dump.pages(only_published).each(&:to_refinery)
@ -65,7 +65,7 @@ namespace :wordpress do
page.save! page.save!
end end
Refinery::WordPress::Post.create_blog_page_if_necessary WordPressImport::Post.create_blog_page_if_necessary
ENV["MODEL"] = 'Page' ENV["MODEL"] = 'Page'
Rake::Task["friendly_id:redo_slugs"].invoke Rake::Task["friendly_id:redo_slugs"].invoke
@ -93,7 +93,7 @@ namespace :wordpress do
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts" desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
task :import_and_replace_media, :file_name do |task, params| task :import_and_replace_media, :file_name do |task, params|
Rake::Task["environment"].invoke Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name]) dump = WordPressImport::Dump.new(params[:file_name])
attachments = dump.attachments.each(&:to_refinery) attachments = dump.attachments.each(&:to_refinery)

5
lib/wordpress-import.rb Normal file
View File

@ -0,0 +1,5 @@
module WordPressImport
end
require 'wordpress'

View File

@ -1,16 +1,14 @@
require 'nokogiri' require 'nokogiri'
require "wordpress/railtie" require "wordpress/railtie"
module Refinery module WordPressImport
module WordPress autoload :Author, 'wordpress/author'
autoload :Author, 'wordpress/author' autoload :Tag, 'wordpress/tag'
autoload :Tag, 'wordpress/tag' autoload :Category, 'wordpress/category'
autoload :Category, 'wordpress/category' autoload :Page, 'wordpress/page'
autoload :Page, 'wordpress/page' autoload :Post, 'wordpress/post'
autoload :Post, 'wordpress/post' autoload :Comment, 'wordpress/comment'
autoload :Comment, 'wordpress/comment' autoload :Dump, 'wordpress/dump'
autoload :Dump, 'wordpress/dump' autoload :Attachment, 'wordpress/attachment'
autoload :Attachment, 'wordpress/attachment'
end
end end

View File

@ -1,130 +1,128 @@
module Refinery module WordPressImport
module WordPress class Attachment
class Attachment attr_reader :node
attr_reader :node attr_reader :refinery_image
attr_reader :refinery_image attr_reader :refinery_resource
attr_reader :refinery_resource
def initialize(node)
@node = node
end
def title
node.xpath("title").text
end
def description
node.xpath("description").text
end
def file_name
url.split('/').last
end
def post_date
DateTime.parse node.xpath("wp:post_date").text
end
def url
node.xpath("wp:attachment_url").text
end
def url_pattern
url_parts = url.split('.')
extension = url_parts.pop
url_without_extension = url_parts.join('.')
/#{url_without_extension}(-\d+x\d+)?\.#{extension}/
end
def image?
url.match /\.(png|jpg|jpeg|gif)$/
end
def to_refinery
if image?
to_image
else
to_resource
end
end
def replace_url
if image?
replace_image_url
else
replace_resource_url
end
end
private
def to_image
image = ::Image.new
image.created_at = post_date
image.image_url = url
image.save!
@refinery_image = image
image
end
def to_resource
resource = ::Resource.new
resource.created_at = post_date
resource.file_url = url
resource.save!
@refinery_resource = resource
resource
end
def replace_image_url
replace_image_url_in_blog_posts
replace_image_url_in_pages
end
def replace_resource_url
replace_resource_url_in_blog_posts
replace_resource_url_in_pages
end
def replace_image_url_in_blog_posts
replace_url_in_blog_posts(refinery_image.image.url)
end
def replace_image_url_in_pages
replace_url_in_pages(refinery_image.image.url)
end
def replace_resource_url_in_blog_posts
replace_url_in_blog_posts(refinery_resource.file.url)
end
def replace_resource_url_in_pages
replace_url_in_pages(refinery_resource.file.url)
end
def replace_url_in_blog_posts(new_url)
::BlogPost.all.each do |post|
if (! post.body.empty?) && post.body.include?(url)
post.body = post.body.gsub(url_pattern, new_url)
post.save!
end
end
end
def replace_url_in_pages(new_url)
::Page.all.each do |page|
page.parts.each do |part|
if (! part.body.to_s.blank?) && part.body.include?(url)
part.body = part.body.gsub(url_pattern, new_url)
part.save!
end
end
end
end
def initialize(node)
@node = node
end end
# Text of the node's "title" element(s).
def title
  title_nodes = node.xpath("title")
  title_nodes.text
end
# Text of the node's "description" element(s).
def description
  description_nodes = node.xpath("description")
  description_nodes.text
end
# Last "/"-separated segment of the attachment URL (nil when the URL is empty).
def file_name
  segments = url.split('/')
  segments[-1]
end
# Parses the text of the node's "wp:post_date" element into a DateTime.
def post_date
  raw_date = node.xpath("wp:post_date").text
  DateTime.parse(raw_date)
end
# Text of the node's "wp:attachment_url" element(s).
def url
  url_nodes = node.xpath("wp:attachment_url")
  url_nodes.text
end
# Builds a Regexp matching the attachment URL, optionally with a
# "-<width>x<height>" suffix inserted before the file extension.
#
# Both URL fragments are passed through Regexp.escape so that characters
# such as ".", "+" or "?" in the URL are matched literally instead of being
# interpreted as regular-expression metacharacters.
#
# Returns a Regexp.
def url_pattern
  url_parts = url.split('.')
  extension = Regexp.escape(url_parts.pop.to_s)
  url_without_extension = Regexp.escape(url_parts.join('.'))

  /#{url_without_extension}(-\d+x\d+)?\.#{extension}/
end
# Whether the attachment URL ends in a png/jpg/jpeg/gif extension.
#
# The check is case-insensitive so that uploads such as "photo.JPG" are
# treated as images as well. Returns the MatchData (truthy) or nil.
def image?
  url.match(/\.(png|jpg|jpeg|gif)$/i)
end
# Dispatches to to_image for image attachments and to to_resource otherwise,
# returning whatever the chosen method returns.
def to_refinery
  return to_image if image?

  to_resource
end
# Dispatches to replace_image_url for image attachments and to
# replace_resource_url otherwise.
def replace_url
  return replace_image_url if image?

  replace_resource_url
end
private
# Builds a new ::Image from this attachment (created_at from post_date,
# image_url from url), saves it with save!, remembers it in @refinery_image
# and returns it.
def to_image
  @refinery_image = ::Image.new.tap do |record|
    record.created_at = post_date
    record.image_url = url
    record.save!
  end
end
# Builds a new ::Resource from this attachment (created_at from post_date,
# file_url from url), saves it with save!, remembers it in
# @refinery_resource and returns it.
def to_resource
  @refinery_resource = ::Resource.new.tap do |record|
    record.created_at = post_date
    record.file_url = url
    record.save!
  end
end
# Rewrites references to this attachment's URL with the imported image's
# URL, first in blog posts and then in pages.
def replace_image_url
  replace_url_in_blog_posts(refinery_image.image.url)
  replace_url_in_pages(refinery_image.image.url)
end
# Rewrites references to this attachment's URL with the imported resource's
# file URL, first in blog posts and then in pages.
def replace_resource_url
  replace_url_in_blog_posts(refinery_resource.file.url)
  replace_url_in_pages(refinery_resource.file.url)
end
# Replaces this attachment's URL in blog posts with the imported image's URL.
def replace_image_url_in_blog_posts
  new_url = refinery_image.image.url
  replace_url_in_blog_posts(new_url)
end
# Replaces this attachment's URL in pages with the imported image's URL.
def replace_image_url_in_pages
  new_url = refinery_image.image.url
  replace_url_in_pages(new_url)
end
# Replaces this attachment's URL in blog posts with the imported resource's
# file URL.
def replace_resource_url_in_blog_posts
  new_url = refinery_resource.file.url
  replace_url_in_blog_posts(new_url)
end
# Replaces this attachment's URL in pages with the imported resource's
# file URL.
def replace_resource_url_in_pages
  new_url = refinery_resource.file.url
  replace_url_in_pages(new_url)
end
# Rewrites every ::BlogPost whose body contains this attachment's URL,
# substituting new_url for each match of url_pattern and saving with save!.
#
# new_url - replacement URL string.
#
# Posts with a nil or empty body are skipped; the body is normalized with
# to_s first so a nil body no longer raises NoMethodError (this matches the
# nil-tolerant check used for page parts).
# Returns the collection that was iterated.
def replace_url_in_blog_posts(new_url)
  ::BlogPost.all.each do |post|
    body = post.body.to_s
    next if body.empty? || !body.include?(url)

    post.body = body.gsub(url_pattern, new_url)
    post.save!
  end
end
# Walks every part of every ::Page and, where the part body contains this
# attachment's URL, substitutes new_url for each match of url_pattern and
# saves the part with save!. Blank bodies are skipped.
def replace_url_in_pages(new_url)
  ::Page.all.each do |page|
    page.parts.each do |part|
      next if part.body.to_s.blank?
      next unless part.body.include?(url)

      part.body = part.body.gsub(url_pattern, new_url)
      part.save!
    end
  end
end
end end
end end

View File

@ -1,37 +1,35 @@
module Refinery module WordPressImport
module WordPress class Author
class Author attr_reader :author_node
attr_reader :author_node
def initialize(author_node) def initialize(author_node)
@author_node = author_node @author_node = author_node
end end
def login def login
author_node.xpath("wp:author_login").text author_node.xpath("wp:author_login").text
end end
def email def email
author_node.xpath("wp:author_email").text author_node.xpath("wp:author_email").text
end end
def ==(other) def ==(other)
login == other.login login == other.login
end end
def inspect def inspect
"WordPress::Author: #{login} <#{email}>" "WordPress::Author: #{login} <#{email}>"
end end
def to_refinery def to_refinery
user = User.find_or_initialize_by_username_and_email(login, email) user = User.find_or_initialize_by_username_and_email(login, email)
unless user.persisted? unless user.persisted?
user.password = 'password' user.password = 'password'
user.password_confirmation = 'password' user.password_confirmation = 'password'
user.save user.save
end
user
end end
user
end end
end end
end end

View File

@ -1,19 +1,17 @@
module Refinery module WordPressImport
module WordPress class Category
class Category attr_accessor :name
attr_accessor :name
def initialize(text) def initialize(text)
@name = text @name = text
end end
def ==(other) def ==(other)
name == other.name name == other.name
end end
def to_refinery def to_refinery
BlogCategory.find_or_create_by_title(name) BlogCategory.find_or_create_by_title(name)
end
end end
end end
end end

View File

@ -1,48 +1,46 @@
module Refinery module WordPressImport
module WordPress class Comment
class Comment attr_reader :node
attr_reader :node
def initialize(node) def initialize(node)
@node = node @node = node
end end
def author def author
node.xpath('wp:comment_author').text node.xpath('wp:comment_author').text
end end
def email def email
node.xpath('wp:comment_author_email').text node.xpath('wp:comment_author_email').text
end end
def url def url
node.xpath('wp:comment_author_url').text node.xpath('wp:comment_author_url').text
end end
def date def date
DateTime.parse node.xpath("wp:comment_date").text DateTime.parse node.xpath("wp:comment_date").text
end end
def content def content
node.xpath('wp:comment_content').text node.xpath('wp:comment_content').text
end end
def approved? def approved?
node.xpath('wp:comment_approved').text.to_i == 1 node.xpath('wp:comment_approved').text.to_i == 1
end end
def ==(other) def ==(other)
(email == other.email) && (date == other.date) && (content == other.content) (email == other.email) && (date == other.date) && (content == other.content)
end end
def to_refinery def to_refinery
comment = BlogComment.new :name => author, :email => email comment = BlogComment.new :name => author, :email => email
comment.body = content comment.body = content
comment.created_at = date comment.created_at = date
comment.state = approved? ? 'approved' : 'rejected' comment.state = approved? ? 'approved' : 'rejected'
comment comment
end
end end
end end
end end

View File

@ -1,57 +1,55 @@
module Refinery module WordPressImport
module WordPress class Dump
class Dump attr_reader :doc
attr_reader :doc
def initialize(file_name) def initialize(file_name)
file_name = File.expand_path(file_name) file_name = File.expand_path(file_name)
raise "Given file '#{file_name}' no file or not readable." \ raise "Given file '#{file_name}' no file or not readable." \
unless File.file?(file_name) && File.readable?(file_name) unless File.file?(file_name) && File.readable?(file_name)
file = File.open(file_name) file = File.open(file_name)
@doc = Nokogiri::XML(file) @doc = Nokogiri::XML(file)
end
def authors
doc.xpath("//wp:author").collect do |author|
Author.new(author)
end
end
def pages(only_published=false)
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
Page.new(page)
end end
def authors pages = pages.select(&:published?) if only_published
doc.xpath("//wp:author").collect do |author| pages
Author.new(author) end
end
def posts(only_published=false)
posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post|
Post.new(post)
end end
posts = posts.select(&:published?) if only_published
posts
end
def pages(only_published=false) def tags
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page| doc.xpath("//wp:tag/wp:tag_slug").collect do |tag|
Page.new(page) Tag.new(tag.text)
end
pages = pages.select(&:published?) if only_published
pages
end end
end
def posts(only_published=false) def categories
posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post| doc.xpath("//wp:category/wp:cat_name").collect do |category|
Post.new(post) Category.new(category.text)
end
posts = posts.select(&:published?) if only_published
posts
end end
end
def tags def attachments
doc.xpath("//wp:tag/wp:tag_slug").collect do |tag| doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
Tag.new(tag.text) Attachment.new(attachment)
end
end
def categories
doc.xpath("//wp:category/wp:cat_name").collect do |category|
Category.new(category.text)
end
end
def attachments
doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
Attachment.new(attachment)
end
end end
end end
end end

View File

@ -1,107 +1,114 @@
module Refinery module WordPressImport
module WordPress class Page
class Page include ::ActionView::Helpers::TagHelper
include ::ActionView::Helpers::TagHelper include ::ActionView::Helpers::TextHelper
include ::ActionView::Helpers::TextHelper
attr_reader :node attr_reader :node
def initialize(node) def initialize(node)
@node = node @node = node
end
def inspect
"WordPress::Page(#{post_id}): #{title}"
end
def title
node.xpath("title").text
end
def content
node.xpath("content:encoded").text
end
def content_formatted
formatted = format_syntax_highlighter(format_paragraphs(content))
# remove all tags inside <pre> that simple_format created
# TODO: replace format_paragraphs with a method, that ignores pre-tags
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
"#{$1}#{strip_tags($2)}#{$3}"
end end
def inspect formatted
"WordPress::Page(#{post_id}): #{title}" end
end
def title def creator
node.xpath("title").text node.xpath("dc:creator").text
end end
def content def post_date
node.xpath("content:encoded").text DateTime.parse node.xpath("wp:post_date").text
end end
def content_formatted def post_id
formatted = format_syntax_highlighter(format_paragraphs(content)) node.xpath("wp:post_id").text.to_i
end
# remove all tags inside <pre> that simple_format created def parent_id
# TODO: replace format_paragraphs with a method, that ignores pre-tags dump_id = node.xpath("wp:post_parent").text.to_i
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match| dump_id == 0 ? nil : dump_id
"#{$1}#{strip_tags($2)}#{$3}" end
end
formatted def status
end node.xpath("wp:status").text
end
def creator def draft?
node.xpath("dc:creator").text status != 'publish'
end end
def post_date def published?
DateTime.parse node.xpath("wp:post_date").text ! draft?
end end
def post_id def ==(other)
node.xpath("wp:post_id").text.to_i post_id == other.post_id
end end
def parent_id #NEED:
dump_id = node.xpath("wp:post_parent").text.to_i # dc:creator -> "user_id"
dump_id == 0 ? nil : dump_id # wp:post_name -> "slug"
end # pubDate -> "published_at"
#OK:
# title -> "title"
# content:encoded -> "body"
# wp:post_date_gmt -> "created_at"
def status def to_refinery
node.xpath("wp:status").text page = ::Page.create!(:id => post_id, :title => title,
end :created_at => post_date, :draft => draft?)
def draft? page.parts.create(:title => 'Body', :body => content_formatted)
status != 'publish' page
end end
def published? private
! draft?
end
def ==(other) def format_paragraphs(text, html_options={})
post_id == other.post_id # WordPress doesn't export <p>-Tags, so let's run a simple_format over
end # the content. As we trust ourselves, no sanatize. This code is heavily
# inspired by the simple_format rails helper
text = ''.html_safe if text.nil?
start_tag = tag('p', html_options, true)
def to_refinery text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n
page = ::Page.create!(:id => post_id, :title => title, text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}") # 2+ newline -> paragraph
:created_at => post_date, :draft => draft?) text.insert 0, start_tag
page.parts.create(:title => 'Body', :body => content_formatted) text.html_safe.safe_concat("</p>")
page end
end
private def format_syntax_highlighter(text)
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
def format_paragraphs(text, html_options={}) # In WordPress you can (via a plugin) enclose code in [lang][/lang]
# WordPress doesn't export <p>-Tags, so let's run a simple_format over # blocks, which are converted to a <pre>-tag with a class corresponding
# the content. As we trust ourselves, no sanatize. This code is heavily # to the language.
# inspired by the simple_format rails helper #
text = ''.html_safe if text.nil? # Example:
start_tag = tag('p', html_options, true) # [ruby]p "Hello World"[/ruby]
# -> <pre class="brush: ruby">p "Hello world"</pre>
text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}") # 2+ newline -> paragraph
text.insert 0, start_tag
text.html_safe.safe_concat("</p>")
end
def format_syntax_highlighter(text)
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
# In WordPress you can (via a plugin) enclose code in [lang][/lang]
# blocks, which are converted to a <pre>-tag with a class corresponding
# to the language.
#
# Example:
# [ruby]p "Hello World"[/ruby]
# -> <pre class="brush: ruby">p "Hello world"</pre>
text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
end
end end
end end
end end

View File

@ -1,85 +1,83 @@
module Refinery module WordPressImport
module WordPress class Post < Page
class Post < Page def tags
def tags # xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
# xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0 path = if node.xpath("category[@domain='post_tag']").count > 0
path = if node.xpath("category[@domain='post_tag']").count > 0 "category[@domain='post_tag']"
"category[@domain='post_tag']" else
else "category[@domain='tag']"
"category[@domain='tag']"
end
node.xpath(path).collect do |tag_node|
Tag.new(tag_node.text)
end
end end
def tag_list node.xpath(path).collect do |tag_node|
tags.collect(&:name).join(',') Tag.new(tag_node.text)
end end
def categories
node.xpath("category[@domain='category']").collect do |cat|
Category.new(cat.text)
end
end
def comments
node.xpath("wp:comment").collect do |comment_node|
Comment.new(comment_node)
end
end
def to_refinery
user = ::User.find_by_username(creator) || ::User.first
raise "Referenced User doesn't exist! Make sure the authors are imported first." \
unless user
begin
post = ::BlogPost.new :title => title, :body => content_formatted,
:draft => draft?, :published_at => post_date, :created_at => post_date,
:user_id => user.id, :tag_list => tag_list
post.save!
::BlogPost.transaction do
categories.each do |category|
post.categories << category.to_refinery
end
comments.each do |comment|
comment = comment.to_refinery
comment.post = post
comment.save
end
end
rescue ActiveRecord::RecordInvalid
# if the title has already been taken (WP allows duplicates here,
# refinery doesn't) append the post_id to it, making it unique
post.title = "#{title}-#{post_id}"
post.save
end
post
end
def self.create_blog_page_if_necessary
# refinerycms wants a page at /blog, so let's make sure there is one
# taken from the original db seeds from refinery-blog
unless ::Page.where("link_url = ?", '/blog').exists?
page = ::Page.create(
:title => "Blog",
:link_url => "/blog",
:deletable => false,
:position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
:menu_match => "^/blogs?(\/|\/.+?|)$"
)
::Page.default_parts.each do |default_page_part|
page.parts.create(:title => default_page_part, :body => nil)
end
end
end
end end
# Comma-separated string of the names of all tags on this post.
def tag_list
  tag_names = tags.map { |tag| tag.name }
  tag_names.join(',')
end
# One Category per <category domain="category"> element of the node,
# built from the element's text.
def categories
  category_nodes = node.xpath("category[@domain='category']")
  category_nodes.map { |cat| Category.new(cat.text) }
end
# One Comment per "wp:comment" element of the node, each wrapping that
# element.
def comments
  comment_nodes = node.xpath("wp:comment")
  comment_nodes.map { |comment_node| Comment.new(comment_node) }
end
# Creates a ::BlogPost from this dump item and returns it.
#
# The post is attributed to the ::User whose username equals the item's
# creator, falling back to ::User.first; a RuntimeError is raised when
# neither lookup yields a user. Categories and comments of the item are
# attached after the post has been saved.
def to_refinery
user = ::User.find_by_username(creator) || ::User.first
raise "Referenced User doesn't exist! Make sure the authors are imported first." \
unless user
begin
post = ::BlogPost.new :title => title, :body => content_formatted,
:draft => draft?, :published_at => post_date, :created_at => post_date,
:user_id => user.id, :tag_list => tag_list
# save! raises on failure; ActiveRecord::RecordInvalid is handled by the rescue below.
post.save!
# Attach categories and comments inside a single transaction.
::BlogPost.transaction do
categories.each do |category|
post.categories << category.to_refinery
end
comments.each do |comment|
# The block variable is rebound from the dump comment to the record built from it.
comment = comment.to_refinery
comment.post = post
# NOTE(review): the result of save is not checked, so a comment that fails
# to save appears to be dropped silently — confirm this is intended.
comment.save
end
end
rescue ActiveRecord::RecordInvalid
# if the title has already been taken (WP allows duplicates here,
# refinery doesn't) append the post_id to it, making it unique
# NOTE(review): this handler runs for any RecordInvalid raised in the begin
# block, not only a duplicate title, and the result of the retry save is
# not checked — confirm.
post.title = "#{title}-#{post_id}"
post.save
end
post
end
# Ensures a ::Page with link_url "/blog" exists.
#
# When no such page is found, one titled "Blog" is created and given one
# part (with a nil body) per entry of ::Page.default_parts. Does nothing when
# the page already exists.
def self.create_blog_page_if_necessary
# refinerycms wants a page at /blog, so let's make sure there is one
# taken from the original db seeds from refinery-blog
unless ::Page.where("link_url = ?", '/blog').exists?
# position: one past the maximum position among pages without a parent,
# or 0 when that maximum is nil.
# NOTE(review): the :conditions option passed to maximum is an older
# ActiveRecord calling style — verify it is still accepted by the Rails
# version in use.
page = ::Page.create(
:title => "Blog",
:link_url => "/blog",
:deletable => false,
:position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
:menu_match => "^/blogs?(\/|\/.+?|)$"
)
::Page.default_parts.each do |default_page_part|
page.parts.create(:title => default_page_part, :body => nil)
end
end
end
end end
end end

View File

@ -1,9 +1,7 @@
module Refinery module WordPressImport
module WordPress class Railtie < Rails::Railtie
class Railtie < Rails::Railtie rake_tasks do
rake_tasks do load "tasks/wordpress.rake"
load "tasks/wordpress.rake"
end
end end
end end
end end

View File

@ -1,20 +1,18 @@
module Refinery module WordPressImport
module WordPress class Tag
class Tag attr_accessor :name
attr_accessor :name
def initialize(text)
@name = text
end
def ==(other)
name == other.name
end
def to_refinery
::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
end
def initialize(text)
@name = text
end end
def ==(other)
name == other.name
end
def to_refinery
::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
end
end end
end end

View File

@ -1,23 +0,0 @@
# Provide a simple gemspec so you can easily use your enginex
# project in your rails apps through git.
Gem::Specification.new do |s|
s.name = "refinerycms-wordpress-import"
s.summary = "Import WordPress XML dumps into refinerycms(-blog)."
s.description = "This gem imports a WordPress XML dump into refinerycms (Page, User) and refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)"
s.version = "0.4.0"
s.date = "2011-06-13"
s.authors = ['Marc Remolt']
s.email = 'marc.remolt@googlemail.com'
s.homepage = 'https://github.com/mremolt/refinerycms-wordpress-import'
s.add_dependency 'bundler', '~> 1.0'
s.add_dependency 'refinerycms', '~> 1.0.0'
s.add_dependency 'refinerycms-blog', '~> 1.6.2'
s.add_dependency 'nokogiri', '~> 1.5.0'
s.add_development_dependency 'rspec-rails'
s.add_development_dependency 'database_cleaner'
s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
end

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Attachment, :type => :model do describe WordPressImport::Attachment, :type => :model do
context "an image attchment" do context "an image attchment" do
let(:attachment) { test_dump.attachments.first } let(:attachment) { test_dump.attachments.first }

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Author, :type => :model do describe WordPressImport::Author, :type => :model do
let(:author) { test_dump.authors.first } let(:author) { test_dump.authors.first }
specify { author.login.should == 'admin' } specify { author.login.should == 'admin' }

View File

@ -1,15 +1,15 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Category, :type => :model do describe WordPressImport::Category, :type => :model do
let(:category) { Refinery::WordPress::Category.new('Rant') } let(:category) { WordPressImport::Category.new('Rant') }
describe "#name" do describe "#name" do
specify { category.name.should == 'Rant' } specify { category.name.should == 'Rant' }
end end
describe "#==" do describe "#==" do
specify { category.should == Refinery::WordPress::Category.new('Rant') } specify { category.should == WordPressImport::Category.new('Rant') }
specify { category.should_not == Refinery::WordPress::Category.new('Tutorials') } specify { category.should_not == WordPressImport::Category.new('Tutorials') }
end end
describe "#to_refinery" do describe "#to_refinery" do

View File

@ -1,10 +1,10 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Dump, :type => :model do describe WordPressImport::Dump, :type => :model do
let(:dump) { test_dump } let(:dump) { test_dump }
it "should create a Dump object given a xml file" do it "should create a Dump object given a xml file" do
dump.should be_a Refinery::WordPress::Dump dump.should be_a WordPressImport::Dump
end end
it "should include a Nokogiri::XML object" do it "should include a Nokogiri::XML object" do
@ -13,12 +13,12 @@ describe Refinery::WordPress::Dump, :type => :model do
describe "#tags" do describe "#tags" do
let(:tags) do let(:tags) do
[ Refinery::WordPress::Tag.new('css'), Refinery::WordPress::Tag.new('html'), [ WordPressImport::Tag.new('css'), WordPressImport::Tag.new('html'),
Refinery::WordPress::Tag.new('php'), Refinery::WordPress::Tag.new('ruby')] WordPressImport::Tag.new('php'), WordPressImport::Tag.new('ruby')]
end end
specify { dump.tags.count == 4 } specify { dump.tags.count == 4 }
specify { dump.tags.first.should be_a(Refinery::WordPress::Tag) } specify { dump.tags.first.should be_a(WordPressImport::Tag) }
it "should return all included tags" do it "should return all included tags" do
tags.each do |tag| tags.each do |tag|
@ -29,12 +29,12 @@ describe Refinery::WordPress::Dump, :type => :model do
describe "#categories" do describe "#categories" do
let(:categories) do let(:categories) do
[ Refinery::WordPress::Category.new('Rant'), Refinery::WordPress::Category.new('Tutorials'), [ WordPressImport::Category.new('Rant'), WordPressImport::Category.new('Tutorials'),
Refinery::WordPress::Category.new('Uncategorized') ] WordPressImport::Category.new('Uncategorized') ]
end end
specify { dump.categories.count == 4 } specify { dump.categories.count == 4 }
specify { dump.categories.first.should be_a(Refinery::WordPress::Category) } specify { dump.categories.first.should be_a(WordPressImport::Category) }
it "should return all included categories" do it "should return all included categories" do
categories.each do |cat| categories.each do |cat|
@ -48,7 +48,7 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.pages.should have(3).pages dump.pages.should have(3).pages
end end
specify { dump.pages.first.should be_a(Refinery::WordPress::Page) } specify { dump.pages.first.should be_a(WordPressImport::Page) }
it "should return only published pages with only_published=true" do it "should return only published pages with only_published=true" do
dump.pages(true).should have(2).pages dump.pages(true).should have(2).pages
@ -60,7 +60,7 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.authors.should have(1).author dump.authors.should have(1).author
end end
specify { dump.authors.first.should be_a(Refinery::WordPress::Author) } specify { dump.authors.first.should be_a(WordPressImport::Author) }
end end
describe "#posts" do describe "#posts" do
@ -68,7 +68,7 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.posts.should have(3).posts dump.posts.should have(3).posts
end end
specify { dump.posts.first.should be_a(Refinery::WordPress::Post) } specify { dump.posts.first.should be_a(WordPressImport::Post) }
it "should return only published posts with only_published=true" do it "should return only published posts with only_published=true" do
dump.posts(true).should have(2).posts dump.posts(true).should have(2).posts
@ -80,6 +80,6 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.attachments.should have(2).attachments dump.attachments.should have(2).attachments
end end
specify { dump.attachments.first.should be_a(Refinery::WordPress::Attachment) } specify { dump.attachments.first.should be_a(WordPressImport::Attachment) }
end end
end end

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Page, :type => :model do describe WordPressImport::Page, :type => :model do
let(:dump) { test_dump } let(:dump) { test_dump }
let(:page) { test_dump.pages.last } let(:page) { test_dump.pages.last }

View File

@ -1,6 +1,6 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Post, :type => :model do describe WordPressImport::Post, :type => :model do
let(:post) { test_dump.posts.last } let(:post) { test_dump.posts.last }
specify { post.title.should == 'Third blog post' } specify { post.title.should == 'Third blog post' }
@ -17,15 +17,15 @@ describe Refinery::WordPress::Post, :type => :model do
describe "#categories" do describe "#categories" do
specify { post.categories.should have(1).category } specify { post.categories.should have(1).category }
specify { post.categories.first.should == Refinery::WordPress::Category.new('Rant') } specify { post.categories.first.should == WordPressImport::Category.new('Rant') }
end end
describe "#tags" do describe "#tags" do
specify { post.tags.should have(3).tags } specify { post.tags.should have(3).tags }
specify { post.tags.should include(Refinery::WordPress::Tag.new('css')) } specify { post.tags.should include(WordPressImport::Tag.new('css')) }
specify { post.tags.should include(Refinery::WordPress::Tag.new('html')) } specify { post.tags.should include(WordPressImport::Tag.new('html')) }
specify { post.tags.should include(Refinery::WordPress::Tag.new('php')) } specify { post.tags.should include(WordPressImport::Tag.new('php')) }
end end
specify { post.tag_list.should == 'css,html,php' } specify { post.tag_list.should == 'css,html,php' }

View File

@ -1,15 +1,15 @@
require 'spec_helper' require 'spec_helper'
describe Refinery::WordPress::Tag, :type => :model do describe WordPressImport::Tag, :type => :model do
let(:tag) { Refinery::WordPress::Tag.new('ruby') } let(:tag) { WordPressImport::Tag.new('ruby') }
describe "#name" do describe "#name" do
specify { tag.name.should == 'ruby' } specify { tag.name.should == 'ruby' }
end end
describe "#==" do describe "#==" do
specify { tag.should == Refinery::WordPress::Tag.new('ruby') } specify { tag.should == WordPressImport::Tag.new('ruby') }
specify { tag.should_not == Refinery::WordPress::Tag.new('php') } specify { tag.should_not == WordPressImport::Tag.new('php') }
end end
describe "#to_refinery" do describe "#to_refinery" do

View File

@ -1,7 +0,0 @@
require 'spec_helper'
# Smoke test: checks that the Refinery::WordPress constant resolves to a Module.
describe Refinery::WordPress do
it "should be valid" do
# Written with RSpec's `should` expectation syntax and the be_a matcher.
Refinery::WordPress.should be_a(Module)
end
end

View File

@ -1,11 +1,11 @@
module Refinery::WordPress::SpecHelpers module WordPressImport::SpecHelpers
def test_dump def test_dump
file_name = File.expand_path(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml')) file_name = File.expand_path(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml'))
Refinery::WordPress::Dump.new(file_name) WordPressImport::Dump.new(file_name)
end end
end end
RSpec.configure do |config| RSpec.configure do |config|
config.include Refinery::WordPress::SpecHelpers config.include WordPressImport::SpecHelpers
end end

View File

@ -0,0 +1,7 @@
require 'spec_helper'
# Smoke test: checks that the WordPressImport constant resolves to a Module.
describe WordPressImport do
it "should be valid" do
# Written with RSpec's `should` expectation syntax and the be_a matcher.
WordPressImport.should be_a(Module)
end
end

21
wordpress-import.gemspec Normal file
View File

@ -0,0 +1,21 @@
# Gem specification for wordpress-import.
# Declares the gem's metadata, its runtime and development dependencies,
# and the list of files that are packaged into the gem.
Gem::Specification.new do |spec|
  # Identity and release information.
  spec.name     = "wordpress-import"
  spec.version  = "0.4.1"
  spec.date     = "2014-03-04"
  spec.authors  = ['Will Bradley']
  spec.email    = 'bradley.will@gmail.com'
  spec.homepage = 'https://github.com/zyphlar/wordpress-import'

  # Human-readable blurbs.
  spec.summary     = "Import WordPress XML dumps into your Ruby on Rails app."
  spec.description = "This gem imports a WordPress XML dump into Rails (Page, User, BlogPost, BlogCategory, Tag, BlogComment)"

  # Runtime dependencies.
  spec.add_dependency 'bundler', '~> 1.0'
  spec.add_dependency 'nokogiri', '~> 1.6.0'

  # Development-only dependencies.
  spec.add_development_dependency 'rspec-rails'
  spec.add_development_dependency 'database_cleaner'

  # Package everything found under app/, lib/ and config/ (relative to the
  # current working directory), plus a fixed set of top-level project files.
  globbed_sources = Dir["{app,lib,config}/**/*"]
  top_level_files = %w[MIT-LICENSE Rakefile Gemfile README.rdoc]
  spec.files = globbed_sources + top_level_files
end