Beginning to remove refinery stuff

This commit is contained in:
Will Bradley 2014-03-04 15:47:53 -07:00
parent 1e1f3574eb
commit 60ce62ad1b
29 changed files with 530 additions and 744 deletions

View File

@ -1,6 +1,8 @@
source "http://rubygems.org"
gem "rails", "3.0.9"
ruby '2.0.0'
gem "rails", "4.0.0"
#gem "capybara", ">= 1.0.0.beta1"
gem "sqlite3"
gem "rmagick"
@ -19,6 +21,4 @@ end
# gem 'ruby-debug'
# gem 'ruby-debug19'
gem 'refinerycms'
gem 'refinerycms-blog'
gem 'refinerycms-wordpress-import', :path => './'
gem 'wordpress-import', :path => './'

View File

@ -1,200 +0,0 @@
PATH
remote: ./
specs:
refinerycms-wordpress-import (0.4.0)
bundler (~> 1.0)
nokogiri (~> 1.5.0)
refinerycms (~> 1.0.0)
refinerycms-blog (~> 1.6.2)
GEM
remote: http://rubygems.org/
specs:
abstract (1.0.0)
actionmailer (3.0.9)
actionpack (= 3.0.9)
mail (~> 2.2.19)
actionpack (3.0.9)
activemodel (= 3.0.9)
activesupport (= 3.0.9)
builder (~> 2.1.2)
erubis (~> 2.6.6)
i18n (~> 0.5.0)
rack (~> 1.2.1)
rack-mount (~> 0.6.14)
rack-test (~> 0.5.7)
tzinfo (~> 0.3.23)
activemodel (3.0.9)
activesupport (= 3.0.9)
builder (~> 2.1.2)
i18n (~> 0.5.0)
activerecord (3.0.9)
activemodel (= 3.0.9)
activesupport (= 3.0.9)
arel (~> 2.0.10)
tzinfo (~> 0.3.23)
activeresource (3.0.9)
activemodel (= 3.0.9)
activesupport (= 3.0.9)
activesupport (3.0.9)
acts-as-taggable-on (2.0.6)
acts_as_indexed (0.7.3)
arel (2.0.10)
awesome_nested_set (2.0.1)
activerecord (>= 3.0.0)
babosa (0.3.5)
bcrypt-ruby (2.1.4)
builder (2.1.2)
database_cleaner (0.6.7)
devise (1.3.4)
bcrypt-ruby (~> 2.1.2)
orm_adapter (~> 0.0.3)
warden (~> 1.0.3)
diff-lcs (1.1.2)
dragonfly (0.9.5)
rack
erubis (2.6.6)
abstract (>= 1.0.0)
fakeweb (1.3.0)
ffi (1.0.9)
filters_spam (0.3)
friendly_id_globalize3 (3.2.1.4)
babosa (~> 0.3.0)
globalize3 (0.1.0)
activemodel (>= 3.0.0)
activerecord (>= 3.0.0)
guard (0.5.1)
thor (~> 0.14.6)
guard-bundler (0.1.3)
bundler (>= 1.0.0)
guard (>= 0.2.2)
guard-rspec (0.4.1)
guard (>= 0.4.0)
i18n (0.5.0)
libnotify (0.5.7)
mail (2.2.19)
activesupport (>= 2.3.6)
i18n (>= 0.4.0)
mime-types (~> 1.16)
treetop (~> 1.4.8)
mime-types (1.16)
nokogiri (1.5.0)
orm_adapter (0.0.5)
polyglot (0.3.2)
rack (1.2.3)
rack-cache (1.0.2)
rack (>= 0.4)
rack-mount (0.6.14)
rack (>= 1.0.0)
rack-test (0.5.7)
rack (>= 1.0)
rails (3.0.9)
actionmailer (= 3.0.9)
actionpack (= 3.0.9)
activerecord (= 3.0.9)
activeresource (= 3.0.9)
activesupport (= 3.0.9)
bundler (~> 1.0)
railties (= 3.0.9)
railties (3.0.9)
actionpack (= 3.0.9)
activesupport (= 3.0.9)
rake (>= 0.8.7)
rdoc (~> 3.4)
thor (~> 0.14.4)
rake (0.9.2)
rdoc (3.9.2)
refinerycms (1.0.4)
bundler (~> 1.0)
refinerycms-authentication (= 1.0.4)
refinerycms-base (= 1.0.4)
refinerycms-core (= 1.0.4)
refinerycms-dashboard (= 1.0.4)
refinerycms-images (= 1.0.4)
refinerycms-pages (= 1.0.4)
refinerycms-resources (= 1.0.4)
refinerycms-settings (= 1.0.4)
refinerycms-authentication (1.0.4)
devise (~> 1.3.0)
friendly_id_globalize3 (~> 3.2.1)
refinerycms-core (= 1.0.4)
refinerycms-base (1.0.4)
refinerycms-blog (1.6.2)
acts-as-taggable-on
filters_spam (~> 0.2)
refinerycms-core (~> 1.0.3)
seo_meta (~> 1.1.0)
refinerycms-core (1.0.4)
acts_as_indexed (~> 0.7)
awesome_nested_set (~> 2.0)
friendly_id_globalize3 (~> 3.2.1)
globalize3 (~> 0.1)
rails (~> 3.0.9)
refinerycms-base (= 1.0.4)
refinerycms-generators (~> 1.0)
refinerycms-settings (= 1.0.4)
truncate_html (~> 0.5)
will_paginate (= 3.0.pre2)
refinerycms-dashboard (1.0.4)
refinerycms-core (= 1.0.4)
refinerycms-generators (1.0.3)
refinerycms-images (1.0.4)
dragonfly (~> 0.9.0)
rack-cache (>= 0.5.3)
refinerycms-core (= 1.0.4)
refinerycms-pages (1.0.4)
awesome_nested_set (~> 2.0)
friendly_id_globalize3 (~> 3.2.1)
globalize3 (~> 0.1)
refinerycms-core (= 1.0.4)
seo_meta (~> 1.1)
refinerycms-resources (1.0.4)
dragonfly (~> 0.9.0)
rack-cache (>= 0.5.3)
refinerycms-core (= 1.0.4)
refinerycms-settings (1.0.4)
refinerycms-base (= 1.0.4)
rmagick (2.13.1)
rspec (2.6.0)
rspec-core (~> 2.6.0)
rspec-expectations (~> 2.6.0)
rspec-mocks (~> 2.6.0)
rspec-core (2.6.4)
rspec-expectations (2.6.0)
diff-lcs (~> 1.1.2)
rspec-mocks (2.6.0)
rspec-rails (2.6.1)
actionpack (~> 3.0)
activesupport (~> 3.0)
railties (~> 3.0)
rspec (~> 2.6.0)
seo_meta (1.1.1)
refinerycms-generators (~> 1.0.1)
sqlite3 (1.3.4)
thor (0.14.6)
treetop (1.4.10)
polyglot
polyglot (>= 0.3.1)
truncate_html (0.5.1)
tzinfo (0.3.29)
warden (1.0.5)
rack (>= 1.0)
will_paginate (3.0.pre2)
PLATFORMS
ruby
DEPENDENCIES
database_cleaner
fakeweb
ffi
guard-bundler
guard-rspec
libnotify
rails (= 3.0.9)
refinerycms
refinerycms-blog
refinerycms-wordpress-import!
rmagick
rspec-rails (>= 2.6.0)
sqlite3

View File

@ -1,4 +1,5 @@
Copyright 2011 YOURNAME
Copyright 2014 Will Bradley
portions Copyright 2011 Marc Remolt
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the

View File

@ -1,8 +1,10 @@
= Refinerycms-wordpress-import
= Wordpress-import
This litte project is an importer for WordPress XML dumps into refinerycms(-blog).
Fork of Marc Remolt's Refinerycms-wordpress-import ( https://github.com/mremolt/refinerycms-wordpress-import )
You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
This little project is an importer for WordPress XML dumps into Rails
You can find the source code on github: https://github.com/zyphlar/wordpress-import
Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags.
If your site (blog) structure uses new urls, the links WILL break! For example, if you used
@ -12,20 +14,18 @@ So your inner site links will point to the old WP url.
== Prerequisites
As refinerycms-wordpress-import is an addon for RefineryCMS, is shares the prerequisites with it.
So you'll first need a running installation of refinerycms and refinerycms-blog. Make sure
the site is running, all migrations are run and you created the first refinery user.
TODO
== Installation
Just add the gem to your project's Gemfile:
gem 'refinerycms-wordpress-import'
gem 'wordpress-import'
Or if you want to stay on the bleeding edge:
gem 'refinerycms-wordpress-import', :git => 'git://github.com/mremolt/refinerycms-wordpress-import.git'
gem 'wordpress-import', :git => 'git://github.com/zyphlar/wordpress-import.git'
and run

View File

@ -1,7 +0,0 @@
module Refinery
module WordPress
end
end
require 'wordpress'

View File

@ -13,24 +13,24 @@ namespace :wordpress do
end
desc "import blog data from a Refinery::WordPress XML dump"
desc "import blog data from a WordPressImport XML dump"
task :import_blog, :file_name do |task, params|
Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name])
dump = WordPressImport::Dump.new(params[:file_name])
dump.authors.each(&:to_refinery)
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
dump.posts(only_published).each(&:to_refinery)
Refinery::WordPress::Post.create_blog_page_if_necessary
WordPressImport::Post.create_blog_page_if_necessary
ENV["MODEL"] = 'BlogPost'
Rake::Task["friendly_id:redo_slugs"].invoke
ENV.delete("MODEL")
end
desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump"
desc "reset blog tables and then import blog data from a WordPressImport XML dump"
task :reset_and_import_blog, :file_name do |task, params|
Rake::Task["environment"].invoke
Rake::Task["wordpress:reset_blog"].invoke
@ -51,7 +51,7 @@ namespace :wordpress do
desc "import cms data from a WordPress XML dump"
task :import_pages, :file_name do |task, params|
Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name])
dump = WordPressImport::Dump.new(params[:file_name])
only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
dump.pages(only_published).each(&:to_refinery)
@ -65,7 +65,7 @@ namespace :wordpress do
page.save!
end
Refinery::WordPress::Post.create_blog_page_if_necessary
WordPressImport::Post.create_blog_page_if_necessary
ENV["MODEL"] = 'Page'
Rake::Task["friendly_id:redo_slugs"].invoke
@ -93,7 +93,7 @@ namespace :wordpress do
desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
task :import_and_replace_media, :file_name do |task, params|
Rake::Task["environment"].invoke
dump = Refinery::WordPress::Dump.new(params[:file_name])
dump = WordPressImport::Dump.new(params[:file_name])
attachments = dump.attachments.each(&:to_refinery)

5
lib/wordpress-import.rb Normal file
View File

@ -0,0 +1,5 @@
# Root namespace of the gem. It is declared empty here so the constant exists
# before any other file of the gem is loaded.
module WordPressImport
end
# Loads the 'wordpress' feature from the load path.
# NOTE(review): presumably this is the file that fills the namespace with the
# importer classes -- confirm against lib/wordpress.rb.
require 'wordpress'

View File

@ -1,16 +1,14 @@
require 'nokogiri'
require "wordpress/railtie"
module Refinery
module WordPress
autoload :Author, 'wordpress/author'
autoload :Tag, 'wordpress/tag'
autoload :Category, 'wordpress/category'
autoload :Page, 'wordpress/page'
autoload :Post, 'wordpress/post'
autoload :Comment, 'wordpress/comment'
autoload :Dump, 'wordpress/dump'
autoload :Attachment, 'wordpress/attachment'
end
# Namespace for the importer classes. Every class is registered for lazy
# loading: the constant is resolved on first use by requiring the file
# "wordpress/<downcased constant name>".
module WordPressImport
  # Registration order is kept identical to the hand-written list.
  %w[Author Tag Category Page Post Comment Dump Attachment].each do |const_name|
    autoload const_name.to_sym, "wordpress/#{const_name.downcase}"
  end
end

View File

@ -1,130 +1,128 @@
module Refinery
module WordPress
class Attachment
attr_reader :node
attr_reader :refinery_image
attr_reader :refinery_resource
def initialize(node)
@node = node
end
def title
node.xpath("title").text
end
def description
node.xpath("description").text
end
def file_name
url.split('/').last
end
def post_date
DateTime.parse node.xpath("wp:post_date").text
end
def url
node.xpath("wp:attachment_url").text
end
def url_pattern
url_parts = url.split('.')
extension = url_parts.pop
url_without_extension = url_parts.join('.')
/#{url_without_extension}(-\d+x\d+)?\.#{extension}/
end
def image?
url.match /\.(png|jpg|jpeg|gif)$/
end
def to_refinery
if image?
to_image
else
to_resource
end
end
def replace_url
if image?
replace_image_url
else
replace_resource_url
end
end
private
def to_image
image = ::Image.new
image.created_at = post_date
image.image_url = url
image.save!
@refinery_image = image
image
end
def to_resource
resource = ::Resource.new
resource.created_at = post_date
resource.file_url = url
resource.save!
@refinery_resource = resource
resource
end
def replace_image_url
replace_image_url_in_blog_posts
replace_image_url_in_pages
end
def replace_resource_url
replace_resource_url_in_blog_posts
replace_resource_url_in_pages
end
def replace_image_url_in_blog_posts
replace_url_in_blog_posts(refinery_image.image.url)
end
def replace_image_url_in_pages
replace_url_in_pages(refinery_image.image.url)
end
def replace_resource_url_in_blog_posts
replace_url_in_blog_posts(refinery_resource.file.url)
end
def replace_resource_url_in_pages
replace_url_in_pages(refinery_resource.file.url)
end
def replace_url_in_blog_posts(new_url)
::BlogPost.all.each do |post|
if (! post.body.empty?) && post.body.include?(url)
post.body = post.body.gsub(url_pattern, new_url)
post.save!
end
end
end
def replace_url_in_pages(new_url)
::Page.all.each do |page|
page.parts.each do |part|
if (! part.body.to_s.blank?) && part.body.include?(url)
part.body = part.body.gsub(url_pattern, new_url)
part.save!
end
end
end
end
module WordPressImport
  # Wraps one attachment <item> node of a WordPress XML dump. It can import
  # the attachment as an ::Image or ::Resource record and rewrite references
  # to the old WordPress URL inside already imported blog posts and pages.
  #
  # The node only has to respond to +xpath(String)+ and return an object that
  # responds to +text+.
  class Attachment
    attr_reader :node
    # Record created by #to_refinery for image attachments (nil until then).
    attr_reader :refinery_image
    # Record created by #to_refinery for non-image attachments (nil until then).
    attr_reader :refinery_resource

    def initialize(node)
      @node = node
    end

    # Text of the node's <title> element.
    def title
      node.xpath("title").text
    end

    # Text of the node's <description> element.
    def description
      node.xpath("description").text
    end

    # Last path segment of the attachment URL.
    def file_name
      url.split('/').last
    end

    # Value of <wp:post_date> parsed into a DateTime.
    def post_date
      DateTime.parse node.xpath("wp:post_date").text
    end

    # Original WordPress URL of the attachment (<wp:attachment_url>).
    def url
      node.xpath("wp:attachment_url").text
    end

    # Regexp matching the attachment URL itself as well as variants carrying
    # a "-<width>x<height>" suffix in front of the extension.
    #
    # The URL pieces are escaped so that characters such as "." or "+" are
    # matched literally instead of being interpreted as regexp operators.
    def url_pattern
      url_parts = url.split('.')
      extension = url_parts.pop
      url_without_extension = url_parts.join('.')

      # extension is nil for an empty URL; to_s keeps the old interpolation
      # result ("") instead of making Regexp.escape raise.
      /#{Regexp.escape(url_without_extension)}(-\d+x\d+)?\.#{Regexp.escape(extension.to_s)}/
    end

    # Truthy (a MatchData) when the URL ends in a known image extension,
    # nil otherwise. Case is ignored so "PHOTO.JPG" counts as an image too.
    def image?
      url.match(/\.(png|jpg|jpeg|gif)$/i)
    end

    # Imports the attachment as an image or as a generic resource and
    # returns the saved record.
    def to_refinery
      if image?
        to_image
      else
        to_resource
      end
    end

    # Replaces the old WordPress URL in blog posts and page parts with the
    # URL of the imported record. #to_refinery must have been called before.
    def replace_url
      if image?
        replace_image_url
      else
        replace_resource_url
      end
    end

    private

    # Creates and saves an ::Image pointing at the attachment URL.
    def to_image
      image = ::Image.new
      image.created_at = post_date
      image.image_url = url
      image.save!

      @refinery_image = image
      image
    end

    # Creates and saves a ::Resource pointing at the attachment URL.
    def to_resource
      resource = ::Resource.new
      resource.created_at = post_date
      resource.file_url = url
      resource.save!

      @refinery_resource = resource
      resource
    end

    def replace_image_url
      replace_image_url_in_blog_posts
      replace_image_url_in_pages
    end

    def replace_resource_url
      replace_resource_url_in_blog_posts
      replace_resource_url_in_pages
    end

    def replace_image_url_in_blog_posts
      replace_url_in_blog_posts(refinery_image.image.url)
    end

    def replace_image_url_in_pages
      replace_url_in_pages(refinery_image.image.url)
    end

    def replace_resource_url_in_blog_posts
      replace_url_in_blog_posts(refinery_resource.file.url)
    end

    def replace_resource_url_in_pages
      replace_url_in_pages(refinery_resource.file.url)
    end

    # Rewrites every blog post body that references the attachment and
    # saves only the posts that actually changed.
    def replace_url_in_blog_posts(new_url)
      ::BlogPost.all.each do |post|
        updated = rewritten_body(post.body, new_url)
        next unless updated

        post.body = updated
        post.save!
      end
    end

    # Same as #replace_url_in_blog_posts, applied to every part of every page.
    def replace_url_in_pages(new_url)
      ::Page.all.each do |page|
        page.parts.each do |part|
          updated = rewritten_body(part.body, new_url)
          next unless updated

          part.body = updated
          part.save!
        end
      end
    end

    # Returns +body+ with every match of #url_pattern replaced by +new_url+,
    # or nil when nothing matched (including a nil or empty body).
    #
    # Matching on the pattern rather than on the plain URL also catches
    # bodies that only contain a resized "-WxH" variant. The block form of
    # gsub inserts new_url verbatim, so backslashes in it are not treated
    # as back-references.
    def rewritten_body(body, new_url)
      text = body.to_s
      updated = text.gsub(url_pattern) { new_url }
      updated == text ? nil : updated
    end
  end
end

View File

@ -1,37 +1,35 @@
module Refinery
module WordPress
class Author
attr_reader :author_node
module WordPressImport
class Author
attr_reader :author_node
def initialize(author_node)
@author_node = author_node
end
def initialize(author_node)
@author_node = author_node
end
def login
author_node.xpath("wp:author_login").text
end
def login
author_node.xpath("wp:author_login").text
end
def email
author_node.xpath("wp:author_email").text
end
def email
author_node.xpath("wp:author_email").text
end
def ==(other)
login == other.login
end
def ==(other)
login == other.login
end
def inspect
"WordPress::Author: #{login} <#{email}>"
end
def inspect
"WordPress::Author: #{login} <#{email}>"
end
def to_refinery
user = User.find_or_initialize_by_username_and_email(login, email)
unless user.persisted?
user.password = 'password'
user.password_confirmation = 'password'
user.save
end
user
def to_refinery
user = User.find_or_initialize_by_username_and_email(login, email)
unless user.persisted?
user.password = 'password'
user.password_confirmation = 'password'
user.save
end
user
end
end
end
end

View File

@ -1,19 +1,17 @@
module Refinery
module WordPress
class Category
attr_accessor :name
module WordPressImport
class Category
attr_accessor :name
def initialize(text)
@name = text
end
def initialize(text)
@name = text
end
def ==(other)
name == other.name
end
def ==(other)
name == other.name
end
def to_refinery
BlogCategory.find_or_create_by_title(name)
end
def to_refinery
BlogCategory.find_or_create_by_title(name)
end
end
end

View File

@ -1,48 +1,46 @@
module Refinery
module WordPress
class Comment
attr_reader :node
module WordPressImport
class Comment
attr_reader :node
def initialize(node)
@node = node
end
def initialize(node)
@node = node
end
def author
node.xpath('wp:comment_author').text
end
def author
node.xpath('wp:comment_author').text
end
def email
node.xpath('wp:comment_author_email').text
end
def email
node.xpath('wp:comment_author_email').text
end
def url
node.xpath('wp:comment_author_url').text
end
def url
node.xpath('wp:comment_author_url').text
end
def date
DateTime.parse node.xpath("wp:comment_date").text
end
def date
DateTime.parse node.xpath("wp:comment_date").text
end
def content
node.xpath('wp:comment_content').text
end
def content
node.xpath('wp:comment_content').text
end
def approved?
node.xpath('wp:comment_approved').text.to_i == 1
end
def approved?
node.xpath('wp:comment_approved').text.to_i == 1
end
def ==(other)
(email == other.email) && (date == other.date) && (content == other.content)
end
def ==(other)
(email == other.email) && (date == other.date) && (content == other.content)
end
def to_refinery
comment = BlogComment.new :name => author, :email => email
def to_refinery
comment = BlogComment.new :name => author, :email => email
comment.body = content
comment.created_at = date
comment.state = approved? ? 'approved' : 'rejected'
comment
end
comment.body = content
comment.created_at = date
comment.state = approved? ? 'approved' : 'rejected'
comment
end
end
end

View File

@ -1,57 +1,55 @@
module Refinery
module WordPress
class Dump
attr_reader :doc
module WordPressImport
class Dump
attr_reader :doc
def initialize(file_name)
file_name = File.expand_path(file_name)
def initialize(file_name)
file_name = File.expand_path(file_name)
raise "Given file '#{file_name}' no file or not readable." \
unless File.file?(file_name) && File.readable?(file_name)
file = File.open(file_name)
@doc = Nokogiri::XML(file)
raise "Given file '#{file_name}' no file or not readable." \
unless File.file?(file_name) && File.readable?(file_name)
file = File.open(file_name)
@doc = Nokogiri::XML(file)
end
def authors
doc.xpath("//wp:author").collect do |author|
Author.new(author)
end
end
def pages(only_published=false)
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
Page.new(page)
end
def authors
doc.xpath("//wp:author").collect do |author|
Author.new(author)
end
pages = pages.select(&:published?) if only_published
pages
end
def posts(only_published=false)
posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post|
Post.new(post)
end
posts = posts.select(&:published?) if only_published
posts
end
def pages(only_published=false)
pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
Page.new(page)
end
pages = pages.select(&:published?) if only_published
pages
def tags
doc.xpath("//wp:tag/wp:tag_slug").collect do |tag|
Tag.new(tag.text)
end
end
def posts(only_published=false)
posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post|
Post.new(post)
end
posts = posts.select(&:published?) if only_published
posts
def categories
doc.xpath("//wp:category/wp:cat_name").collect do |category|
Category.new(category.text)
end
end
def tags
doc.xpath("//wp:tag/wp:tag_slug").collect do |tag|
Tag.new(tag.text)
end
end
def categories
doc.xpath("//wp:category/wp:cat_name").collect do |category|
Category.new(category.text)
end
end
def attachments
doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
Attachment.new(attachment)
end
def attachments
doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
Attachment.new(attachment)
end
end
end

View File

@ -1,107 +1,114 @@
module Refinery
module WordPress
class Page
include ::ActionView::Helpers::TagHelper
include ::ActionView::Helpers::TextHelper
module WordPressImport
class Page
include ::ActionView::Helpers::TagHelper
include ::ActionView::Helpers::TextHelper
attr_reader :node
attr_reader :node
def initialize(node)
@node = node
def initialize(node)
@node = node
end
def inspect
"WordPress::Page(#{post_id}): #{title}"
end
def title
node.xpath("title").text
end
def content
node.xpath("content:encoded").text
end
def content_formatted
formatted = format_syntax_highlighter(format_paragraphs(content))
# remove all tags inside <pre> that simple_format created
# TODO: replace format_paragraphs with a method, that ignores pre-tags
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
"#{$1}#{strip_tags($2)}#{$3}"
end
def inspect
"WordPress::Page(#{post_id}): #{title}"
end
def title
node.xpath("title").text
end
def content
node.xpath("content:encoded").text
end
def content_formatted
formatted = format_syntax_highlighter(format_paragraphs(content))
# remove all tags inside <pre> that simple_format created
# TODO: replace format_paragraphs with a method, that ignores pre-tags
formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match|
"#{$1}#{strip_tags($2)}#{$3}"
end
formatted
end
def creator
node.xpath("dc:creator").text
end
def post_date
DateTime.parse node.xpath("wp:post_date").text
end
def post_id
node.xpath("wp:post_id").text.to_i
end
def parent_id
dump_id = node.xpath("wp:post_parent").text.to_i
dump_id == 0 ? nil : dump_id
end
def status
node.xpath("wp:status").text
end
def draft?
status != 'publish'
end
def published?
! draft?
end
def ==(other)
post_id == other.post_id
end
def to_refinery
page = ::Page.create!(:id => post_id, :title => title,
:created_at => post_date, :draft => draft?)
page.parts.create(:title => 'Body', :body => content_formatted)
page
end
private
def format_paragraphs(text, html_options={})
# WordPress doesn't export <p>-Tags, so let's run a simple_format over
# the content. As we trust ourselves, no sanatize. This code is heavily
# inspired by the simple_format rails helper
text = ''.html_safe if text.nil?
start_tag = tag('p', html_options, true)
text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n
text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}") # 2+ newline -> paragraph
text.insert 0, start_tag
formatted
end
text.html_safe.safe_concat("</p>")
end
def creator
node.xpath("dc:creator").text
end
def format_syntax_highlighter(text)
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
# In WordPress you can (via a plugin) enclose code in [lang][/lang]
# blocks, which are converted to a <pre>-tag with a class corresponding
# to the language.
#
# Example:
# [ruby]p "Hello World"[/ruby]
# -> <pre class="brush: ruby">p "Hello world"</pre>
text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
end
def post_date
DateTime.parse node.xpath("wp:post_date").text
end
def post_id
node.xpath("wp:post_id").text.to_i
end
def parent_id
dump_id = node.xpath("wp:post_parent").text.to_i
dump_id == 0 ? nil : dump_id
end
def status
node.xpath("wp:status").text
end
def draft?
status != 'publish'
end
def published?
! draft?
end
def ==(other)
post_id == other.post_id
end
#NEED:
# dc:creator -> "user_id"
# wp:post_name -> "slug"
# pubDate -> "published_at"
#OK:
# title -> "title"
# content:encoded -> "body"
# wp:post_date_gmt -> "created_at"
def to_refinery
page = ::Page.create!(:id => post_id, :title => title,
:created_at => post_date, :draft => draft?)
page.parts.create(:title => 'Body', :body => content_formatted)
page
end
private
def format_paragraphs(text, html_options={})
# WordPress doesn't export <p>-Tags, so let's run a simple_format over
# the content. As we trust ourselves, no sanatize. This code is heavily
# inspired by the simple_format rails helper
text = ''.html_safe if text.nil?
start_tag = tag('p', html_options, true)
text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n
text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}") # 2+ newline -> paragraph
text.insert 0, start_tag
text.html_safe.safe_concat("</p>")
end
def format_syntax_highlighter(text)
# Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
# In WordPress you can (via a plugin) enclose code in [lang][/lang]
# blocks, which are converted to a <pre>-tag with a class corresponding
# to the language.
#
# Example:
# [ruby]p "Hello World"[/ruby]
# -> <pre class="brush: ruby">p "Hello world"</pre>
text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
end
end
end

View File

@ -1,85 +1,83 @@
module Refinery
module WordPress
class Post < Page
def tags
# xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
path = if node.xpath("category[@domain='post_tag']").count > 0
"category[@domain='post_tag']"
else
"category[@domain='tag']"
end
node.xpath(path).collect do |tag_node|
Tag.new(tag_node.text)
end
module WordPressImport
class Post < Page
def tags
# xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
path = if node.xpath("category[@domain='post_tag']").count > 0
"category[@domain='post_tag']"
else
"category[@domain='tag']"
end
def tag_list
tags.collect(&:name).join(',')
node.xpath(path).collect do |tag_node|
Tag.new(tag_node.text)
end
def categories
node.xpath("category[@domain='category']").collect do |cat|
Category.new(cat.text)
end
end
def comments
node.xpath("wp:comment").collect do |comment_node|
Comment.new(comment_node)
end
end
def to_refinery
user = ::User.find_by_username(creator) || ::User.first
raise "Referenced User doesn't exist! Make sure the authors are imported first." \
unless user
begin
post = ::BlogPost.new :title => title, :body => content_formatted,
:draft => draft?, :published_at => post_date, :created_at => post_date,
:user_id => user.id, :tag_list => tag_list
post.save!
::BlogPost.transaction do
categories.each do |category|
post.categories << category.to_refinery
end
comments.each do |comment|
comment = comment.to_refinery
comment.post = post
comment.save
end
end
rescue ActiveRecord::RecordInvalid
# if the title has already been taken (WP allows duplicates here,
# refinery doesn't) append the post_id to it, making it unique
post.title = "#{title}-#{post_id}"
post.save
end
post
end
def self.create_blog_page_if_necessary
# refinerycms wants a page at /blog, so let's make sure there is one
# taken from the original db seeds from refinery-blog
unless ::Page.where("link_url = ?", '/blog').exists?
page = ::Page.create(
:title => "Blog",
:link_url => "/blog",
:deletable => false,
:position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
:menu_match => "^/blogs?(\/|\/.+?|)$"
)
::Page.default_parts.each do |default_page_part|
page.parts.create(:title => default_page_part, :body => nil)
end
end
end
end
def tag_list
tags.collect(&:name).join(',')
end
def categories
node.xpath("category[@domain='category']").collect do |cat|
Category.new(cat.text)
end
end
def comments
node.xpath("wp:comment").collect do |comment_node|
Comment.new(comment_node)
end
end
def to_refinery
user = ::User.find_by_username(creator) || ::User.first
raise "Referenced User doesn't exist! Make sure the authors are imported first." \
unless user
begin
post = ::BlogPost.new :title => title, :body => content_formatted,
:draft => draft?, :published_at => post_date, :created_at => post_date,
:user_id => user.id, :tag_list => tag_list
post.save!
::BlogPost.transaction do
categories.each do |category|
post.categories << category.to_refinery
end
comments.each do |comment|
comment = comment.to_refinery
comment.post = post
comment.save
end
end
rescue ActiveRecord::RecordInvalid
# if the title has already been taken (WP allows duplicates here,
# refinery doesn't) append the post_id to it, making it unique
post.title = "#{title}-#{post_id}"
post.save
end
post
end
def self.create_blog_page_if_necessary
# refinerycms wants a page at /blog, so let's make sure there is one
# taken from the original db seeds from refinery-blog
unless ::Page.where("link_url = ?", '/blog').exists?
page = ::Page.create(
:title => "Blog",
:link_url => "/blog",
:deletable => false,
:position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
:menu_match => "^/blogs?(\/|\/.+?|)$"
)
::Page.default_parts.each do |default_page_part|
page.parts.create(:title => default_page_part, :body => nil)
end
end
end
end
end

View File

@ -1,9 +1,7 @@
module Refinery
module WordPress
class Railtie < Rails::Railtie
rake_tasks do
load "tasks/wordpress.rake"
end
module WordPressImport
  # Railtie that registers the gem's rake task file through the rake_tasks
  # hook, which loads "tasks/wordpress.rake" when the hook's block is run.
  class Railtie < Rails::Railtie
    rake_tasks { load "tasks/wordpress.rake" }
  end
end

View File

@ -1,20 +1,18 @@
module Refinery
module WordPress
class Tag
attr_accessor :name
def initialize(text)
@name = text
end
def ==(other)
name == other.name
end
def to_refinery
::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
end
module WordPressImport
  # Simple value object for a tag found in a WordPress dump; it only carries
  # the tag's name.
  class Tag
    attr_accessor :name

    def initialize(text)
      @name = text
    end

    # Two tags are equal when their names are equal. Objects that do not
    # expose a +name+ (nil, strings, ...) are simply unequal instead of
    # raising NoMethodError.
    def ==(other)
      other.respond_to?(:name) && name == other.name
    end

    # Finds or creates the ActsAsTaggableOn tag with this name.
    # NOTE(review): dynamic find_or_create_by_* finders are deprecated as of
    # Rails 4.0 -- confirm this still resolves under the new Rails version.
    def to_refinery
      ::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
    end
  end
end

View File

@ -1,23 +0,0 @@
# Provide a simple gemspec so you can easily use your enginex
# project in your rails apps through git.
Gem::Specification.new do |s|
s.name = "refinerycms-wordpress-import"
s.summary = "Import WordPress XML dumps into refinerycms(-blog)."
s.description = "This gem imports a WordPress XML dump into refinerycms (Page, User) and refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)"
s.version = "0.4.0"
s.date = "2011-06-13"
s.authors = ['Marc Remolt']
s.email = 'marc.remolt@googlemail.com'
s.homepage = 'https://github.com/mremolt/refinerycms-wordpress-import'
s.add_dependency 'bundler', '~> 1.0'
s.add_dependency 'refinerycms', '~> 1.0.0'
s.add_dependency 'refinerycms-blog', '~> 1.6.2'
s.add_dependency 'nokogiri', '~> 1.5.0'
s.add_development_dependency 'rspec-rails'
s.add_development_dependency 'database_cleaner'
s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
end

View File

@ -1,6 +1,6 @@
require 'spec_helper'
describe Refinery::WordPress::Attachment, :type => :model do
describe WordPressImport::Attachment, :type => :model do
context "an image attchment" do
let(:attachment) { test_dump.attachments.first }

View File

@ -1,6 +1,6 @@
require 'spec_helper'
describe Refinery::WordPress::Author, :type => :model do
describe WordPressImport::Author, :type => :model do
let(:author) { test_dump.authors.first }
specify { author.login.should == 'admin' }

View File

@ -1,15 +1,15 @@
require 'spec_helper'
describe Refinery::WordPress::Category, :type => :model do
let(:category) { Refinery::WordPress::Category.new('Rant') }
describe WordPressImport::Category, :type => :model do
let(:category) { WordPressImport::Category.new('Rant') }
describe "#name" do
specify { category.name.should == 'Rant' }
end
describe "#==" do
specify { category.should == Refinery::WordPress::Category.new('Rant') }
specify { category.should_not == Refinery::WordPress::Category.new('Tutorials') }
specify { category.should == WordPressImport::Category.new('Rant') }
specify { category.should_not == WordPressImport::Category.new('Tutorials') }
end
describe "#to_refinery" do

View File

@ -1,10 +1,10 @@
require 'spec_helper'
describe Refinery::WordPress::Dump, :type => :model do
describe WordPressImport::Dump, :type => :model do
let(:dump) { test_dump }
it "should create a Dump object given a xml file" do
dump.should be_a Refinery::WordPress::Dump
dump.should be_a WordPressImport::Dump
end
it "should include a Nokogiri::XML object" do
@ -13,12 +13,12 @@ describe Refinery::WordPress::Dump, :type => :model do
describe "#tags" do
let(:tags) do
[ Refinery::WordPress::Tag.new('css'), Refinery::WordPress::Tag.new('html'),
Refinery::WordPress::Tag.new('php'), Refinery::WordPress::Tag.new('ruby')]
[ WordPressImport::Tag.new('css'), WordPressImport::Tag.new('html'),
WordPressImport::Tag.new('php'), WordPressImport::Tag.new('ruby')]
end
specify { dump.tags.count == 4 }
specify { dump.tags.first.should be_a(Refinery::WordPress::Tag) }
specify { dump.tags.first.should be_a(WordPressImport::Tag) }
it "should return all included tags" do
tags.each do |tag|
@ -29,12 +29,12 @@ describe Refinery::WordPress::Dump, :type => :model do
describe "#categories" do
let(:categories) do
[ Refinery::WordPress::Category.new('Rant'), Refinery::WordPress::Category.new('Tutorials'),
Refinery::WordPress::Category.new('Uncategorized') ]
[ WordPressImport::Category.new('Rant'), WordPressImport::Category.new('Tutorials'),
WordPressImport::Category.new('Uncategorized') ]
end
specify { dump.categories.count == 4 }
specify { dump.categories.first.should be_a(Refinery::WordPress::Category) }
specify { dump.categories.first.should be_a(WordPressImport::Category) }
it "should return all included categories" do
categories.each do |cat|
@ -48,7 +48,7 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.pages.should have(3).pages
end
specify { dump.pages.first.should be_a(Refinery::WordPress::Page) }
specify { dump.pages.first.should be_a(WordPressImport::Page) }
it "should return only published pages with only_published=true" do
dump.pages(true).should have(2).pages
@ -60,7 +60,7 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.authors.should have(1).author
end
specify { dump.authors.first.should be_a(Refinery::WordPress::Author) }
specify { dump.authors.first.should be_a(WordPressImport::Author) }
end
describe "#posts" do
@ -68,7 +68,7 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.posts.should have(3).posts
end
specify { dump.posts.first.should be_a(Refinery::WordPress::Post) }
specify { dump.posts.first.should be_a(WordPressImport::Post) }
it "should return only published posts with only_published=true" do
dump.posts(true).should have(2).posts
@ -80,6 +80,6 @@ describe Refinery::WordPress::Dump, :type => :model do
dump.attachments.should have(2).attachments
end
specify { dump.attachments.first.should be_a(Refinery::WordPress::Attachment) }
specify { dump.attachments.first.should be_a(WordPressImport::Attachment) }
end
end

View File

@ -1,6 +1,6 @@
require 'spec_helper'
describe Refinery::WordPress::Page, :type => :model do
describe WordPressImport::Page, :type => :model do
let(:dump) { test_dump }
let(:page) { test_dump.pages.last }

View File

@ -1,6 +1,6 @@
require 'spec_helper'
describe Refinery::WordPress::Post, :type => :model do
describe WordPressImport::Post, :type => :model do
let(:post) { test_dump.posts.last }
specify { post.title.should == 'Third blog post' }
@ -17,15 +17,15 @@ describe Refinery::WordPress::Post, :type => :model do
describe "#categories" do
specify { post.categories.should have(1).category }
specify { post.categories.first.should == Refinery::WordPress::Category.new('Rant') }
specify { post.categories.first.should == WordPressImport::Category.new('Rant') }
end
describe "#tags" do
specify { post.tags.should have(3).tags }
specify { post.tags.should include(Refinery::WordPress::Tag.new('css')) }
specify { post.tags.should include(Refinery::WordPress::Tag.new('html')) }
specify { post.tags.should include(Refinery::WordPress::Tag.new('php')) }
specify { post.tags.should include(WordPressImport::Tag.new('css')) }
specify { post.tags.should include(WordPressImport::Tag.new('html')) }
specify { post.tags.should include(WordPressImport::Tag.new('php')) }
end
specify { post.tag_list.should == 'css,html,php' }

View File

@ -1,15 +1,15 @@
require 'spec_helper'
describe Refinery::WordPress::Tag, :type => :model do
let(:tag) { Refinery::WordPress::Tag.new('ruby') }
describe WordPressImport::Tag, :type => :model do
let(:tag) { WordPressImport::Tag.new('ruby') }
describe "#name" do
specify { tag.name.should == 'ruby' }
end
describe "#==" do
specify { tag.should == Refinery::WordPress::Tag.new('ruby') }
specify { tag.should_not == Refinery::WordPress::Tag.new('php') }
specify { tag.should == WordPressImport::Tag.new('ruby') }
specify { tag.should_not == WordPressImport::Tag.new('php') }
end
describe "#to_refinery" do

View File

@ -1,7 +0,0 @@
require 'spec_helper'
describe Refinery::WordPress do
it "should be valid" do
Refinery::WordPress.should be_a(Module)
end
end

View File

@ -1,11 +1,11 @@
module Refinery::WordPress::SpecHelpers
module WordPressImport::SpecHelpers
def test_dump
file_name = File.expand_path(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml'))
Refinery::WordPress::Dump.new(file_name)
WordPressImport::Dump.new(file_name)
end
end
RSpec.configure do |config|
config.include Refinery::WordPress::SpecHelpers
config.include WordPressImport::SpecHelpers
end

View File

@ -0,0 +1,7 @@
require 'spec_helper'
# Smoke test: the gem's top-level namespace is defined and is a Module.
describe WordPressImport do
  it("should be valid") { WordPressImport.should be_a Module }
end

21
wordpress-import.gemspec Normal file
View File

@ -0,0 +1,21 @@
# Gem specification for wordpress-import.
#
# Declares the gem's metadata, its runtime and development dependencies, and
# the list of files that are packaged, so the project can be built with
# `gem build` or referenced from a Gemfile via :path / :git.
Gem::Specification.new do |s|
  # --- Identity -------------------------------------------------------------
  s.name        = "wordpress-import"
  s.summary     = "Import WordPress XML dumps into your Ruby on Rails app."
  s.description = "This gem imports a WordPress XML dump into Rails (Page, User, BlogPost, BlogCategory, Tag, BlogComment)"
  s.version     = "0.4.1"
  s.date        = "2014-03-04"

  # The package ships an MIT-LICENSE file (see s.files below); declaring the
  # license keeps RubyGems from warning about a missing license at build time.
  # NOTE(review): confirm the MIT-LICENSE file is the intended license text.
  s.license     = "MIT"

  # --- Ownership ------------------------------------------------------------
  s.authors     = ['Will Bradley']
  s.email       = 'bradley.will@gmail.com'
  s.homepage    = 'https://github.com/zyphlar/wordpress-import'

  # --- Runtime dependencies -------------------------------------------------
  s.add_dependency 'bundler', '~> 1.0'
  s.add_dependency 'nokogiri', '~> 1.6.0'

  # --- Development-only dependencies ----------------------------------------
  s.add_development_dependency 'rspec-rails'
  s.add_development_dependency 'database_cleaner'

  # Everything under app/, lib/ and config/ plus the top-level project files.
  # The glob is resolved relative to the current working directory.
  s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
end