diff --git a/Gemfile b/Gemfile index 4b41b28..89d7671 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,8 @@ source "http://rubygems.org" -gem "rails", "3.0.9" +ruby '2.0.0' + +gem "rails", "4.0.0" #gem "capybara", ">= 1.0.0.beta1" gem "sqlite3" gem "rmagick" @@ -19,6 +21,4 @@ end # gem 'ruby-debug' # gem 'ruby-debug19' -gem 'refinerycms' -gem 'refinerycms-blog' -gem 'refinerycms-wordpress-import', :path => './' +gem 'wordpress-import', :path => './' diff --git a/Gemfile.lock b/Gemfile.lock deleted file mode 100644 index c039bac..0000000 --- a/Gemfile.lock +++ /dev/null @@ -1,200 +0,0 @@ -PATH - remote: ./ - specs: - refinerycms-wordpress-import (0.4.0) - bundler (~> 1.0) - nokogiri (~> 1.5.0) - refinerycms (~> 1.0.0) - refinerycms-blog (~> 1.6.2) - -GEM - remote: http://rubygems.org/ - specs: - abstract (1.0.0) - actionmailer (3.0.9) - actionpack (= 3.0.9) - mail (~> 2.2.19) - actionpack (3.0.9) - activemodel (= 3.0.9) - activesupport (= 3.0.9) - builder (~> 2.1.2) - erubis (~> 2.6.6) - i18n (~> 0.5.0) - rack (~> 1.2.1) - rack-mount (~> 0.6.14) - rack-test (~> 0.5.7) - tzinfo (~> 0.3.23) - activemodel (3.0.9) - activesupport (= 3.0.9) - builder (~> 2.1.2) - i18n (~> 0.5.0) - activerecord (3.0.9) - activemodel (= 3.0.9) - activesupport (= 3.0.9) - arel (~> 2.0.10) - tzinfo (~> 0.3.23) - activeresource (3.0.9) - activemodel (= 3.0.9) - activesupport (= 3.0.9) - activesupport (3.0.9) - acts-as-taggable-on (2.0.6) - acts_as_indexed (0.7.3) - arel (2.0.10) - awesome_nested_set (2.0.1) - activerecord (>= 3.0.0) - babosa (0.3.5) - bcrypt-ruby (2.1.4) - builder (2.1.2) - database_cleaner (0.6.7) - devise (1.3.4) - bcrypt-ruby (~> 2.1.2) - orm_adapter (~> 0.0.3) - warden (~> 1.0.3) - diff-lcs (1.1.2) - dragonfly (0.9.5) - rack - erubis (2.6.6) - abstract (>= 1.0.0) - fakeweb (1.3.0) - ffi (1.0.9) - filters_spam (0.3) - friendly_id_globalize3 (3.2.1.4) - babosa (~> 0.3.0) - globalize3 (0.1.0) - activemodel (>= 3.0.0) - activerecord (>= 3.0.0) - guard (0.5.1) - thor (~> 0.14.6) 
- guard-bundler (0.1.3) - bundler (>= 1.0.0) - guard (>= 0.2.2) - guard-rspec (0.4.1) - guard (>= 0.4.0) - i18n (0.5.0) - libnotify (0.5.7) - mail (2.2.19) - activesupport (>= 2.3.6) - i18n (>= 0.4.0) - mime-types (~> 1.16) - treetop (~> 1.4.8) - mime-types (1.16) - nokogiri (1.5.0) - orm_adapter (0.0.5) - polyglot (0.3.2) - rack (1.2.3) - rack-cache (1.0.2) - rack (>= 0.4) - rack-mount (0.6.14) - rack (>= 1.0.0) - rack-test (0.5.7) - rack (>= 1.0) - rails (3.0.9) - actionmailer (= 3.0.9) - actionpack (= 3.0.9) - activerecord (= 3.0.9) - activeresource (= 3.0.9) - activesupport (= 3.0.9) - bundler (~> 1.0) - railties (= 3.0.9) - railties (3.0.9) - actionpack (= 3.0.9) - activesupport (= 3.0.9) - rake (>= 0.8.7) - rdoc (~> 3.4) - thor (~> 0.14.4) - rake (0.9.2) - rdoc (3.9.2) - refinerycms (1.0.4) - bundler (~> 1.0) - refinerycms-authentication (= 1.0.4) - refinerycms-base (= 1.0.4) - refinerycms-core (= 1.0.4) - refinerycms-dashboard (= 1.0.4) - refinerycms-images (= 1.0.4) - refinerycms-pages (= 1.0.4) - refinerycms-resources (= 1.0.4) - refinerycms-settings (= 1.0.4) - refinerycms-authentication (1.0.4) - devise (~> 1.3.0) - friendly_id_globalize3 (~> 3.2.1) - refinerycms-core (= 1.0.4) - refinerycms-base (1.0.4) - refinerycms-blog (1.6.2) - acts-as-taggable-on - filters_spam (~> 0.2) - refinerycms-core (~> 1.0.3) - seo_meta (~> 1.1.0) - refinerycms-core (1.0.4) - acts_as_indexed (~> 0.7) - awesome_nested_set (~> 2.0) - friendly_id_globalize3 (~> 3.2.1) - globalize3 (~> 0.1) - rails (~> 3.0.9) - refinerycms-base (= 1.0.4) - refinerycms-generators (~> 1.0) - refinerycms-settings (= 1.0.4) - truncate_html (~> 0.5) - will_paginate (= 3.0.pre2) - refinerycms-dashboard (1.0.4) - refinerycms-core (= 1.0.4) - refinerycms-generators (1.0.3) - refinerycms-images (1.0.4) - dragonfly (~> 0.9.0) - rack-cache (>= 0.5.3) - refinerycms-core (= 1.0.4) - refinerycms-pages (1.0.4) - awesome_nested_set (~> 2.0) - friendly_id_globalize3 (~> 3.2.1) - globalize3 (~> 0.1) - 
refinerycms-core (= 1.0.4) - seo_meta (~> 1.1) - refinerycms-resources (1.0.4) - dragonfly (~> 0.9.0) - rack-cache (>= 0.5.3) - refinerycms-core (= 1.0.4) - refinerycms-settings (1.0.4) - refinerycms-base (= 1.0.4) - rmagick (2.13.1) - rspec (2.6.0) - rspec-core (~> 2.6.0) - rspec-expectations (~> 2.6.0) - rspec-mocks (~> 2.6.0) - rspec-core (2.6.4) - rspec-expectations (2.6.0) - diff-lcs (~> 1.1.2) - rspec-mocks (2.6.0) - rspec-rails (2.6.1) - actionpack (~> 3.0) - activesupport (~> 3.0) - railties (~> 3.0) - rspec (~> 2.6.0) - seo_meta (1.1.1) - refinerycms-generators (~> 1.0.1) - sqlite3 (1.3.4) - thor (0.14.6) - treetop (1.4.10) - polyglot - polyglot (>= 0.3.1) - truncate_html (0.5.1) - tzinfo (0.3.29) - warden (1.0.5) - rack (>= 1.0) - will_paginate (3.0.pre2) - -PLATFORMS - ruby - -DEPENDENCIES - database_cleaner - fakeweb - ffi - guard-bundler - guard-rspec - libnotify - rails (= 3.0.9) - refinerycms - refinerycms-blog - refinerycms-wordpress-import! - rmagick - rspec-rails (>= 2.6.0) - sqlite3 diff --git a/MIT-LICENSE b/MIT-LICENSE index 80cf031..739062f 100644 --- a/MIT-LICENSE +++ b/MIT-LICENSE @@ -1,4 +1,5 @@ -Copyright 2011 YOURNAME +Copyright 2014 Will Bradley +portions Copyright 2011 Marc Remolt Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/README.rdoc b/README.rdoc index 50ef919..976da2d 100644 --- a/README.rdoc +++ b/README.rdoc @@ -1,8 +1,10 @@ -= Refinerycms-wordpress-import += Wordpress-import -This litte project is an importer for WordPress XML dumps into refinerycms(-blog). 
+Fork of Marc Remolt's Refinerycms-wordpress-import ( https://github.com/mremolt/refinerycms-wordpress-import ) -You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import +This little project is an importer for WordPress XML dumps into Rails + +You can find the source code on github: https://github.com/zyphlar/wordpress-import Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as -Tags. If your site (blog) structure uses new urls, the links WILL break! For example, if you used @@ -12,20 +14,18 @@ So your inner site links will point to the old WP url. == Prerequisites -As refinerycms-wordpress-import is an addon for RefineryCMS, is shares the prerequisites with it. -So you'll first need a running installation of refinerycms and refinerycms-blog. Make sure -the site is running, all migrations are run and you created the first refinery user. +TODO == Installation Just add the gem to your projects Gemfile: - gem 'refinerycms-wordpress-import' + gem 'wordpress-import' Or if you want to stay on the bleeding edge: - gem 'refinerycms-wordpress-import', :git => 'git://github.com/mremolt/refinerycms-wordpress-import.git' + gem 'wordpress-import', :git => 'git://github.com/zyphlar/wordpress-import.git' and run diff --git a/lib/refinerycms-wordpress-import.rb b/lib/refinerycms-wordpress-import.rb deleted file mode 100644 index 9457f44..0000000 --- a/lib/refinerycms-wordpress-import.rb +++ /dev/null @@ -1,7 +0,0 @@ -module Refinery - module WordPress - - end -end - -require 'wordpress' diff --git a/lib/tasks/wordpress.rake b/lib/tasks/wordpress.rake index b629298..e7908a5 100644 --- a/lib/tasks/wordpress.rake +++ b/lib/tasks/wordpress.rake @@ -13,24 +13,24 @@ namespace :wordpress do end - desc "import blog data from a Refinery::WordPress XML dump" + desc "import blog data from a WordPressImport XML dump" task :import_blog, :file_name do |task, params| Rake::Task["environment"].invoke - dump = 
Refinery::WordPress::Dump.new(params[:file_name]) + dump = WordPressImport::Dump.new(params[:file_name]) dump.authors.each(&:to_refinery) only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false dump.posts(only_published).each(&:to_refinery) - Refinery::WordPress::Post.create_blog_page_if_necessary + WordPressImport::Post.create_blog_page_if_necessary ENV["MODEL"] = 'BlogPost' Rake::Task["friendly_id:redo_slugs"].invoke ENV.delete("MODEL") end - desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump" + desc "reset blog tables and then import blog data from a WordPressImport XML dump" task :reset_and_import_blog, :file_name do |task, params| Rake::Task["environment"].invoke Rake::Task["wordpress:reset_blog"].invoke @@ -51,7 +51,7 @@ namespace :wordpress do desc "import cms data from a WordPress XML dump" task :import_pages, :file_name do |task, params| Rake::Task["environment"].invoke - dump = Refinery::WordPress::Dump.new(params[:file_name]) + dump = WordPressImport::Dump.new(params[:file_name]) only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false dump.pages(only_published).each(&:to_refinery) @@ -65,7 +65,7 @@ namespace :wordpress do page.save! 
end - Refinery::WordPress::Post.create_blog_page_if_necessary + WordPressImport::Post.create_blog_page_if_necessary ENV["MODEL"] = 'Page' Rake::Task["friendly_id:redo_slugs"].invoke @@ -93,7 +93,7 @@ namespace :wordpress do desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts" task :import_and_replace_media, :file_name do |task, params| Rake::Task["environment"].invoke - dump = Refinery::WordPress::Dump.new(params[:file_name]) + dump = WordPressImport::Dump.new(params[:file_name]) attachments = dump.attachments.each(&:to_refinery) diff --git a/lib/wordpress-import.rb b/lib/wordpress-import.rb new file mode 100644 index 0000000..3bb8664 --- /dev/null +++ b/lib/wordpress-import.rb @@ -0,0 +1,5 @@ +module WordPressImport + +end + +require 'wordpress' diff --git a/lib/wordpress.rb b/lib/wordpress.rb index 05c4244..22aedd7 100644 --- a/lib/wordpress.rb +++ b/lib/wordpress.rb @@ -1,16 +1,14 @@ require 'nokogiri' require "wordpress/railtie" -module Refinery - module WordPress - autoload :Author, 'wordpress/author' - autoload :Tag, 'wordpress/tag' - autoload :Category, 'wordpress/category' - autoload :Page, 'wordpress/page' - autoload :Post, 'wordpress/post' - autoload :Comment, 'wordpress/comment' - autoload :Dump, 'wordpress/dump' - autoload :Attachment, 'wordpress/attachment' - end +module WordPressImport +autoload :Author, 'wordpress/author' +autoload :Tag, 'wordpress/tag' +autoload :Category, 'wordpress/category' +autoload :Page, 'wordpress/page' +autoload :Post, 'wordpress/post' +autoload :Comment, 'wordpress/comment' +autoload :Dump, 'wordpress/dump' +autoload :Attachment, 'wordpress/attachment' end diff --git a/lib/wordpress/attachment.rb b/lib/wordpress/attachment.rb index a8ab414..9ee68d0 100644 --- a/lib/wordpress/attachment.rb +++ b/lib/wordpress/attachment.rb @@ -1,130 +1,128 @@ -module Refinery - module WordPress - class Attachment - attr_reader :node - attr_reader :refinery_image - attr_reader 
:refinery_resource - - def initialize(node) - @node = node - end - - def title - node.xpath("title").text - end - - def description - node.xpath("description").text - end - - def file_name - url.split('/').last - end - - def post_date - DateTime.parse node.xpath("wp:post_date").text - end - - def url - node.xpath("wp:attachment_url").text - end - - def url_pattern - url_parts = url.split('.') - extension = url_parts.pop - url_without_extension = url_parts.join('.') - - /#{url_without_extension}(-\d+x\d+)?\.#{extension}/ - end - - def image? - url.match /\.(png|jpg|jpeg|gif)$/ - end - - def to_refinery - if image? - to_image - else - to_resource - end - end - - def replace_url - if image? - replace_image_url - else - replace_resource_url - end - end - - private - - def to_image - image = ::Image.new - image.created_at = post_date - image.image_url = url - image.save! - - @refinery_image = image - image - end - - def to_resource - resource = ::Resource.new - resource.created_at = post_date - resource.file_url = url - resource.save! - - @refinery_resource = resource - resource - end - - def replace_image_url - replace_image_url_in_blog_posts - replace_image_url_in_pages - end - - def replace_resource_url - replace_resource_url_in_blog_posts - replace_resource_url_in_pages - end - - def replace_image_url_in_blog_posts - replace_url_in_blog_posts(refinery_image.image.url) - end - - def replace_image_url_in_pages - replace_url_in_pages(refinery_image.image.url) - end - - def replace_resource_url_in_blog_posts - replace_url_in_blog_posts(refinery_resource.file.url) - end - - def replace_resource_url_in_pages - replace_url_in_pages(refinery_resource.file.url) - end - - def replace_url_in_blog_posts(new_url) - ::BlogPost.all.each do |post| - if (! post.body.empty?) && post.body.include?(url) - post.body = post.body.gsub(url_pattern, new_url) - post.save! - end - end - end - - def replace_url_in_pages(new_url) - ::Page.all.each do |page| - page.parts.each do |part| - if (! 
part.body.to_s.blank?) && part.body.include?(url) - part.body = part.body.gsub(url_pattern, new_url) - part.save! - end - end - end - end +module WordPressImport + class Attachment + attr_reader :node + attr_reader :refinery_image + attr_reader :refinery_resource + def initialize(node) + @node = node end + + def title + node.xpath("title").text + end + + def description + node.xpath("description").text + end + + def file_name + url.split('/').last + end + + def post_date + DateTime.parse node.xpath("wp:post_date").text + end + + def url + node.xpath("wp:attachment_url").text + end + + def url_pattern + url_parts = url.split('.') + extension = url_parts.pop + url_without_extension = url_parts.join('.') + + /#{url_without_extension}(-\d+x\d+)?\.#{extension}/ + end + + def image? + url.match /\.(png|jpg|jpeg|gif)$/ + end + + def to_refinery + if image? + to_image + else + to_resource + end + end + + def replace_url + if image? + replace_image_url + else + replace_resource_url + end + end + + private + + def to_image + image = ::Image.new + image.created_at = post_date + image.image_url = url + image.save! + + @refinery_image = image + image + end + + def to_resource + resource = ::Resource.new + resource.created_at = post_date + resource.file_url = url + resource.save! 
+ + @refinery_resource = resource + resource + end + + def replace_image_url + replace_image_url_in_blog_posts + replace_image_url_in_pages + end + + def replace_resource_url + replace_resource_url_in_blog_posts + replace_resource_url_in_pages + end + + def replace_image_url_in_blog_posts + replace_url_in_blog_posts(refinery_image.image.url) + end + + def replace_image_url_in_pages + replace_url_in_pages(refinery_image.image.url) + end + + def replace_resource_url_in_blog_posts + replace_url_in_blog_posts(refinery_resource.file.url) + end + + def replace_resource_url_in_pages + replace_url_in_pages(refinery_resource.file.url) + end + + def replace_url_in_blog_posts(new_url) + ::BlogPost.all.each do |post| + if (! post.body.empty?) && post.body.include?(url) + post.body = post.body.gsub(url_pattern, new_url) + post.save! + end + end + end + + def replace_url_in_pages(new_url) + ::Page.all.each do |page| + page.parts.each do |part| + if (! part.body.to_s.blank?) && part.body.include?(url) + part.body = part.body.gsub(url_pattern, new_url) + part.save! 
+ end + end + end + end + end end diff --git a/lib/wordpress/author.rb b/lib/wordpress/author.rb index 9f98e17..105693e 100644 --- a/lib/wordpress/author.rb +++ b/lib/wordpress/author.rb @@ -1,37 +1,35 @@ -module Refinery - module WordPress - class Author - attr_reader :author_node +module WordPressImport + class Author + attr_reader :author_node - def initialize(author_node) - @author_node = author_node - end + def initialize(author_node) + @author_node = author_node + end - def login - author_node.xpath("wp:author_login").text - end + def login + author_node.xpath("wp:author_login").text + end - def email - author_node.xpath("wp:author_email").text - end + def email + author_node.xpath("wp:author_email").text + end - def ==(other) - login == other.login - end + def ==(other) + login == other.login + end - def inspect - "WordPress::Author: #{login} <#{email}>" - end + def inspect + "WordPress::Author: #{login} <#{email}>" + end - def to_refinery - user = User.find_or_initialize_by_username_and_email(login, email) - unless user.persisted? - user.password = 'password' - user.password_confirmation = 'password' - user.save - end - user + def to_refinery + user = User.find_or_initialize_by_username_and_email(login, email) + unless user.persisted? 
+ user.password = 'password' + user.password_confirmation = 'password' + user.save end + user end end -end +end \ No newline at end of file diff --git a/lib/wordpress/category.rb b/lib/wordpress/category.rb index b8e2fd4..b43e928 100644 --- a/lib/wordpress/category.rb +++ b/lib/wordpress/category.rb @@ -1,19 +1,17 @@ -module Refinery - module WordPress - class Category - attr_accessor :name +module WordPressImport + class Category + attr_accessor :name - def initialize(text) - @name = text - end + def initialize(text) + @name = text + end - def ==(other) - name == other.name - end + def ==(other) + name == other.name + end - def to_refinery - BlogCategory.find_or_create_by_title(name) - end + def to_refinery + BlogCategory.find_or_create_by_title(name) end end end diff --git a/lib/wordpress/comment.rb b/lib/wordpress/comment.rb index a7f463e..088b874 100644 --- a/lib/wordpress/comment.rb +++ b/lib/wordpress/comment.rb @@ -1,48 +1,46 @@ -module Refinery - module WordPress - class Comment - attr_reader :node +module WordPressImport + class Comment + attr_reader :node - def initialize(node) - @node = node - end + def initialize(node) + @node = node + end - def author - node.xpath('wp:comment_author').text - end + def author + node.xpath('wp:comment_author').text + end - def email - node.xpath('wp:comment_author_email').text - end + def email + node.xpath('wp:comment_author_email').text + end - def url - node.xpath('wp:comment_author_url').text - end + def url + node.xpath('wp:comment_author_url').text + end - def date - DateTime.parse node.xpath("wp:comment_date").text - end + def date + DateTime.parse node.xpath("wp:comment_date").text + end - def content - node.xpath('wp:comment_content').text - end + def content + node.xpath('wp:comment_content').text + end - def approved? - node.xpath('wp:comment_approved').text.to_i == 1 - end + def approved? 
+ node.xpath('wp:comment_approved').text.to_i == 1 + end - def ==(other) - (email == other.email) && (date == other.date) && (content == other.content) - end + def ==(other) + (email == other.email) && (date == other.date) && (content == other.content) + end - def to_refinery - comment = BlogComment.new :name => author, :email => email + def to_refinery + comment = BlogComment.new :name => author, :email => email - comment.body = content - comment.created_at = date - comment.state = approved? ? 'approved' : 'rejected' - comment - end + comment.body = content + comment.created_at = date + comment.state = approved? ? 'approved' : 'rejected' + comment end end end diff --git a/lib/wordpress/dump.rb b/lib/wordpress/dump.rb index 80d9a6a..41cac23 100644 --- a/lib/wordpress/dump.rb +++ b/lib/wordpress/dump.rb @@ -1,57 +1,55 @@ -module Refinery - module WordPress - class Dump - attr_reader :doc +module WordPressImport + class Dump + attr_reader :doc - def initialize(file_name) - file_name = File.expand_path(file_name) + def initialize(file_name) + file_name = File.expand_path(file_name) - raise "Given file '#{file_name}' no file or not readable." \ - unless File.file?(file_name) && File.readable?(file_name) - - file = File.open(file_name) - @doc = Nokogiri::XML(file) + raise "Given file '#{file_name}' no file or not readable." \ + unless File.file?(file_name) && File.readable?(file_name) + + file = File.open(file_name) + @doc = Nokogiri::XML(file) + end + + def authors + doc.xpath("//wp:author").collect do |author| + Author.new(author) + end + end + + def pages(only_published=false) + pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page| + Page.new(page) end - def authors - doc.xpath("//wp:author").collect do |author| - Author.new(author) - end + pages = pages.select(&:published?) 
if only_published + pages + end + + def posts(only_published=false) + posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post| + Post.new(post) end + posts = posts.select(&:published?) if only_published + posts + end - def pages(only_published=false) - pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page| - Page.new(page) - end - - pages = pages.select(&:published?) if only_published - pages + def tags + doc.xpath("//wp:tag/wp:tag_slug").collect do |tag| + Tag.new(tag.text) end + end - def posts(only_published=false) - posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post| - Post.new(post) - end - posts = posts.select(&:published?) if only_published - posts + def categories + doc.xpath("//wp:category/wp:cat_name").collect do |category| + Category.new(category.text) end + end - def tags - doc.xpath("//wp:tag/wp:tag_slug").collect do |tag| - Tag.new(tag.text) - end - end - - def categories - doc.xpath("//wp:category/wp:cat_name").collect do |category| - Category.new(category.text) - end - end - - def attachments - doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment| - Attachment.new(attachment) - end + def attachments + doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment| + Attachment.new(attachment) end end end diff --git a/lib/wordpress/page.rb b/lib/wordpress/page.rb index 1ac5c4f..c2e1827 100644 --- a/lib/wordpress/page.rb +++ b/lib/wordpress/page.rb @@ -1,107 +1,114 @@ -module Refinery - module WordPress - class Page - include ::ActionView::Helpers::TagHelper - include ::ActionView::Helpers::TextHelper +module WordPressImport + class Page + include ::ActionView::Helpers::TagHelper + include ::ActionView::Helpers::TextHelper - attr_reader :node + attr_reader :node - def initialize(node) - @node = node + def initialize(node) + @node = node + end + + def inspect + "WordPress::Page(#{post_id}): #{title}" + end + + def title + node.xpath("title").text + end + + def content + 
node.xpath("content:encoded").text + end + + def content_formatted + formatted = format_syntax_highlighter(format_paragraphs(content)) + + # remove all tags inside
 that simple_format created
+      # TODO: replace format_paragraphs with a method, that ignores pre-tags
+      formatted.gsub!(/()(.+?)(<\/pre>)/m) do |match| 
+        "#{$1}#{strip_tags($2)}#{$3}"
       end
-
-      def inspect
-        "WordPress::Page(#{post_id}): #{title}"     
-      end
-
-      def title
-        node.xpath("title").text
-      end
-
-      def content
-        node.xpath("content:encoded").text
-      end
-
-      def content_formatted
-        formatted = format_syntax_highlighter(format_paragraphs(content))
-
-        # remove all tags inside 
 that simple_format created
-        # TODO: replace format_paragraphs with a method, that ignores pre-tags
-        formatted.gsub!(/()(.+?)(<\/pre>)/m) do |match| 
-          "#{$1}#{strip_tags($2)}#{$3}"
-        end
-          
-        formatted
-      end
-
-      def creator
-        node.xpath("dc:creator").text
-      end
-
-      def post_date
-        DateTime.parse node.xpath("wp:post_date").text
-      end
-
-      def post_id
-        node.xpath("wp:post_id").text.to_i
-      end
-
-      def parent_id
-        dump_id = node.xpath("wp:post_parent").text.to_i
-        dump_id == 0 ? nil : dump_id
-      end
-
-      def status
-        node.xpath("wp:status").text
-      end
-
-      def draft?
-        status != 'publish'
-      end
-
-      def published?
-        ! draft?
-      end
-
-      def ==(other)
-        post_id == other.post_id
-      end
-
-      def to_refinery
-        page = ::Page.create!(:id => post_id, :title => title, 
-          :created_at => post_date, :draft => draft?)
-
-        page.parts.create(:title => 'Body', :body => content_formatted)
-        page
-      end
-
-      private 
-
-      def format_paragraphs(text, html_options={})
-        # WordPress doesn't export 

-Tags, so let's run a simple_format over - # the content. As we trust ourselves, no sanatize. This code is heavily - # inspired by the simple_format rails helper - text = ''.html_safe if text.nil? - start_tag = tag('p', html_options, true) - text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n - text.gsub!(/\n\n+/, "

\n\n#{start_tag}") # 2+ newline -> paragraph - text.insert 0, start_tag + formatted + end - text.html_safe.safe_concat("

") - end + def creator + node.xpath("dc:creator").text + end - def format_syntax_highlighter(text) - # Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/): - # In WordPress you can (via a plugin) enclose code in [lang][/lang] - # blocks, which are converted to a
-tag with a class corresponding
-        # to the language.
-        # 
-        # Example:
-        # [ruby]p "Hello World"[/ruby] 
-        # -> 
p "Hello world"
- text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '
\2
') - end + def post_date + DateTime.parse node.xpath("wp:post_date").text + end + + def post_id + node.xpath("wp:post_id").text.to_i + end + + def parent_id + dump_id = node.xpath("wp:post_parent").text.to_i + dump_id == 0 ? nil : dump_id + end + + def status + node.xpath("wp:status").text + end + + def draft? + status != 'publish' + end + + def published? + ! draft? + end + + def ==(other) + post_id == other.post_id + end + + #NEED: + # dc:creator -> "user_id" + # wp:post_name -> "slug" + # pubDate -> "published_at" + #OK: + # title -> "title" + # content:encoded -> "body" + # wp:post_date_gmt -> "created_at" + + def to_refinery + page = ::Page.create!(:id => post_id, :title => title, + :created_at => post_date, :draft => draft?) + + page.parts.create(:title => 'Body', :body => content_formatted) + page + end + + private + + def format_paragraphs(text, html_options={}) + # WordPress doesn't export

-Tags, so let's run a simple_format over + # the content. As we trust ourselves, no sanatize. This code is heavily + # inspired by the simple_format rails helper + text = ''.html_safe if text.nil? + start_tag = tag('p', html_options, true) + + text.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n + text.gsub!(/\n\n+/, "

\n\n#{start_tag}") # 2+ newline -> paragraph + text.insert 0, start_tag + + text.html_safe.safe_concat("

") + end + + def format_syntax_highlighter(text) + # Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/): + # In WordPress you can (via a plugin) enclose code in [lang][/lang] + # blocks, which are converted to a
-tag with a class corresponding
+      # to the language.
+      # 
+      # Example:
+      # [ruby]p "Hello World"[/ruby] 
+      # -> 
p "Hello world"
+ text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '
\2
') end end end diff --git a/lib/wordpress/post.rb b/lib/wordpress/post.rb index f082513..453b61d 100644 --- a/lib/wordpress/post.rb +++ b/lib/wordpress/post.rb @@ -1,85 +1,83 @@ -module Refinery - module WordPress - class Post < Page - def tags - # xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0 - path = if node.xpath("category[@domain='post_tag']").count > 0 - "category[@domain='post_tag']" - else - "category[@domain='tag']" - end - - node.xpath(path).collect do |tag_node| - Tag.new(tag_node.text) - end +module WordPressImport + class Post < Page + def tags + # xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0 + path = if node.xpath("category[@domain='post_tag']").count > 0 + "category[@domain='post_tag']" + else + "category[@domain='tag']" end - def tag_list - tags.collect(&:name).join(',') + node.xpath(path).collect do |tag_node| + Tag.new(tag_node.text) end - - def categories - node.xpath("category[@domain='category']").collect do |cat| - Category.new(cat.text) - end - end - - def comments - node.xpath("wp:comment").collect do |comment_node| - Comment.new(comment_node) - end - end - - def to_refinery - user = ::User.find_by_username(creator) || ::User.first - raise "Referenced User doesn't exist! Make sure the authors are imported first." \ - unless user - - begin - post = ::BlogPost.new :title => title, :body => content_formatted, - :draft => draft?, :published_at => post_date, :created_at => post_date, - :user_id => user.id, :tag_list => tag_list - post.save! 
- - ::BlogPost.transaction do - categories.each do |category| - post.categories << category.to_refinery - end - - comments.each do |comment| - comment = comment.to_refinery - comment.post = post - comment.save - end - end - rescue ActiveRecord::RecordInvalid - # if the title has already been taken (WP allows duplicates here, - # refinery doesn't) append the post_id to it, making it unique - post.title = "#{title}-#{post_id}" - post.save - end - - post - end - - def self.create_blog_page_if_necessary - # refinerycms wants a page at /blog, so let's make sure there is one - # taken from the original db seeds from refinery-blog - unless ::Page.where("link_url = ?", '/blog').exists? - page = ::Page.create( - :title => "Blog", - :link_url => "/blog", - :deletable => false, - :position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1), - :menu_match => "^/blogs?(\/|\/.+?|)$" - ) - - ::Page.default_parts.each do |default_page_part| - page.parts.create(:title => default_page_part, :body => nil) - end - end - end - end + + def tag_list + tags.collect(&:name).join(',') + end + + def categories + node.xpath("category[@domain='category']").collect do |cat| + Category.new(cat.text) + end + end + + def comments + node.xpath("wp:comment").collect do |comment_node| + Comment.new(comment_node) + end + end + + def to_refinery + user = ::User.find_by_username(creator) || ::User.first + raise "Referenced User doesn't exist! Make sure the authors are imported first." \ + unless user + + begin + post = ::BlogPost.new :title => title, :body => content_formatted, + :draft => draft?, :published_at => post_date, :created_at => post_date, + :user_id => user.id, :tag_list => tag_list + post.save! 
+ + ::BlogPost.transaction do + categories.each do |category| + post.categories << category.to_refinery + end + + comments.each do |comment| + comment = comment.to_refinery + comment.post = post + comment.save + end + end + rescue ActiveRecord::RecordInvalid + # if the title has already been taken (WP allows duplicates here, + # refinery doesn't) append the post_id to it, making it unique + post.title = "#{title}-#{post_id}" + post.save + end + + post + end + + def self.create_blog_page_if_necessary + # refinerycms wants a page at /blog, so let's make sure there is one + # taken from the original db seeds from refinery-blog + unless ::Page.where("link_url = ?", '/blog').exists? + page = ::Page.create( + :title => "Blog", + :link_url => "/blog", + :deletable => false, + :position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1), + :menu_match => "^/blogs?(\/|\/.+?|)$" + ) + + ::Page.default_parts.each do |default_page_part| + page.parts.create(:title => default_page_part, :body => nil) + end + end + end + end end diff --git a/lib/wordpress/railtie.rb b/lib/wordpress/railtie.rb index 672bac6..33f3fa4 100644 --- a/lib/wordpress/railtie.rb +++ b/lib/wordpress/railtie.rb @@ -1,9 +1,7 @@ -module Refinery - module WordPress - class Railtie < Rails::Railtie - rake_tasks do - load "tasks/wordpress.rake" - end +module WordPressImport + class Railtie < Rails::Railtie + rake_tasks do + load "tasks/wordpress.rake" end end end diff --git a/lib/wordpress/tag.rb b/lib/wordpress/tag.rb index 6f0f3f4..2d4502d 100644 --- a/lib/wordpress/tag.rb +++ b/lib/wordpress/tag.rb @@ -1,20 +1,18 @@ -module Refinery - module WordPress - class Tag - attr_accessor :name - - def initialize(text) - @name = text - end - - def ==(other) - name == other.name - end - - def to_refinery - ::ActsAsTaggableOn::Tag.find_or_create_by_name(name) - end +module WordPressImport + class Tag + attr_accessor :name + def initialize(text) + @name = text end + + def ==(other) + name == 
other.name + end + + def to_refinery + ::ActsAsTaggableOn::Tag.find_or_create_by_name(name) + end + end end diff --git a/refinerycms-wordpress-import.gemspec b/refinerycms-wordpress-import.gemspec deleted file mode 100644 index 6076ffc..0000000 --- a/refinerycms-wordpress-import.gemspec +++ /dev/null @@ -1,23 +0,0 @@ -# Provide a simple gemspec so you can easily use your enginex -# project in your rails apps through git. -Gem::Specification.new do |s| - s.name = "refinerycms-wordpress-import" - s.summary = "Import WordPress XML dumps into refinerycms(-blog)." - s.description = "This gem imports a WordPress XML dump into refinerycms (Page, User) and refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)" - s.version = "0.4.0" - s.date = "2011-06-13" - - s.authors = ['Marc Remolt'] - s.email = 'marc.remolt@googlemail.com' - s.homepage = 'https://github.com/mremolt/refinerycms-wordpress-import' - - s.add_dependency 'bundler', '~> 1.0' - s.add_dependency 'refinerycms', '~> 1.0.0' - s.add_dependency 'refinerycms-blog', '~> 1.6.2' - s.add_dependency 'nokogiri', '~> 1.5.0' - - s.add_development_dependency 'rspec-rails' - s.add_development_dependency 'database_cleaner' - - s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"] -end diff --git a/spec/lib/wordpress/attachment_spec.rb b/spec/lib/wordpress/attachment_spec.rb index a2b5ee2..a97ec7f 100644 --- a/spec/lib/wordpress/attachment_spec.rb +++ b/spec/lib/wordpress/attachment_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe Refinery::WordPress::Attachment, :type => :model do +describe WordPressImport::Attachment, :type => :model do context "an image attchment" do let(:attachment) { test_dump.attachments.first } diff --git a/spec/lib/wordpress/author_spec.rb b/spec/lib/wordpress/author_spec.rb index d1a7aff..f7f31d7 100644 --- a/spec/lib/wordpress/author_spec.rb +++ b/spec/lib/wordpress/author_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe 
Refinery::WordPress::Author, :type => :model do +describe WordPressImport::Author, :type => :model do let(:author) { test_dump.authors.first } specify { author.login.should == 'admin' } diff --git a/spec/lib/wordpress/category_spec.rb b/spec/lib/wordpress/category_spec.rb index 19822d1..00bc764 100644 --- a/spec/lib/wordpress/category_spec.rb +++ b/spec/lib/wordpress/category_spec.rb @@ -1,15 +1,15 @@ require 'spec_helper' -describe Refinery::WordPress::Category, :type => :model do - let(:category) { Refinery::WordPress::Category.new('Rant') } +describe WordPressImport::Category, :type => :model do + let(:category) { WordPressImport::Category.new('Rant') } describe "#name" do specify { category.name.should == 'Rant' } end describe "#==" do - specify { category.should == Refinery::WordPress::Category.new('Rant') } - specify { category.should_not == Refinery::WordPress::Category.new('Tutorials') } + specify { category.should == WordPressImport::Category.new('Rant') } + specify { category.should_not == WordPressImport::Category.new('Tutorials') } end describe "#to_refinery" do diff --git a/spec/lib/wordpress/dump_spec.rb b/spec/lib/wordpress/dump_spec.rb index 6fdcdec..5cf2cc5 100644 --- a/spec/lib/wordpress/dump_spec.rb +++ b/spec/lib/wordpress/dump_spec.rb @@ -1,10 +1,10 @@ require 'spec_helper' -describe Refinery::WordPress::Dump, :type => :model do +describe WordPressImport::Dump, :type => :model do let(:dump) { test_dump } it "should create a Dump object given a xml file" do - dump.should be_a Refinery::WordPress::Dump + dump.should be_a WordPressImport::Dump end it "should include a Nokogiri::XML object" do @@ -13,12 +13,12 @@ describe Refinery::WordPress::Dump, :type => :model do describe "#tags" do let(:tags) do - [ Refinery::WordPress::Tag.new('css'), Refinery::WordPress::Tag.new('html'), - Refinery::WordPress::Tag.new('php'), Refinery::WordPress::Tag.new('ruby')] + [ WordPressImport::Tag.new('css'), WordPressImport::Tag.new('html'), + 
WordPressImport::Tag.new('php'), WordPressImport::Tag.new('ruby')] end specify { dump.tags.count == 4 } - specify { dump.tags.first.should be_a(Refinery::WordPress::Tag) } + specify { dump.tags.first.should be_a(WordPressImport::Tag) } it "should return all included tags" do tags.each do |tag| @@ -29,12 +29,12 @@ describe Refinery::WordPress::Dump, :type => :model do describe "#categories" do let(:categories) do - [ Refinery::WordPress::Category.new('Rant'), Refinery::WordPress::Category.new('Tutorials'), - Refinery::WordPress::Category.new('Uncategorized') ] + [ WordPressImport::Category.new('Rant'), WordPressImport::Category.new('Tutorials'), + WordPressImport::Category.new('Uncategorized') ] end specify { dump.categories.count == 4 } - specify { dump.categories.first.should be_a(Refinery::WordPress::Category) } + specify { dump.categories.first.should be_a(WordPressImport::Category) } it "should return all included categories" do categories.each do |cat| @@ -48,7 +48,7 @@ describe Refinery::WordPress::Dump, :type => :model do dump.pages.should have(3).pages end - specify { dump.pages.first.should be_a(Refinery::WordPress::Page) } + specify { dump.pages.first.should be_a(WordPressImport::Page) } it "should return only published pages with only_published=true" do dump.pages(true).should have(2).pages @@ -60,7 +60,7 @@ describe Refinery::WordPress::Dump, :type => :model do dump.authors.should have(1).author end - specify { dump.authors.first.should be_a(Refinery::WordPress::Author) } + specify { dump.authors.first.should be_a(WordPressImport::Author) } end describe "#posts" do @@ -68,7 +68,7 @@ describe Refinery::WordPress::Dump, :type => :model do dump.posts.should have(3).posts end - specify { dump.posts.first.should be_a(Refinery::WordPress::Post) } + specify { dump.posts.first.should be_a(WordPressImport::Post) } it "should return only published posts with only_published=true" do dump.posts(true).should have(2).posts @@ -80,6 +80,6 @@ describe 
Refinery::WordPress::Dump, :type => :model do dump.attachments.should have(2).attachments end - specify { dump.attachments.first.should be_a(Refinery::WordPress::Attachment) } + specify { dump.attachments.first.should be_a(WordPressImport::Attachment) } end end diff --git a/spec/lib/wordpress/page_spec.rb b/spec/lib/wordpress/page_spec.rb index 2265a0f..a265039 100644 --- a/spec/lib/wordpress/page_spec.rb +++ b/spec/lib/wordpress/page_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe Refinery::WordPress::Page, :type => :model do +describe WordPressImport::Page, :type => :model do let(:dump) { test_dump } let(:page) { test_dump.pages.last } diff --git a/spec/lib/wordpress/post_spec.rb b/spec/lib/wordpress/post_spec.rb index ac3b4d8..2b9189a 100644 --- a/spec/lib/wordpress/post_spec.rb +++ b/spec/lib/wordpress/post_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe Refinery::WordPress::Post, :type => :model do +describe WordPressImport::Post, :type => :model do let(:post) { test_dump.posts.last } specify { post.title.should == 'Third blog post' } @@ -17,15 +17,15 @@ describe Refinery::WordPress::Post, :type => :model do describe "#categories" do specify { post.categories.should have(1).category } - specify { post.categories.first.should == Refinery::WordPress::Category.new('Rant') } + specify { post.categories.first.should == WordPressImport::Category.new('Rant') } end describe "#tags" do specify { post.tags.should have(3).tags } - specify { post.tags.should include(Refinery::WordPress::Tag.new('css')) } - specify { post.tags.should include(Refinery::WordPress::Tag.new('html')) } - specify { post.tags.should include(Refinery::WordPress::Tag.new('php')) } + specify { post.tags.should include(WordPressImport::Tag.new('css')) } + specify { post.tags.should include(WordPressImport::Tag.new('html')) } + specify { post.tags.should include(WordPressImport::Tag.new('php')) } end specify { post.tag_list.should == 'css,html,php' } diff --git 
a/spec/lib/wordpress/tag_spec.rb b/spec/lib/wordpress/tag_spec.rb index 6b45b35..c1ba9de 100644 --- a/spec/lib/wordpress/tag_spec.rb +++ b/spec/lib/wordpress/tag_spec.rb @@ -1,15 +1,15 @@ require 'spec_helper' -describe Refinery::WordPress::Tag, :type => :model do - let(:tag) { Refinery::WordPress::Tag.new('ruby') } +describe WordPressImport::Tag, :type => :model do + let(:tag) { WordPressImport::Tag.new('ruby') } describe "#name" do specify { tag.name.should == 'ruby' } end describe "#==" do - specify { tag.should == Refinery::WordPress::Tag.new('ruby') } - specify { tag.should_not == Refinery::WordPress::Tag.new('php') } + specify { tag.should == WordPressImport::Tag.new('ruby') } + specify { tag.should_not == WordPressImport::Tag.new('php') } end describe "#to_refinery" do diff --git a/spec/refinerycms_wordpress_import_spec.rb b/spec/refinerycms_wordpress_import_spec.rb deleted file mode 100644 index 3c82c27..0000000 --- a/spec/refinerycms_wordpress_import_spec.rb +++ /dev/null @@ -1,7 +0,0 @@ -require 'spec_helper' - -describe Refinery::WordPress do - it "should be valid" do - Refinery::WordPress.should be_a(Module) - end -end diff --git a/spec/support/helpers.rb b/spec/support/helpers.rb index 2e7af3b..b87b971 100644 --- a/spec/support/helpers.rb +++ b/spec/support/helpers.rb @@ -1,11 +1,11 @@ -module Refinery::WordPress::SpecHelpers +module WordPressImport::SpecHelpers def test_dump file_name = File.expand_path(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml')) - Refinery::WordPress::Dump.new(file_name) + WordPressImport::Dump.new(file_name) end end RSpec.configure do |config| - config.include Refinery::WordPress::SpecHelpers + config.include WordPressImport::SpecHelpers end diff --git a/spec/wordpress_import_spec.rb b/spec/wordpress_import_spec.rb new file mode 100644 index 0000000..391071c --- /dev/null +++ b/spec/wordpress_import_spec.rb @@ -0,0 +1,7 @@ +require 'spec_helper' + +describe WordPressImport do + it "should be valid" do + 
WordPressImport.should be_a(Module) + end +end diff --git a/wordpress-import.gemspec b/wordpress-import.gemspec new file mode 100644 index 0000000..d425403 --- /dev/null +++ b/wordpress-import.gemspec @@ -0,0 +1,21 @@ +# Provide a simple gemspec so you can easily use your enginex +# project in your rails apps through git. +Gem::Specification.new do |s| + s.name = "wordpress-import" + s.summary = "Import WordPress XML dumps into your Ruby on Rails app." + s.description = "This gem imports a WordPress XML dump into Rails (Page, User, BlogPost, BlogCategory, Tag, BlogComment)" + s.version = "0.4.1" + s.date = "2014-03-04" + + s.authors = ['Will Bradley'] + s.email = 'bradley.will@gmail.com' + s.homepage = 'https://github.com/zyphlar/wordpress-import' + + s.add_dependency 'bundler', '~> 1.0' + s.add_dependency 'nokogiri', '~> 1.6.0' + + s.add_development_dependency 'rspec-rails' + s.add_development_dependency 'database_cleaner' + + s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"] +end