Beginning to remove refinery stuff

2014-03-04 15:47:53 -07:00 · 2014-03-04 15:47:53 -07:00 · 60ce62ad1b
commit 60ce62ad1b
parent 1e1f3574eb
29 changed files with 530 additions and 744 deletions
--- a/8
+++ b/8
@ -1,6 +1,8 @@
 source "http://rubygems.org"
-gem "rails", "3.0.9"
+ruby '2.0.0'
 gem "rails", "4.0.0"
 #gem "capybara", ">= 1.0.0.beta1"
 gem "sqlite3"
 gem "rmagick"
@ -19,6 +21,4 @@ end
 # gem 'ruby-debug'
 # gem 'ruby-debug19'
-gem 'refinerycms'
+gem 'wordpress-import', :path => './'
 gem 'refinerycms-blog'
 gem 'refinerycms-wordpress-import', :path => './'
--- a/Gemfile.lock
+++ b/Gemfile.lock
@ -1,200 +0,0 @@
 PATH
  remote: ./
  specs:
    refinerycms-wordpress-import (0.4.0)
      bundler (~> 1.0)
      nokogiri (~> 1.5.0)
      refinerycms (~> 1.0.0)
      refinerycms-blog (~> 1.6.2)
 GEM
  remote: http://rubygems.org/
  specs:
    abstract (1.0.0)
    actionmailer (3.0.9)
      actionpack (= 3.0.9)
      mail (~> 2.2.19)
    actionpack (3.0.9)
      activemodel (= 3.0.9)
      activesupport (= 3.0.9)
      builder (~> 2.1.2)
      erubis (~> 2.6.6)
      i18n (~> 0.5.0)
      rack (~> 1.2.1)
      rack-mount (~> 0.6.14)
      rack-test (~> 0.5.7)
      tzinfo (~> 0.3.23)
    activemodel (3.0.9)
      activesupport (= 3.0.9)
      builder (~> 2.1.2)
      i18n (~> 0.5.0)
    activerecord (3.0.9)
      activemodel (= 3.0.9)
      activesupport (= 3.0.9)
      arel (~> 2.0.10)
      tzinfo (~> 0.3.23)
    activeresource (3.0.9)
      activemodel (= 3.0.9)
      activesupport (= 3.0.9)
    activesupport (3.0.9)
    acts-as-taggable-on (2.0.6)
    acts_as_indexed (0.7.3)
    arel (2.0.10)
    awesome_nested_set (2.0.1)
      activerecord (>= 3.0.0)
    babosa (0.3.5)
    bcrypt-ruby (2.1.4)
    builder (2.1.2)
    database_cleaner (0.6.7)
    devise (1.3.4)
      bcrypt-ruby (~> 2.1.2)
      orm_adapter (~> 0.0.3)
      warden (~> 1.0.3)
    diff-lcs (1.1.2)
    dragonfly (0.9.5)
      rack
    erubis (2.6.6)
      abstract (>= 1.0.0)
    fakeweb (1.3.0)
    ffi (1.0.9)
    filters_spam (0.3)
    friendly_id_globalize3 (3.2.1.4)
      babosa (~> 0.3.0)
    globalize3 (0.1.0)
      activemodel (>= 3.0.0)
      activerecord (>= 3.0.0)
    guard (0.5.1)
      thor (~> 0.14.6)
    guard-bundler (0.1.3)
      bundler (>= 1.0.0)
      guard (>= 0.2.2)
    guard-rspec (0.4.1)
      guard (>= 0.4.0)
    i18n (0.5.0)
    libnotify (0.5.7)
    mail (2.2.19)
      activesupport (>= 2.3.6)
      i18n (>= 0.4.0)
      mime-types (~> 1.16)
      treetop (~> 1.4.8)
    mime-types (1.16)
    nokogiri (1.5.0)
    orm_adapter (0.0.5)
    polyglot (0.3.2)
    rack (1.2.3)
    rack-cache (1.0.2)
      rack (>= 0.4)
    rack-mount (0.6.14)
      rack (>= 1.0.0)
    rack-test (0.5.7)
      rack (>= 1.0)
    rails (3.0.9)
      actionmailer (= 3.0.9)
      actionpack (= 3.0.9)
      activerecord (= 3.0.9)
      activeresource (= 3.0.9)
      activesupport (= 3.0.9)
      bundler (~> 1.0)
      railties (= 3.0.9)
    railties (3.0.9)
      actionpack (= 3.0.9)
      activesupport (= 3.0.9)
      rake (>= 0.8.7)
      rdoc (~> 3.4)
      thor (~> 0.14.4)
    rake (0.9.2)
    rdoc (3.9.2)
    refinerycms (1.0.4)
      bundler (~> 1.0)
      refinerycms-authentication (= 1.0.4)
      refinerycms-base (= 1.0.4)
      refinerycms-core (= 1.0.4)
      refinerycms-dashboard (= 1.0.4)
      refinerycms-images (= 1.0.4)
      refinerycms-pages (= 1.0.4)
      refinerycms-resources (= 1.0.4)
      refinerycms-settings (= 1.0.4)
    refinerycms-authentication (1.0.4)
      devise (~> 1.3.0)
      friendly_id_globalize3 (~> 3.2.1)
      refinerycms-core (= 1.0.4)
    refinerycms-base (1.0.4)
    refinerycms-blog (1.6.2)
      acts-as-taggable-on
      filters_spam (~> 0.2)
      refinerycms-core (~> 1.0.3)
      seo_meta (~> 1.1.0)
    refinerycms-core (1.0.4)
      acts_as_indexed (~> 0.7)
      awesome_nested_set (~> 2.0)
      friendly_id_globalize3 (~> 3.2.1)
      globalize3 (~> 0.1)
      rails (~> 3.0.9)
      refinerycms-base (= 1.0.4)
      refinerycms-generators (~> 1.0)
      refinerycms-settings (= 1.0.4)
      truncate_html (~> 0.5)
      will_paginate (= 3.0.pre2)
    refinerycms-dashboard (1.0.4)
      refinerycms-core (= 1.0.4)
    refinerycms-generators (1.0.3)
    refinerycms-images (1.0.4)
      dragonfly (~> 0.9.0)
      rack-cache (>= 0.5.3)
      refinerycms-core (= 1.0.4)
    refinerycms-pages (1.0.4)
      awesome_nested_set (~> 2.0)
      friendly_id_globalize3 (~> 3.2.1)
      globalize3 (~> 0.1)
      refinerycms-core (= 1.0.4)
      seo_meta (~> 1.1)
    refinerycms-resources (1.0.4)
      dragonfly (~> 0.9.0)
      rack-cache (>= 0.5.3)
      refinerycms-core (= 1.0.4)
    refinerycms-settings (1.0.4)
      refinerycms-base (= 1.0.4)
    rmagick (2.13.1)
    rspec (2.6.0)
      rspec-core (~> 2.6.0)
      rspec-expectations (~> 2.6.0)
      rspec-mocks (~> 2.6.0)
    rspec-core (2.6.4)
    rspec-expectations (2.6.0)
      diff-lcs (~> 1.1.2)
    rspec-mocks (2.6.0)
    rspec-rails (2.6.1)
      actionpack (~> 3.0)
      activesupport (~> 3.0)
      railties (~> 3.0)
      rspec (~> 2.6.0)
    seo_meta (1.1.1)
      refinerycms-generators (~> 1.0.1)
    sqlite3 (1.3.4)
    thor (0.14.6)
    treetop (1.4.10)
      polyglot
      polyglot (>= 0.3.1)
    truncate_html (0.5.1)
    tzinfo (0.3.29)
    warden (1.0.5)
      rack (>= 1.0)
    will_paginate (3.0.pre2)
 PLATFORMS
  ruby
 DEPENDENCIES
  database_cleaner
  fakeweb
  ffi
  guard-bundler
  guard-rspec
  libnotify
  rails (= 3.0.9)
  refinerycms
  refinerycms-blog
  refinerycms-wordpress-import!
  rmagick
  rspec-rails (>= 2.6.0)
  sqlite3
--- a/3
+++ b/3
@ -1,4 +1,5 @@
-Copyright 2011 YOURNAME
+Copyright 2014 Will Bradley
 portions Copyright 2011 Marc Remolt
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
--- a/README.rdoc
+++ b/README.rdoc
@ -1,8 +1,10 @@
-= Refinerycms-wordpress-import
+= Wordpress-import
-This litte project is an importer for WordPress XML dumps into refinerycms(-blog). 
+Fork of Marc Remolt's Refinerycms-wordpress-import ( https://github.com/mremolt/refinerycms-wordpress-import )
-You can find the source code on github: https://github.com/mremolt/refinerycms-wordpress-import
+This little project is an importer for WordPress XML dumps into Rails
 You can find the source code on github: https://github.com/zyphlar/wordpress-import
 Keep in mind that links to other pages of your blog are just copied, as WordPress exports them as <a>-Tags. 
 If your site (blog) structure uses new urls, the links WILL break! For example, if you used 
@ -12,20 +14,18 @@ So your inner site links will point to the old WP url.
 == Prerequisites
-As refinerycms-wordpress-import is an addon for RefineryCMS, is shares the prerequisites with it.
+TODO
 So you'll first need a running installation of refinerycms and refinerycms-blog. Make sure
 the site is running, all migrations are run and you created the first refinery user. 
 == Installation
 Just add the gem to your project's Gemfile:
-  gem 'refinerycms-wordpress-import'
+  gem 'wordpress-import'
 Or if you want to stay on the bleeding edge: 
-  gem 'refinerycms-wordpress-import', :git => 'git://github.com/mremolt/refinerycms-wordpress-import.git'
+  gem 'wordpress-import', :git => 'git://github.com/zyphlar/wordpress-import.git'
 and run
--- a/lib/refinerycms-wordpress-import.rb
+++ b/lib/refinerycms-wordpress-import.rb
@ -1,7 +0,0 @@
 module Refinery
  module WordPress
  end
 end
 require 'wordpress'
--- a/lib/tasks/wordpress.rake
+++ b/lib/tasks/wordpress.rake
@ -13,24 +13,24 @@ namespace :wordpress do
  end
-  desc "import blog data from a Refinery::WordPress XML dump"
+  desc "import blog data from a WordPressImport XML dump"
  task :import_blog, :file_name do |task, params|
    Rake::Task["environment"].invoke
-    dump = Refinery::WordPress::Dump.new(params[:file_name])
+    dump = WordPressImport::Dump.new(params[:file_name])
    dump.authors.each(&:to_refinery)
    only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
    dump.posts(only_published).each(&:to_refinery)
-    Refinery::WordPress::Post.create_blog_page_if_necessary
+    WordPressImport::Post.create_blog_page_if_necessary
    ENV["MODEL"] = 'BlogPost'
    Rake::Task["friendly_id:redo_slugs"].invoke
    ENV.delete("MODEL")
  end
-  desc "reset blog tables and then import blog data from a Refinery::WordPress XML dump"
+  desc "reset blog tables and then import blog data from a WordPressImport XML dump"
  task :reset_and_import_blog, :file_name do |task, params|
    Rake::Task["environment"].invoke
    Rake::Task["wordpress:reset_blog"].invoke
@ -51,7 +51,7 @@ namespace :wordpress do
  desc "import cms data from a WordPress XML dump"
  task :import_pages, :file_name do |task, params|
    Rake::Task["environment"].invoke
-    dump = Refinery::WordPress::Dump.new(params[:file_name])
+    dump = WordPressImport::Dump.new(params[:file_name])
    only_published = ENV['ONLY_PUBLISHED'] == 'true' ? true : false
    dump.pages(only_published).each(&:to_refinery)
@ -65,7 +65,7 @@ namespace :wordpress do
      page.save!
    end
-    Refinery::WordPress::Post.create_blog_page_if_necessary
+    WordPressImport::Post.create_blog_page_if_necessary
    ENV["MODEL"] = 'Page'
    Rake::Task["friendly_id:redo_slugs"].invoke
@ -93,7 +93,7 @@ namespace :wordpress do
  desc "import media data (images and files) from a WordPress XML dump and replace target URLs in pages and posts"
  task :import_and_replace_media, :file_name do |task, params|
    Rake::Task["environment"].invoke
-    dump = Refinery::WordPress::Dump.new(params[:file_name])
+    dump = WordPressImport::Dump.new(params[:file_name])
    attachments = dump.attachments.each(&:to_refinery)
--- a/lib/wordpress-import.rb
+++ b/lib/wordpress-import.rb
@ -0,0 +1,5 @@
 module WordPressImport
 end
 require 'wordpress'
--- a/lib/wordpress.rb
+++ b/lib/wordpress.rb
@ -1,16 +1,14 @@
 require 'nokogiri'
 require "wordpress/railtie" 
-module Refinery
+module WordPressImport
-  module WordPress
+autoload :Author, 'wordpress/author' 
-    autoload :Author, 'wordpress/author' 
+autoload :Tag, 'wordpress/tag'
-    autoload :Tag, 'wordpress/tag'
+autoload :Category, 'wordpress/category'
-    autoload :Category, 'wordpress/category'
+autoload :Page, 'wordpress/page'
-    autoload :Page, 'wordpress/page'
+autoload :Post, 'wordpress/post'
-    autoload :Post, 'wordpress/post'
+autoload :Comment, 'wordpress/comment'
-    autoload :Comment, 'wordpress/comment'
+autoload :Dump, 'wordpress/dump'
-    autoload :Dump, 'wordpress/dump'
+autoload :Attachment, 'wordpress/attachment'
    autoload :Attachment, 'wordpress/attachment'
  end
 end
--- a/lib/wordpress/attachment.rb
+++ b/lib/wordpress/attachment.rb
@ -1,130 +1,128 @@
-module Refinery
+module WordPressImport
-  module WordPress
+  class Attachment
-    class Attachment
+    attr_reader :node
-      attr_reader :node
+    attr_reader :refinery_image
-      attr_reader :refinery_image
+    attr_reader :refinery_resource
      attr_reader :refinery_resource
      def initialize(node)
        @node = node
      end
      def title
        node.xpath("title").text
      end
      def description
        node.xpath("description").text
      end
      def file_name
        url.split('/').last
      end
      def post_date
        DateTime.parse node.xpath("wp:post_date").text
      end
      def url
        node.xpath("wp:attachment_url").text
      end
      def url_pattern
        url_parts = url.split('.')
        extension = url_parts.pop
        url_without_extension = url_parts.join('.')
        /#{url_without_extension}(-\d+x\d+)?\.#{extension}/
      end
      def image?
        url.match /\.(png|jpg|jpeg|gif)$/ 
      end
      def to_refinery
        if image?
          to_image
        else
          to_resource
        end
      end
      def replace_url
        if image?
          replace_image_url
        else
          replace_resource_url
        end
      end
      private
      def to_image
        image = ::Image.new
        image.created_at = post_date
        image.image_url = url
        image.save!
        @refinery_image = image
        image
      end
      def to_resource
        resource = ::Resource.new
        resource.created_at = post_date
        resource.file_url = url
        resource.save!
        @refinery_resource = resource
        resource
      end
      def replace_image_url
        replace_image_url_in_blog_posts
        replace_image_url_in_pages
      end
      def replace_resource_url
        replace_resource_url_in_blog_posts
        replace_resource_url_in_pages
      end
      def replace_image_url_in_blog_posts
        replace_url_in_blog_posts(refinery_image.image.url)
      end
      def replace_image_url_in_pages
        replace_url_in_pages(refinery_image.image.url)
      end
      def replace_resource_url_in_blog_posts
        replace_url_in_blog_posts(refinery_resource.file.url)
      end
      def replace_resource_url_in_pages
        replace_url_in_pages(refinery_resource.file.url)
      end
      def replace_url_in_blog_posts(new_url)
        ::BlogPost.all.each do |post|
          if (! post.body.empty?) && post.body.include?(url)
            post.body = post.body.gsub(url_pattern, new_url)
            post.save!
          end
        end
      end
      def replace_url_in_pages(new_url)
        ::Page.all.each do |page|
          page.parts.each do |part|
            if (! part.body.to_s.blank?) && part.body.include?(url)
              part.body = part.body.gsub(url_pattern, new_url)
              part.save!
            end
          end
        end
      end
    # Wraps a single XML node of the dump; every reader on this class
    # queries that node through #xpath.
    #
    # node - the XML node describing one attachment item.
    def initialize(node)
      @node = node
    end
    # Text content of the node's <title> element.
    def title
      title_nodes = node.xpath("title")
      title_nodes.text
    end
    # Text content of the node's <description> element.
    def description
      description_nodes = node.xpath("description")
      description_nodes.text
    end
    # Last '/'-separated segment of the attachment URL (nil for an empty URL).
    def file_name
      segments = url.split('/')
      segments[-1]
    end
    # The <wp:post_date> text parsed into a DateTime.
    def post_date
      raw_date = node.xpath("wp:post_date").text
      DateTime.parse(raw_date)
    end
    # Text content of the node's <wp:attachment_url> element.
    def url
      url_nodes = node.xpath("wp:attachment_url")
      url_nodes.text
    end
    # Regexp matching this attachment's URL, optionally with a "-WIDTHxHEIGHT"
    # suffix inserted before the extension (e.g. "photo-300x200.png").
    #
    # The URL parts are escaped before interpolation so that characters such
    # as ".", "+", "?" or "(" in the URL are matched literally instead of being
    # interpreted as regexp syntax.
    #
    # Returns a Regexp.
    def url_pattern
      url_parts = url.split('.')
      extension = url_parts.pop
      url_without_extension = url_parts.join('.')

      # extension is nil for an empty URL; to_s keeps that case from raising
      /#{Regexp.escape(url_without_extension)}(-\d+x\d+)?\.#{Regexp.escape(extension.to_s)}/
    end
    # Whether the attachment URL ends in a known image extension
    # (png, jpg, jpeg or gif). The check ignores case, so uploads named
    # e.g. "IMG_0001.JPG" are treated as images too.
    #
    # Returns a MatchData (truthy) for images, nil otherwise.
    def image?
      url.match(/\.(png|jpg|jpeg|gif)$/i)
    end
    # Imports the attachment: images go through to_image, everything else
    # through to_resource. Returns whatever the chosen importer returns.
    def to_refinery
      image? ? to_image : to_resource
    end
    # Rewrites references to this attachment's URL, delegating to
    # replace_image_url for images and replace_resource_url otherwise.
    def replace_url
      return replace_image_url if image?

      replace_resource_url
    end
    private
    # Creates and saves an ::Image dated with post_date and sourced from the
    # attachment URL, remembers it in @refinery_image and returns it.
    # Raises whatever ::Image#save! raises; @refinery_image is not set then.
    def to_image
      @refinery_image = ::Image.new.tap do |img|
        img.created_at = post_date
        img.image_url = url
        img.save!
      end
    end
    # Creates and saves a ::Resource dated with post_date and sourced from the
    # attachment URL, remembers it in @refinery_resource and returns it.
    # Raises whatever ::Resource#save! raises; @refinery_resource is not set then.
    def to_resource
      @refinery_resource = ::Resource.new.tap do |file_record|
        file_record.created_at = post_date
        file_record.file_url = url
        file_record.save!
      end
    end
    # Replaces the image URL first in blog posts, then in pages.
    # Returns the result of the pages step.
    def replace_image_url
      replace_image_url_in_blog_posts
      replace_image_url_in_pages
    end
    # Replaces the resource URL first in blog posts, then in pages.
    # Returns the result of the pages step.
    def replace_resource_url
      replace_resource_url_in_blog_posts
      replace_resource_url_in_pages
    end
    # Points blog posts at the URL of the imported image (refinery_image).
    def replace_image_url_in_blog_posts
      imported_url = refinery_image.image.url
      replace_url_in_blog_posts(imported_url)
    end
    # Points pages at the URL of the imported image (refinery_image).
    def replace_image_url_in_pages
      imported_url = refinery_image.image.url
      replace_url_in_pages(imported_url)
    end
    # Points blog posts at the URL of the imported file (refinery_resource).
    def replace_resource_url_in_blog_posts
      imported_url = refinery_resource.file.url
      replace_url_in_blog_posts(imported_url)
    end
    # Points pages at the URL of the imported file (refinery_resource).
    def replace_resource_url_in_pages
      imported_url = refinery_resource.file.url
      replace_url_in_pages(imported_url)
    end
    # Rewrites every blog post whose body mentions this attachment's original
    # URL so that all matches of url_pattern point at new_url, then saves it.
    #
    # Posts with a nil or empty body are skipped (a nil body previously raised
    # NoMethodError), matching the nil-safe check used for page parts.
    #
    # new_url - the replacement URL to write into the post bodies.
    #
    # Raises whatever ::BlogPost#save! raises for a post that cannot be saved.
    def replace_url_in_blog_posts(new_url)
      ::BlogPost.all.each do |post|
        body = post.body.to_s
        next if body.empty? || !body.include?(url)

        post.body = body.gsub(url_pattern, new_url)
        post.save!
      end
    end
    # For every part of every page: when the part body is non-blank and
    # mentions this attachment's original URL, replaces all matches of
    # url_pattern with new_url and saves the part.
    #
    # new_url - the replacement URL to write into the page parts.
    def replace_url_in_pages(new_url)
      ::Page.all.each do |page|
        page.parts.each do |part|
          next if part.body.to_s.blank?
          next unless part.body.include?(url)

          part.body = part.body.gsub(url_pattern, new_url)
          part.save!
        end
      end
    end
  end
 end
--- a/lib/wordpress/author.rb
+++ b/lib/wordpress/author.rb
@ -1,37 +1,35 @@
-module Refinery
+module WordPressImport
-  module WordPress
+  class Author
-    class Author
+    attr_reader :author_node
      attr_reader :author_node
-      def initialize(author_node)
+    def initialize(author_node)
-        @author_node = author_node
+      @author_node = author_node
-      end
+    end
-      def login
+    def login
-        author_node.xpath("wp:author_login").text
+      author_node.xpath("wp:author_login").text
-      end
+    end
-      def email
+    def email
-        author_node.xpath("wp:author_email").text
+      author_node.xpath("wp:author_email").text
-      end
+    end
-      def ==(other)
+    def ==(other)
-        login == other.login
+      login == other.login
-      end
+    end
-      def inspect
+    def inspect
-        "WordPress::Author: #{login} <#{email}>"
+      "WordPress::Author: #{login} <#{email}>"
-      end
+    end
-      def to_refinery
+    def to_refinery
-        user = User.find_or_initialize_by_username_and_email(login, email)
+      user = User.find_or_initialize_by_username_and_email(login, email)
-        unless user.persisted?
+      unless user.persisted?
-          user.password = 'password'
+        user.password = 'password'
-          user.password_confirmation = 'password'
+        user.password_confirmation = 'password'
-          user.save
+        user.save
        end
        user
      end
      user
    end
  end
-end
+end
--- a/lib/wordpress/category.rb
+++ b/lib/wordpress/category.rb
@ -1,19 +1,17 @@
-module Refinery
+module WordPressImport
-  module WordPress
+  class Category
-    class Category
+    attr_accessor :name
      attr_accessor :name
-      def initialize(text)
+    def initialize(text)
-        @name = text
+      @name = text
-      end
+    end
-      def ==(other)
+    def ==(other)
-        name == other.name
+      name == other.name
-      end
+    end
-      def to_refinery
+    def to_refinery
-        BlogCategory.find_or_create_by_title(name)
+      BlogCategory.find_or_create_by_title(name)
      end
    end
  end
 end
--- a/lib/wordpress/comment.rb
+++ b/lib/wordpress/comment.rb
@ -1,48 +1,46 @@
-module Refinery
+module WordPressImport
-  module WordPress
+  class Comment
-    class Comment
+    attr_reader :node
      attr_reader :node
-      def initialize(node) 
+    def initialize(node) 
-        @node = node
+      @node = node
-      end
+    end
-      def author
+    def author
-        node.xpath('wp:comment_author').text
+      node.xpath('wp:comment_author').text
-      end
+    end
-      def email
+    def email
-        node.xpath('wp:comment_author_email').text
+      node.xpath('wp:comment_author_email').text
-      end
+    end
-      def url
+    def url
-        node.xpath('wp:comment_author_url').text
+      node.xpath('wp:comment_author_url').text
-      end
+    end
-      def date
+    def date
-        DateTime.parse node.xpath("wp:comment_date").text
+      DateTime.parse node.xpath("wp:comment_date").text
-      end
+    end
-      def content
+    def content
-        node.xpath('wp:comment_content').text
+      node.xpath('wp:comment_content').text
-      end
+    end
-      def approved?
+    def approved?
-        node.xpath('wp:comment_approved').text.to_i == 1
+      node.xpath('wp:comment_approved').text.to_i == 1
-      end
+    end
-      def ==(other) 
+    def ==(other) 
-        (email == other.email) && (date == other.date) && (content == other.content)
+      (email == other.email) && (date == other.date) && (content == other.content)
-      end
+    end
-      def to_refinery
+    def to_refinery
-        comment = BlogComment.new :name => author, :email => email
+      comment = BlogComment.new :name => author, :email => email
-        comment.body = content
+      comment.body = content
-        comment.created_at = date
+      comment.created_at = date
-        comment.state = approved? ? 'approved' : 'rejected'
+      comment.state = approved? ? 'approved' : 'rejected'
-        comment
+      comment
      end
    end
  end
 end
--- a/lib/wordpress/dump.rb
+++ b/lib/wordpress/dump.rb
@ -1,57 +1,55 @@
-module Refinery
+module WordPressImport
-  module WordPress
+  class Dump
-    class Dump
+    attr_reader :doc
      attr_reader :doc
-      def initialize(file_name)
+    def initialize(file_name)
-        file_name = File.expand_path(file_name)
+      file_name = File.expand_path(file_name)
-        raise "Given file '#{file_name}' no file or not readable." \
+      raise "Given file '#{file_name}' no file or not readable." \
-          unless File.file?(file_name) && File.readable?(file_name)
+        unless File.file?(file_name) && File.readable?(file_name)
-        
+      
-        file = File.open(file_name)
+      file = File.open(file_name)
-        @doc = Nokogiri::XML(file)
+      @doc = Nokogiri::XML(file)
    end
    # One Author per node matched by "//wp:author" in the dump document.
    def authors
      author_nodes = doc.xpath("//wp:author")
      author_nodes.map { |author_node| Author.new(author_node) }
    end
    def pages(only_published=false)
      pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
        Page.new(page)
      end
-      def authors
+      pages = pages.select(&:published?) if only_published
-        doc.xpath("//wp:author").collect do |author|
+      pages
-          Author.new(author)
+    end
-        end
+
    # One Post per <item> whose wp:post_type is 'post'.
    #
    # only_published - when truthy, keep only posts answering true to
    #                  #published? (default: false, i.e. all posts).
    def posts(only_published=false)
      all_posts = doc.xpath("//item[wp:post_type = 'post']").map { |item| Post.new(item) }
      return all_posts unless only_published

      all_posts.select(&:published?)
    end
-      def pages(only_published=false)
+    def tags
-        pages = doc.xpath("//item[wp:post_type = 'page']").collect do |page|
+      doc.xpath("//wp:tag/wp:tag_slug").collect do |tag|
-          Page.new(page)
+        Tag.new(tag.text)
        end
        pages = pages.select(&:published?) if only_published
        pages
      end
    end
-      def posts(only_published=false)
+    def categories
-        posts = doc.xpath("//item[wp:post_type = 'post']").collect do |post|
+      doc.xpath("//wp:category/wp:cat_name").collect do |category|
-          Post.new(post)
+        Category.new(category.text)
        end
        posts = posts.select(&:published?) if only_published
        posts
      end
    end
-      def tags
+    def attachments
-        doc.xpath("//wp:tag/wp:tag_slug").collect do |tag|
+      doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
-          Tag.new(tag.text)
+        Attachment.new(attachment)
        end
      end
      def categories
        doc.xpath("//wp:category/wp:cat_name").collect do |category|
          Category.new(category.text)
        end
      end
      def attachments
        doc.xpath("//item[wp:post_type = 'attachment']").collect do |attachment|
          Attachment.new(attachment)
        end
      end
    end
  end
--- a/lib/wordpress/page.rb
+++ b/lib/wordpress/page.rb
@ -1,107 +1,114 @@
-module Refinery
+module WordPressImport
-  module WordPress
+  class Page
-    class Page
+    include ::ActionView::Helpers::TagHelper
-      include ::ActionView::Helpers::TagHelper
+    include ::ActionView::Helpers::TextHelper
      include ::ActionView::Helpers::TextHelper
-      attr_reader :node
+    attr_reader :node
-      def initialize(node)
+    def initialize(node)
-        @node = node
+      @node = node
    end
    def inspect
      "WordPress::Page(#{post_id}): #{title}"     
    end
    # Text content of the item's <title> element.
    def title
      title_nodes = node.xpath("title")
      title_nodes.text
    end
    # Raw text of the item's <content:encoded> element.
    def content
      content_nodes = node.xpath("content:encoded")
      content_nodes.text
    end
    def content_formatted
      formatted = format_syntax_highlighter(format_paragraphs(content))
      # remove all tags inside <pre> that simple_format created
      # TODO: replace format_paragraphs with a method, that ignores pre-tags
      formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match| 
        "#{$1}#{strip_tags($2)}#{$3}"
      end
      def inspect
        "WordPress::Page(#{post_id}): #{title}"     
      end
      def title
        node.xpath("title").text
      end
      def content
        node.xpath("content:encoded").text
      end
      def content_formatted
        formatted = format_syntax_highlighter(format_paragraphs(content))
        # remove all tags inside <pre> that simple_format created
        # TODO: replace format_paragraphs with a method, that ignores pre-tags
        formatted.gsub!(/(<pre.*?>)(.+?)(<\/pre>)/m) do |match| 
          "#{$1}#{strip_tags($2)}#{$3}"
        end
        formatted
      end
      def creator
        node.xpath("dc:creator").text
      end
      def post_date
        DateTime.parse node.xpath("wp:post_date").text
      end
      def post_id
        node.xpath("wp:post_id").text.to_i
      end
      def parent_id
        dump_id = node.xpath("wp:post_parent").text.to_i
        dump_id == 0 ? nil : dump_id
      end
      def status
        node.xpath("wp:status").text
      end
      def draft?
        status != 'publish'
      end
      def published?
        ! draft?
      end
      def ==(other)
        post_id == other.post_id
      end
      def to_refinery
        page = ::Page.create!(:id => post_id, :title => title, 
          :created_at => post_date, :draft => draft?)
        page.parts.create(:title => 'Body', :body => content_formatted)
        page
      end
      private 
      def format_paragraphs(text, html_options={})
        # WordPress doesn't export <p>-Tags, so let's run a simple_format over
        # the content. As we trust ourselves, no sanatize. This code is heavily
        # inspired by the simple_format rails helper
        text = ''.html_safe if text.nil?
        start_tag = tag('p', html_options, true)
-        text.gsub!(/\r\n?/, "\n")                    # \r\n and \r -> \n
+      formatted
-        text.gsub!(/\n\n+/, "</p>\n\n#{start_tag}")  # 2+ newline  -> paragraph
+    end
        text.insert 0, start_tag
-        text.html_safe.safe_concat("</p>")
+    def creator
-      end
+      node.xpath("dc:creator").text
    end
-      def format_syntax_highlighter(text)
+    def post_date
-        # Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/):
+      DateTime.parse node.xpath("wp:post_date").text
-        # In WordPress you can (via a plugin) enclose code in [lang][/lang]
+    end
-        # blocks, which are converted to a <pre>-tag with a class corresponding
+
-        # to the language.
+    def post_id
-        # 
+      node.xpath("wp:post_id").text.to_i
-        # Example:
+    end
-        # [ruby]p "Hello World"[/ruby] 
+
-        # -> <pre class="brush: ruby">p "Hello world"</pre> 
+    def parent_id
-        text.gsub(/\[(\w+)\](.+?)\[\/\1\]/m, '<pre class="brush: \1">\2</pre>')
+      dump_id = node.xpath("wp:post_parent").text.to_i
-      end
+      dump_id == 0 ? nil : dump_id
    end
    # Text content of the item's <wp:status> element.
    def status
      status_nodes = node.xpath("wp:status")
      status_nodes.text
    end
    # True for every status other than 'publish'.
    def draft?
      !(status == 'publish')
    end
    # Negation of #draft?.
    def published?
      !draft?
    end
    # Two pages are equal when their post ids are equal.
    # other must respond to #post_id.
    def ==(other)
      post_id == other.post_id
    end
    # Author's field-mapping notes (WordPress XML -> imported record).
    # Still needed:
    #   dc:creator       -> "user_id"
    #   wp:post_name     -> "slug"
    #   pubDate          -> "published_at"
    # Already handled:
    #   title            -> "title"
    #   content:encoded  -> "body"
    #   wp:post_date_gmt -> "created_at"
    #
    # Creates a ::Page carrying this item's id, title, date and draft flag,
    # adds a 'Body' part holding the formatted content, and returns the page.
    # Raises if ::Page.create! fails.
    def to_refinery
      attributes = { :id => post_id, :title => title,
                     :created_at => post_date, :draft => draft? }
      ::Page.create!(attributes).tap do |record|
        record.parts.create(:title => 'Body', :body => content_formatted)
      end
    end
    private 
    # Wraps text in <p> tags, because WordPress does not export them.
    # Modelled on the Rails simple_format helper, but without sanitizing,
    # since the dump content is trusted.
    #
    # The argument is no longer modified in place: the previous gsub!/insert
    # calls left the caller's string half-converted and raised on frozen input.
    #
    # text         - the raw content; nil is treated as an empty string.
    # html_options - attributes for each opening <p> tag (default: {}).
    #
    # Returns the html_safe result of the conversion.
    def format_paragraphs(text, html_options={})
      start_tag = tag('p', html_options, true)
      paragraphs = text.to_s
                       .gsub(/\r\n?/, "\n")                    # \r\n and \r -> \n
                       .gsub(/\n\n+/, "</p>\n\n#{start_tag}")  # 2+ newline  -> paragraph
      "#{start_tag}#{paragraphs}".html_safe.safe_concat("</p>")
    end
    # Support for SyntaxHighlighter (http://alexgorbatchev.com/SyntaxHighlighter/).
    # A WordPress plugin lets authors enclose code in [lang]...[/lang] blocks;
    # each such block becomes a <pre> tag whose class names the language:
    #
    #   [ruby]p "Hello World"[/ruby]
    #   # => <pre class="brush: ruby">p "Hello World"</pre>
    #
    # Returns a new string; text itself is not modified.
    def format_syntax_highlighter(text)
      shortcode = /\[(\w+)\](.+?)\[\/\1\]/m
      pre_block = '<pre class="brush: \1">\2</pre>'
      text.gsub(shortcode, pre_block)
    end
  end
 end
--- a/lib/wordpress/post.rb
+++ b/lib/wordpress/post.rb
@ -1,85 +1,83 @@
-module Refinery
+module WordPressImport
-  module WordPress
+  class Post < Page
-    class Post < Page
+    def tags
-      def tags
+      # xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
-        # xml dump has "post_tag" for wordpress 3.1 and "tag" for 3.0
+      path = if node.xpath("category[@domain='post_tag']").count > 0
-        path = if node.xpath("category[@domain='post_tag']").count > 0
+        "category[@domain='post_tag']"
-          "category[@domain='post_tag']"
+      else
-        else
+        "category[@domain='tag']"
          "category[@domain='tag']"
        end
        node.xpath(path).collect do |tag_node| 
          Tag.new(tag_node.text)
        end
      end
-      def tag_list
+      node.xpath(path).collect do |tag_node| 
-        tags.collect(&:name).join(',')
+        Tag.new(tag_node.text)
      end
      def categories
        node.xpath("category[@domain='category']").collect do |cat|
          Category.new(cat.text)
        end
      end
      def comments
        node.xpath("wp:comment").collect do |comment_node|
          Comment.new(comment_node)
        end
      end
      def to_refinery
        user = ::User.find_by_username(creator) || ::User.first
        raise "Referenced User doesn't exist! Make sure the authors are imported first." \
          unless user
        begin
          post = ::BlogPost.new :title => title, :body => content_formatted,
            :draft => draft?, :published_at => post_date, :created_at => post_date,
            :user_id => user.id, :tag_list => tag_list
          post.save!
          ::BlogPost.transaction do
            categories.each do |category|
              post.categories << category.to_refinery
            end
            comments.each do |comment|
              comment = comment.to_refinery
              comment.post = post
              comment.save
            end
          end
        rescue ActiveRecord::RecordInvalid
          # if the title has already been taken (WP allows duplicates here,
          # refinery doesn't) append the post_id to it, making it unique
          post.title = "#{title}-#{post_id}"
          post.save
        end
        post
      end
      def self.create_blog_page_if_necessary
        # refinerycms wants a page at /blog, so let's make sure there is one
        # taken from the original db seeds from refinery-blog
        unless ::Page.where("link_url = ?", '/blog').exists?
          page = ::Page.create(
            :title => "Blog",
            :link_url => "/blog",
            :deletable => false,
            :position => ((::Page.maximum(:position, :conditions => {:parent_id => nil}) || -1)+1),
            :menu_match => "^/blogs?(\/|\/.+?|)$"
          )
          ::Page.default_parts.each do |default_page_part|
            page.parts.create(:title => default_page_part, :body => nil)
          end
        end
      end
    end
    # Names of all tags joined with commas.
    def tag_list
      tag_names = tags.map { |tag| tag.name }
      tag_names.join(',')
    end
    # One Category per <category domain="category"> child, built from its text.
    def categories
      category_nodes = node.xpath("category[@domain='category']")
      category_nodes.map { |category_node| Category.new(category_node.text) }
    end
    # One Comment per <wp:comment> child node.
    def comments
      comment_nodes = node.xpath("wp:comment")
      comment_nodes.map { |comment_node| Comment.new(comment_node) }
    end
    # Imports this post as a ::BlogPost, then attaches its categories and
    # comments.
    #
    # The author is looked up by username (creator), falling back to the first
    # user. If the first save is rejected as invalid, the post id is appended
    # to the title (WP allows duplicate titles, refinery doesn't) and the save
    # is retried once. Categories and comments are attached after that, so a
    # renamed post keeps them too; previously the rescue skipped them.
    #
    # Returns the ::BlogPost; it is unsaved if the retry failed as well.
    # Raises RuntimeError when no user exists at all.
    def to_refinery
      user = ::User.find_by_username(creator) || ::User.first
      raise "Referenced User doesn't exist! Make sure the authors are imported first." \
        unless user

      post = ::BlogPost.new :title => title, :body => content_formatted,
        :draft => draft?, :published_at => post_date, :created_at => post_date,
        :user_id => user.id, :tag_list => tag_list

      begin
        post.save!
      rescue ActiveRecord::RecordInvalid
        # if the title has already been taken (WP allows duplicates here,
        # refinery doesn't) append the post_id to it, making it unique
        post.title = "#{title}-#{post_id}"
        post.save
      end

      # nothing to attach to when even the renamed post could not be stored
      return post unless post.persisted?

      ::BlogPost.transaction do
        categories.each do |category|
          post.categories << category.to_refinery
        end

        comments.each do |comment|
          blog_comment = comment.to_refinery
          blog_comment.post = post
          blog_comment.save
        end
      end

      post
    end
    # refinerycms wants a page at /blog, so make sure there is one
    # (taken from the original db seeds of refinery-blog).
    #
    # Does nothing when a page with link_url '/blog' already exists. Otherwise
    # creates the "Blog" page positioned after the last top-level page (0 when
    # there is none) and gives it one empty part per ::Page.default_parts entry.
    #
    # The position lookup goes through a where scope instead of passing
    # :conditions to maximum, because finder options on calculations are
    # deprecated in Rails 4.0.
    def self.create_blog_page_if_necessary
      return if ::Page.where("link_url = ?", '/blog').exists?

      last_position = ::Page.where(:parent_id => nil).maximum(:position)
      page = ::Page.create(
        :title => "Blog",
        :link_url => "/blog",
        :deletable => false,
        :position => ((last_position || -1)+1),
        :menu_match => "^/blogs?(\/|\/.+?|)$"
      )

      ::Page.default_parts.each do |default_page_part|
        page.parts.create(:title => default_page_part, :body => nil)
      end
    end
  end
 end
--- a/lib/wordpress/railtie.rb
+++ b/lib/wordpress/railtie.rb
@ -1,9 +1,7 @@
-module Refinery
+module WordPressImport
-  module WordPress
+  class Railtie < Rails::Railtie
-    class Railtie < Rails::Railtie
+    rake_tasks do
-      rake_tasks do
+      load "tasks/wordpress.rake"
        load "tasks/wordpress.rake"
      end
    end
  end
 end
--- a/lib/wordpress/tag.rb
+++ b/lib/wordpress/tag.rb
@ -1,20 +1,18 @@
-module Refinery
+module WordPressImport
-  module WordPress
+  class Tag
-    class Tag
+    attr_accessor :name
      attr_accessor :name
      def initialize(text)
        @name = text
      end
      def ==(other)
        name == other.name
      end
      def to_refinery
        ::ActsAsTaggableOn::Tag.find_or_create_by_name(name)
      end
    # Stores +text+ as this tag's name.
    def initialize(text)
      self.name = text
    end
    def ==(other)
      name == other.name
    end
    # Returns the ::ActsAsTaggableOn::Tag with this name, creating it when it
    # does not exist yet. Uses `where(...).first_or_create` because the
    # `find_or_create_by_<attribute>` dynamic finders are deprecated in
    # Rails 4.
    def to_refinery
      ::ActsAsTaggableOn::Tag.where(:name => name).first_or_create
    end
  end
 end
--- a/refinerycms-wordpress-import.gemspec
+++ b/refinerycms-wordpress-import.gemspec
@ -1,23 +0,0 @@
 # Provide a simple gemspec so you can easily use your enginex
 # project in your rails apps through git.
 Gem::Specification.new do |s|
  s.name        = "refinerycms-wordpress-import"
  s.summary     = "Import WordPress XML dumps into refinerycms(-blog)."
  s.description = "This gem imports a WordPress XML dump into refinerycms (Page, User) and refinerycms-blog (BlogPost, BlogCategory, Tag, BlogComment)"
  s.version     = "0.4.0"
  s.date        = "2011-06-13"
  s.authors     = ['Marc Remolt']
  s.email       = 'marc.remolt@googlemail.com'
  s.homepage    = 'https://github.com/mremolt/refinerycms-wordpress-import'
  s.add_dependency 'bundler', '~> 1.0'
  s.add_dependency 'refinerycms', '~> 1.0.0'
  s.add_dependency 'refinerycms-blog', '~> 1.6.2'
  s.add_dependency 'nokogiri', '~> 1.5.0'
  s.add_development_dependency 'rspec-rails'
  s.add_development_dependency 'database_cleaner'
  s.files = Dir["{app,lib,config}/**/*"] + ["MIT-LICENSE", "Rakefile", "Gemfile", "README.rdoc"]
 end
--- a/spec/lib/wordpress/attachment_spec.rb
+++ b/spec/lib/wordpress/attachment_spec.rb
@ -1,6 +1,6 @@
 require 'spec_helper'
-describe Refinery::WordPress::Attachment, :type => :model do
+describe WordPressImport::Attachment, :type => :model do
  context "an image attchment" do
    let(:attachment) { test_dump.attachments.first }
--- a/spec/lib/wordpress/author_spec.rb
+++ b/spec/lib/wordpress/author_spec.rb
@ -1,6 +1,6 @@
 require 'spec_helper'
-describe Refinery::WordPress::Author, :type => :model do
+describe WordPressImport::Author, :type => :model do
  let(:author) { test_dump.authors.first }
  specify { author.login.should == 'admin' }
--- a/spec/lib/wordpress/category_spec.rb
+++ b/spec/lib/wordpress/category_spec.rb
@ -1,15 +1,15 @@
 require 'spec_helper'
-describe Refinery::WordPress::Category, :type => :model do
+describe WordPressImport::Category, :type => :model do
-  let(:category) { Refinery::WordPress::Category.new('Rant') }
+  let(:category) { WordPressImport::Category.new('Rant') }
  describe "#name" do
    specify { category.name.should == 'Rant' }
  end
  describe "#==" do
-    specify { category.should == Refinery::WordPress::Category.new('Rant') }
+    specify { category.should == WordPressImport::Category.new('Rant') }
-    specify { category.should_not == Refinery::WordPress::Category.new('Tutorials') }
+    specify { category.should_not == WordPressImport::Category.new('Tutorials') }
  end
  describe "#to_refinery" do
--- a/spec/lib/wordpress/dump_spec.rb
+++ b/spec/lib/wordpress/dump_spec.rb
@ -1,10 +1,10 @@
 require 'spec_helper'
-describe Refinery::WordPress::Dump, :type => :model do
+describe WordPressImport::Dump, :type => :model do
  let(:dump) { test_dump }
  it "should create a Dump object given a xml file" do
-    dump.should be_a Refinery::WordPress::Dump
+    dump.should be_a WordPressImport::Dump
  end
  it "should include a Nokogiri::XML object" do
@ -13,12 +13,12 @@ describe Refinery::WordPress::Dump, :type => :model do
  describe "#tags" do
    let(:tags) do
-      [ Refinery::WordPress::Tag.new('css'), Refinery::WordPress::Tag.new('html'),
+      [ WordPressImport::Tag.new('css'), WordPressImport::Tag.new('html'),
-        Refinery::WordPress::Tag.new('php'), Refinery::WordPress::Tag.new('ruby')]
+        WordPressImport::Tag.new('php'), WordPressImport::Tag.new('ruby')]
    end
    specify { dump.tags.count == 4 }
-    specify { dump.tags.first.should be_a(Refinery::WordPress::Tag) }
+    specify { dump.tags.first.should be_a(WordPressImport::Tag) }
    it "should return all included tags" do
      tags.each do |tag|
@ -29,12 +29,12 @@ describe Refinery::WordPress::Dump, :type => :model do
  describe "#categories" do
    let(:categories) do
-      [ Refinery::WordPress::Category.new('Rant'), Refinery::WordPress::Category.new('Tutorials'),
+      [ WordPressImport::Category.new('Rant'), WordPressImport::Category.new('Tutorials'),
-       Refinery::WordPress::Category.new('Uncategorized') ]
+       WordPressImport::Category.new('Uncategorized') ]
    end
    specify { dump.categories.count == 4 }
-    specify { dump.categories.first.should be_a(Refinery::WordPress::Category) }
+    specify { dump.categories.first.should be_a(WordPressImport::Category) }
    it "should return all included categories" do
      categories.each do |cat|
@ -48,7 +48,7 @@ describe Refinery::WordPress::Dump, :type => :model do
      dump.pages.should have(3).pages
    end
-    specify { dump.pages.first.should be_a(Refinery::WordPress::Page) }
+    specify { dump.pages.first.should be_a(WordPressImport::Page) }
    it "should return only published pages with only_published=true" do
      dump.pages(true).should have(2).pages
@ -60,7 +60,7 @@ describe Refinery::WordPress::Dump, :type => :model do
      dump.authors.should have(1).author
    end
-    specify { dump.authors.first.should be_a(Refinery::WordPress::Author) }
+    specify { dump.authors.first.should be_a(WordPressImport::Author) }
  end
  describe "#posts" do
@ -68,7 +68,7 @@ describe Refinery::WordPress::Dump, :type => :model do
      dump.posts.should have(3).posts
    end
-    specify { dump.posts.first.should be_a(Refinery::WordPress::Post) }
+    specify { dump.posts.first.should be_a(WordPressImport::Post) }
    it "should return only published posts with only_published=true" do
      dump.posts(true).should have(2).posts
@ -80,6 +80,6 @@ describe Refinery::WordPress::Dump, :type => :model do
      dump.attachments.should have(2).attachments
    end
-    specify { dump.attachments.first.should be_a(Refinery::WordPress::Attachment) }
+    specify { dump.attachments.first.should be_a(WordPressImport::Attachment) }
  end
 end
--- a/spec/lib/wordpress/page_spec.rb
+++ b/spec/lib/wordpress/page_spec.rb
@ -1,6 +1,6 @@
 require 'spec_helper'
-describe Refinery::WordPress::Page, :type => :model do
+describe WordPressImport::Page, :type => :model do
  let(:dump) { test_dump }
  let(:page) { test_dump.pages.last }
--- a/spec/lib/wordpress/post_spec.rb
+++ b/spec/lib/wordpress/post_spec.rb
@ -1,6 +1,6 @@
 require 'spec_helper'
-describe Refinery::WordPress::Post, :type => :model do
+describe WordPressImport::Post, :type => :model do
  let(:post) { test_dump.posts.last }
   specify { post.title.should == 'Third blog post' }
@ -17,15 +17,15 @@ describe Refinery::WordPress::Post, :type => :model do
  describe "#categories" do
     specify { post.categories.should have(1).category }
-     specify { post.categories.first.should == Refinery::WordPress::Category.new('Rant') }
+     specify { post.categories.first.should == WordPressImport::Category.new('Rant') }
  end
  describe "#tags" do
     specify { post.tags.should have(3).tags }
-     specify { post.tags.should include(Refinery::WordPress::Tag.new('css')) }
+     specify { post.tags.should include(WordPressImport::Tag.new('css')) }
-     specify { post.tags.should include(Refinery::WordPress::Tag.new('html')) }
+     specify { post.tags.should include(WordPressImport::Tag.new('html')) }
-     specify { post.tags.should include(Refinery::WordPress::Tag.new('php')) }
+     specify { post.tags.should include(WordPressImport::Tag.new('php')) }
  end
   specify { post.tag_list.should == 'css,html,php' }
--- a/spec/lib/wordpress/tag_spec.rb
+++ b/spec/lib/wordpress/tag_spec.rb
@ -1,15 +1,15 @@
 require 'spec_helper'
-describe Refinery::WordPress::Tag, :type => :model do
+describe WordPressImport::Tag, :type => :model do
-  let(:tag) { Refinery::WordPress::Tag.new('ruby') }
+  let(:tag) { WordPressImport::Tag.new('ruby') }
  describe "#name" do
    specify { tag.name.should == 'ruby' }
  end
  describe "#==" do
-    specify { tag.should == Refinery::WordPress::Tag.new('ruby') }
+    specify { tag.should == WordPressImport::Tag.new('ruby') }
-    specify { tag.should_not == Refinery::WordPress::Tag.new('php') }
+    specify { tag.should_not == WordPressImport::Tag.new('php') }
  end
  describe "#to_refinery" do
--- a/spec/refinerycms_wordpress_import_spec.rb
+++ b/spec/refinerycms_wordpress_import_spec.rb
@ -1,7 +0,0 @@
 require 'spec_helper'
 describe Refinery::WordPress do
  it "should be valid" do
    Refinery::WordPress.should be_a(Module)
  end
 end
--- a/spec/support/helpers.rb
+++ b/spec/support/helpers.rb
@ -1,11 +1,11 @@
-module Refinery::WordPress::SpecHelpers
+module WordPressImport::SpecHelpers
  def test_dump
    file_name = File.expand_path(File.join(File.dirname(__FILE__), '../fixtures/wordpress_dump.xml'))
-    Refinery::WordPress::Dump.new(file_name) 
+    WordPressImport::Dump.new(file_name) 
  end
 end
 RSpec.configure do |config|
-  config.include Refinery::WordPress::SpecHelpers
+  config.include WordPressImport::SpecHelpers
 end
--- a/spec/wordpress_import_spec.rb
+++ b/spec/wordpress_import_spec.rb
@ -0,0 +1,7 @@
 require 'spec_helper'
 # Smoke test: the top-level WordPressImport namespace is a Module.
 describe WordPressImport do
   it("should be valid") { WordPressImport.should be_a(Module) }
 end
--- a/wordpress-import.gemspec
+++ b/wordpress-import.gemspec
@ -0,0 +1,21 @@
 # Minimal gemspec, provided so this enginex project can be pulled into a
 # Rails app straight from git.
 Gem::Specification.new do |spec|
   spec.name        = "wordpress-import"
   spec.summary     = "Import WordPress XML dumps into your Ruby on Rails app."
   spec.description = "This gem imports a WordPress XML dump into Rails (Page, User, BlogPost, BlogCategory, Tag, BlogComment)"
   spec.version     = "0.4.1"
   spec.date        = "2014-03-04"

   spec.authors     = ['Will Bradley']
   spec.email       = 'bradley.will@gmail.com'
   spec.homepage    = 'https://github.com/zyphlar/wordpress-import'

   # Runtime dependencies, declared in the original order.
   { 'bundler' => '~> 1.0', 'nokogiri' => '~> 1.6.0' }.each do |gem_name, requirement|
     spec.add_dependency gem_name, requirement
   end

   # Development-only dependencies (no version constraint).
   %w[rspec-rails database_cleaner].each do |gem_name|
     spec.add_development_dependency gem_name
   end

   packaged_sources = Dir.glob("{app,lib,config}/**/*")
   spec.files = packaged_sources + %w[MIT-LICENSE Rakefile Gemfile README.rdoc]
 end