diff --git a/lib/tasks/wordpress.rake b/lib/tasks/wordpress.rake
new file mode 100644
index 0000000..18fe125
--- /dev/null
+++ b/lib/tasks/wordpress.rake
@@ -0,0 +1,92 @@
+require 'nokogiri'
+require 'wordpress'
+
+namespace :wordpress do
+  # Resolves the :file_name task argument to an absolute path and verifies
+  # that it points at a readable regular file. Shared by the import tasks.
+  #
+  # Raises ArgumentError when no file name was given and RuntimeError when
+  # the path is not a readable file.
+  resolve_dump_file = lambda do |given_name|
+    raise ArgumentError, "No file name given; pass the dump path as the task argument." if given_name.nil?
+
+    file_name = File.absolute_path(given_name)
+    unless File.file?(file_name) && File.readable?(file_name)
+      raise "Given file '#{file_name}' no file or not readable."
+    end
+    file_name
+  end
+
+  desc "Reset the blog relevant tables for a clean import"
+  task :reset do
+    Rake::Task["environment"].invoke
+
+    # Table names come from this fixed list only, never from user input.
+    %w(blog_categories blog_posts).each do |table_name|
+      p "Truncating #{table_name} ..."
+      ActiveRecord::Base.connection.execute "TRUNCATE TABLE #{table_name}"
+    end
+  end
+
+  desc "Import data from a WordPress XML dump"
+  task :import, :file_name do |_task, params|
+    Rake::Task["environment"].invoke
+
+    file_name = resolve_dump_file.call(params[:file_name])
+
+    # Block form closes the file even if parsing raises.
+    doc = File.open(file_name) { |file| Nokogiri::XML(file) }
+
+    p "Importing blog categories ..."
+    doc.xpath("//wp:category/wp:cat_name").each do |category|
+      BlogCategory.exists?(:title => category.text) || BlogCategory.create!(:title => category.text)
+    end
+
+    doc.xpath("//wp:tag/wp:tag_slug").each do |tag|
+      p tag.text
+    end
+
+    # Pages are only printed for inspection here; nothing is persisted.
+    doc.xpath("//item[wp:post_type = 'page']").each do |post|
+      title = post.xpath("title").text
+      author = post.xpath("dc:creator").text
+      published_at = DateTime.parse(post.xpath("wp:post_date").text)
+
+      tags = post.xpath("category[@domain='tag'][not(@nicename)]").collect { |tag| tag.text }
+      tag_list = tags.join(', ')
+
+      categories = post.xpath("category[not(@*)]").collect { |cat| cat.text }
+
+      p '*' * 100
+      p title
+      p author
+      p published_at
+      p tag_list
+      p categories
+    end
+  end
+
+  desc "New import (testing)"
+  task :new_import, :file_name do |_task, params|
+    Rake::Task["environment"].invoke
+
+    file_name = resolve_dump_file.call(params[:file_name])
+
+    dump = WordPress::Dump.new(file_name)
+    p dump.authors
+    p dump.pages
+    dump.posts.each do |post|
+      p post.title
+      p post.categories
+      p post.tags
+      p post.creator
+      #p post.content
+    end
+  end
+
+  desc "Import data from a WordPress XML dump into a clean database (reset first)"
+  task :import_clean, :file_name do |_task, params|
+    Rake::Task["wordpress:reset"].invoke
+    Rake::Task["wordpress:import"].invoke(params[:file_name])
+  end
+end
diff --git a/lib/wordpress.rb b/lib/wordpress.rb
new file mode 100644
index 0000000..9e2bb88
--- /dev/null
+++ b/lib/wordpress.rb
@@ -0,0 +1,13 @@
+# Namespace for the classes that wrap the records of a WordPress XML dump
+# and convert them through their #to_refinery methods.
+module WordPress
+
+end
+
+require 'wordpress/author'
+require 'wordpress/tag'
+require 'wordpress/category'
+require 'wordpress/page'
+require 'wordpress/post'
+require 'wordpress/comment'
+require 'wordpress/dump'
diff --git a/lib/wordpress/author.rb b/lib/wordpress/author.rb
new file mode 100644
index 0000000..f32b0e4
--- /dev/null
+++ b/lib/wordpress/author.rb
@@ -0,0 +1,44 @@
+module WordPress
+  # Wraps a wp:author node of the dump.
+  class Author
+    attr_reader :author_node
+
+    def initialize(author_node)
+      @author_node = author_node
+    end
+
+    def login
+      author_node.xpath("wp:author_login").text
+    end
+
+    def email
+      author_node.xpath("wp:author_email").text
+    end
+
+    # Authors are equal when their logins match.
+    def ==(other)
+      login == other.login
+    end
+
+    def inspect
+      "WordPress::Author: #{login} <#{email}>"
+    end
+
+    # Finds the User with this login and email, or creates it.
+    #
+    # Returns the User. Raises if a newly built user cannot be saved, so a
+    # failed author import is reported here rather than surfacing later as
+    # a missing user.
+    def to_refinery
+      user = User.find_or_initialize_by_username_and_email(login, email)
+      unless user.persisted?
+        # NOTE(review): every newly created author gets this same placeholder
+        # password; it should be changed after the import.
+        user.password = 'password'
+        user.password_confirmation = 'password'
+        user.save!
+      end
+      user
+    end
+  end
+end
diff --git a/lib/wordpress/category.rb b/lib/wordpress/category.rb
new file mode 100644
index 0000000..39281bd
--- /dev/null
+++ b/lib/wordpress/category.rb
@@ -0,0 +1,20 @@
+module WordPress
+  # A category name taken from the dump.
+  class Category
+    attr_accessor :name
+
+    def initialize(text)
+      @name = text
+    end
+
+    # Equal when the names match.
+    def ==(other)
+      name == other.name
+    end
+
+    # Returns the BlogCategory titled after this category, creating it if needed.
+    def to_refinery
+      BlogCategory.find_or_create_by_title(name)
+    end
+  end
+end
diff --git a/lib/wordpress/comment.rb b/lib/wordpress/comment.rb
new file mode 100644
index 0000000..27efe58
--- /dev/null
+++ b/lib/wordpress/comment.rb
@@ -0,0 +1,51 @@
+module WordPress
+  # Wraps a wp:comment node of a post.
+  class Comment
+    attr_reader :node
+
+    def initialize(node)
+      @node = node
+    end
+
+    def author
+      node.xpath('wp:comment_author').text
+    end
+
+    def email
+      node.xpath('wp:comment_author_email').text
+    end
+
+    def url
+      node.xpath('wp:comment_author_url').text
+    end
+
+    def date
+      DateTime.parse node.xpath("wp:comment_date").text
+    end
+
+    def content
+      node.xpath('wp:comment_content').text
+    end
+
+    # True only when wp:comment_approved is numerically 1.
+    def approved?
+      node.xpath('wp:comment_approved').text.to_i == 1
+    end
+
+    # Comments are equal when email, date and content all match.
+    def ==(other)
+      (email == other.email) && (date == other.date) && (content == other.content)
+    end
+
+    # Builds (but does not save) a BlogComment from this comment.
+    # The caller is responsible for assigning the post and saving.
+    def to_refinery
+      comment = BlogComment.new :name => author, :email => email
+
+      comment.body = content
+      comment.created_at = date
+      comment.state = approved? ? 'approved' : 'rejected'
+      comment
+    end
+  end
+end
diff --git a/lib/wordpress/dump.rb b/lib/wordpress/dump.rb
new file mode 100644
index 0000000..e0fbb4b
--- /dev/null
+++ b/lib/wordpress/dump.rb
@@ -0,0 +1,46 @@
+require 'nokogiri'
+
+module WordPress
+  # Parses a WordPress XML dump file and exposes its records as
+  # WordPress::* wrapper objects.
+  class Dump
+    attr_reader :doc
+
+    # file_name - path of the XML dump to parse.
+    def initialize(file_name)
+      # Block form closes the file handle once parsing is done, even when
+      # parsing raises.
+      @doc = File.open(file_name) { |file| Nokogiri::XML(file) }
+    end
+
+    def authors
+      doc.xpath("//wp:author").collect do |author|
+        WordPress::Author.new(author)
+      end
+    end
+
+    def pages
+      doc.xpath("//item[wp:post_type = 'page']").collect do |page|
+        WordPress::Page.new(page)
+      end
+    end
+
+    def posts
+      doc.xpath("//item[wp:post_type = 'post']").collect do |post|
+        WordPress::Post.new(post)
+      end
+    end
+
+    def tags
+      doc.xpath("//wp:tag/wp:tag_slug").collect do |tag|
+        WordPress::Tag.new(tag.text)
+      end
+    end
+
+    def categories
+      doc.xpath("//wp:category/wp:cat_name").collect do |category|
+        WordPress::Category.new(category.text)
+      end
+    end
+  end
+end
diff --git a/lib/wordpress/page.rb b/lib/wordpress/page.rb
new file mode 100644
index 0000000..18d41ab
--- /dev/null
+++ b/lib/wordpress/page.rb
@@ -0,0 +1,68 @@
+module WordPress
+  # Wraps an item node of the dump.
+  class Page
+    attr_reader :node
+
+    def initialize(node)
+      @node = node
+    end
+
+    def inspect
+      "WordPress::Page(#{post_id}): #{title}"
+    end
+
+    def title
+      node.xpath("title").text
+    end
+
+    def content
+      node.xpath("content:encoded").text
+    end
+
+    def creator
+      node.xpath("dc:creator").text
+    end
+
+    def post_date
+      DateTime.parse node.xpath("wp:post_date").text
+    end
+
+    def post_id
+      node.xpath("wp:post_id").text.to_i
+    end
+
+    # Id of the parent item; 0 when wp:post_parent is 0, blank or missing.
+    def parent_id
+      node.xpath("wp:post_parent").text.to_i
+    end
+
+    def status
+      node.xpath("wp:status").text
+    end
+
+    # Everything that is not published counts as a draft.
+    def draft?
+      status != 'publish'
+    end
+
+    # Items are equal when their post ids match.
+    def ==(other)
+      post_id == other.post_id
+    end
+
+    # Creates a ::Page with a 'Body' part holding the content.
+    #
+    # The dump's parent id is passed through unchanged, so the parent page
+    # must already exist under that id. A parent id of 0 is passed as nil
+    # so that top-level pages do not point at a non-existent parent.
+    #
+    # Returns the created ::Page; raises if the page itself is invalid.
+    def to_refinery
+      page = ::Page.create!(:title => title, :created_at => post_date,
+        :draft => draft?, :parent_id => (parent_id.zero? ? nil : parent_id))
+
+      page.parts.create(:title => 'Body', :body => content)
+      page
+    end
+  end
+end
diff --git a/lib/wordpress/post.rb b/lib/wordpress/post.rb
new file mode 100644
index 0000000..21ae99c
--- /dev/null
+++ b/lib/wordpress/post.rb
@@ -0,0 +1,58 @@
+module WordPress
+  # A Page that additionally exposes tags, categories and comments.
+  class Post < Page
+    def tags
+      node.xpath("category[@domain='post_tag']").collect do |tag_node|
+        WordPress::Tag.new(tag_node.text)
+      end
+    end
+
+    def tag_list
+      tags.collect(&:name).join(',')
+    end
+
+    def categories
+      node.xpath("category[@domain='category']").collect do |cat|
+        WordPress::Category.new(cat.text)
+      end
+    end
+
+    def comments
+      node.xpath("wp:comment").collect do |comment_node|
+        WordPress::Comment.new(comment_node)
+      end
+    end
+
+    # Creates a BlogPost with its categories and comments.
+    #
+    # The author must already exist as a User (see the raise below). The
+    # post is created inside the same transaction as its categories and
+    # comments, so an error while attaching them rolls the post back too.
+    #
+    # Returns the created BlogPost.
+    def to_refinery
+      user = User.find_by_username creator
+      raise "Referenced User doesn't exist! Make sure the authors are imported first." \
+        unless user
+
+      BlogPost.transaction do
+        post = BlogPost.create! :title => title, :body => content, :draft => draft?,
+          :published_at => post_date, :created_at => post_date, :author => user,
+          :tag_list => tag_list
+
+        categories.each do |category|
+          post.categories << category.to_refinery
+        end
+
+        comments.each do |comment|
+          blog_comment = comment.to_refinery
+          blog_comment.post = post
+          # save (not save!) so a comment that fails to save does not abort the post.
+          blog_comment.save
+        end
+
+        post
+      end
+    end
+  end
+end
diff --git a/lib/wordpress/tag.rb b/lib/wordpress/tag.rb
new file mode 100644
index 0000000..9dd1378
--- /dev/null
+++ b/lib/wordpress/tag.rb
@@ -0,0 +1,20 @@
+module WordPress
+  # A tag name taken from the dump.
+  class Tag
+    attr_accessor :name
+
+    def initialize(text)
+      @name = text
+    end
+
+    # Equal when the names match.
+    def ==(other)
+      name == other.name
+    end
+
+    # Returns the ActsAsTaggableOn::Tag with this name, creating it if needed.
+    def to_refinery
+      ActsAsTaggableOn::Tag.find_or_create_by_name(name)
+    end
+  end
+end
diff --git a/spec/lib/wordpress/dump_spec.rb b/spec/lib/wordpress/dump_spec.rb
new file mode 100644
index 0000000..d0fdd36
--- /dev/null
+++ b/spec/lib/wordpress/dump_spec.rb
@@ -0,0 +1,258 @@
+require 'spec_helper'
+require 'wordpress'
+
+describe WordPress::Dump, :type => :model do
+  let(:file_name) { File.realpath(File.join(File.dirname(__FILE__), '../../fixtures/wordpress_dump.xml')) }
+  let(:dump) { WordPress::Dump.new(file_name) }
+
+  it "should create a Dump object given a xml file" do
+    dump.should be_a WordPress::Dump
+  end
+
+  it "should include a Nokogiri::XML object" do
+    dump.doc.should be_a Nokogiri::XML::Document
+  end
+
+  describe "#tags" do
+    let(:tags) do
+      [ WordPress::Tag.new('css'), WordPress::Tag.new('html'),
+        WordPress::Tag.new('php'), WordPress::Tag.new('ruby')]
+    end
+
+    it "should return all included tags" do
+      tags.each do |tag|
+        dump.tags.should include(tag)
+      end
+    end
+
+    context "the last tag" do
+      let(:tag) { dump.tags.last }
+
+      describe "#to_refinery" do
+        before do
+          @tag = tag.to_refinery
+        end
+
+        it "should create a ActsAsTaggableOn::Tag" do
+          ActsAsTaggableOn::Tag.should have(1).record
+        end
+
+        it "should copy the name over to the Tag object" do
+          @tag.name.should == tag.name
+        end
+      end
+    end
+  end
+
+  describe "#categories" do
+    let(:categories) do
+      [ WordPress::Category.new('Rant'), WordPress::Category.new('Tutorials'),
+        WordPress::Category.new('Uncategorized') ]
+    end
+
+    it "should return all included categories" do
+      categories.each do |cat|
+        dump.categories.should include(cat)
+      end
+    end
+
+    context "the last category" do
+      let(:category) { dump.categories.last }
+
+      describe "#to_refinery" do
+        before do
+          @category = category.to_refinery
+        end
+
+        it "should create a BlogCategory" do
+          BlogCategory.should have(1).record
+        end
+
+        it "should copy the name over to the BlogCategory object" do
+          @category.title.should == category.name
+        end
+      end
+    end
+
+  end
+
+  describe "#pages" do
+    it "should return all included pages" do
+      dump.pages.should have(3).pages
+    end
+
+    context "the About me page" do
+      let(:page) { dump.pages.last }
+
+      it { page.title.should == 'About me' }
+      it { page.content.should include('Lorem ipsum dolor sit') }
+      it { page.creator.should == 'admin' }
+      it { page.post_date.should == DateTime.new(2011, 5, 21, 12, 25, 42) }
+      it { page.post_id.should == 10 }
+      it { page.parent_id.should == 8 }
+
+      it { page.should == dump.pages.last }
+      it { page.should_not == dump.pages.first }
+
+      describe "#to_refinery" do
+        before do
+          # "About me" has a parent page with id 8 in the XML dump,
+          # would otherwise fails creation
+          Page.create! :id => 8, :title => 'About'
+
+          @count = Page.count
+          @page = page.to_refinery
+        end
+
+        it "should create a Page object" do
+          Page.should have(@count + 1).record
+        end
+
+        it "should copy the attributes from WordPress::Page" do
+          @page.title.should == page.title
+          @page.draft.should == page.draft?
+          @page.created_at.should == page.post_date
+          # NOTE(review): the wrapping <p>...</p> was lost in the text this spec
+          # was recovered from and has been restored here -- confirm.
+          @page.parts.first.body.should == "<p>#{page.content}</p>"
+        end
+      end
+    end
+  end
+
+  describe "#authors" do
+    it "should return all authors" do
+      dump.authors.should have(1).author
+    end
+
+    context "the first author" do
+      let(:author) { dump.authors.first }
+
+      it { author.login.should == 'admin' }
+      it { author.email.should == 'admin@example.com' }
+
+      describe "#to_refinery" do
+        before do
+          @user = author.to_refinery
+        end
+
+        it "should create a User object" do
+          User.should have(1).record
+          @user.should be_a(User)
+        end
+
+        it "the @user should be persisted" do
+          @user.should be_persisted
+        end
+
+        it "should have copied the attributes from WordPress::Author" do
+          author.login.should == @user.username
+          author.email.should == @user.email
+        end
+      end
+    end
+  end
+
+  describe "#posts" do
+    it "should return all posts" do
+      dump.posts.should have(3).posts
+    end
+
+    context "the last post" do
+      let(:post) { dump.posts.last }
+
+      it { post.title.should == 'Third blog post' }
+      it { post.content.should include('Lorem ipsum dolor sit') }
+      it { post.creator.should == 'admin' }
+      it { post.post_date.should == DateTime.new(2011, 5, 21, 12, 24, 45) }
+      it { post.post_id.should == 6 }
+      it { post.parent_id.should == 0 }
+      it { post.status.should == 'publish' }
+
+      it { post.should == dump.posts.last }
+      it { post.should_not == dump.posts.first }
+
+      describe "#categories" do
+        it { post.categories.should have(1).category }
+        it { post.categories.first.should == WordPress::Category.new('Rant') }
+      end
+
+      describe "#tags" do
+        it { post.tags.should have(3).tags }
+
+        it { post.tags.should include(WordPress::Tag.new('css')) }
+        it { post.tags.should include(WordPress::Tag.new('html')) }
+        it { post.tags.should include(WordPress::Tag.new('php')) }
+      end
+
+      it { post.tag_list.should == 'css,html,php' }
+
+      describe "#comments" do
+        it "should return all attached comments" do
+          post.comments.should have(2).comments
+        end
+
+        context "the last comment" do
+          let(:comment) { post.comments.last }
+
+          it { comment.author.should == 'admin' }
+          it { comment.email.should == 'admin@example.com' }
+          it { comment.url.should == 'http://www.example.com/' }
+          it { comment.date.should == DateTime.new(2011, 5, 21, 12, 26, 30) }
+          it { comment.content.should include('Another one!') }
+          it { comment.should be_approved }
+
+          it { comment.should == post.comments.last }
+
+          describe "#to_refinery" do
+            before do
+              @comment = comment.to_refinery
+            end
+
+            it "should not save the comment, only initialize it" do
+              BlogComment.should have(0).records
+              @comment.should be_new_record
+            end
+
+            it "should copy the attributes from WordPress::Comment" do
+              @comment.name.should == comment.author
+              @comment.email.should == comment.email
+              @comment.body.should == comment.content
+              @comment.state.should == 'approved'
+              @comment.created_at.should == comment.date
+            end
+          end
+        end
+      end
+
+      describe "#to_refinery" do
+        before do
+          User.create! :username => 'admin', :email => 'admin@example.com',
+            :password => 'password', :password_confirmation => 'password'
+
+          @post = post.to_refinery
+        end
+
+        it { BlogPost.should have(1).record }
+
+        it "should copy the attributes from WordPress::Page" do
+          @post.title.should == post.title
+          @post.body.should == post.content
+          @post.draft.should == post.draft?
+          @post.published_at.should == post.post_date
+          @post.created_at.should == post.post_date
+          @post.author.username.should == post.creator
+        end
+
+        it "should assign a category for each WordPress::Category" do
+          @post.categories.should have(post.categories.count).records
+        end
+
+        it "should assign a comment for each WordPress::Comment" do
+          @post.comments.should have(post.comments.count).records
+        end
+
+      end
+    end
+  end
+end