module WordPressImport
  # Loads a WordPress XML export file and exposes its authors, pages, posts,
  # tags, categories and attachments as importer objects.
  class Dump
    # File size in bytes (10MB) at or above which a truncation warning is printed.
    LIBXML_DEFAULT_MAX_FILE_SIZE = 10 * 1024 * 1024

    # ASCII "end of transmission" control character, stripped before parsing.
    EOT_CHARACTER = "\u0004"

    # The parsed document, as returned by Nokogiri::XML.
    attr_reader :doc

    # Reads and parses the export file.
    #
    # @param file_name [String] path to the export file; expanded with
    #   File.expand_path before use
    # @raise [RuntimeError] if the path cannot be expanded, is not a regular
    #   file, or is not readable
    def initialize(file_name)
      path = readable_path(file_name)

      if File.size(path) >= LIBXML_DEFAULT_MAX_FILE_SIZE
        puts "WARNING: LibXML by default supports 10MB max file size. " \
             "On some systems your file will be silently truncated; on others, an error will be raised. " \
             "Consider splitting your file into smaller chunks and running rake tasks individually " \
             "(authors, then blog/pages, then media), and double-check the import results."
      end

      # File.read opens and closes the file itself, so no handle is left open.
      # EOT characters are removed before the content is handed to the parser.
      @doc = Nokogiri::XML(File.read(path).gsub(EOT_CHARACTER, ""))
    end

    # @return [Array<Author>] one Author per wp:author element
    def authors
      doc.xpath("//wp:author").map { |node| Author.new(node) }
    end

    # @param only_published [Boolean] when truthy, keep only pages whose
    #   published? returns true
    # @return [Array<Page>] one Page per item whose wp:post_type is 'page'
    def pages(only_published = false)
      pages = items_of_type("page").map { |node| Page.new(node) }
      only_published ? pages.select(&:published?) : pages
    end

    # @param only_published [Boolean] when truthy, keep only posts whose
    #   published? returns true
    # @return [Array<Post>] one Post per item whose wp:post_type is 'post'
    def posts(only_published = false)
      posts = items_of_type("post").map { |node| Post.new(node) }
      only_published ? posts.select(&:published?) : posts
    end

    # @return [Array<Tag>] one Tag per wp:tag_slug, built from the slug text
    def tags
      doc.xpath("//wp:tag/wp:tag_slug").map { |node| Tag.new(node.text) }
    end

    # @return [Array<Category>] one Category per wp:cat_name, built from its text
    def categories
      doc.xpath("//wp:category/wp:cat_name").map { |node| Category.new(node.text) }
    end

    # @return [Array<Attachment>] one Attachment per item whose wp:post_type
    #   is 'attachment'
    def attachments
      items_of_type("attachment").map { |node| Attachment.new(node) }
    end

    private

    # Expands file_name and checks that it names a readable regular file.
    # Returns the expanded path, or raises a RuntimeError with usage help.
    def readable_path(file_name)
      expanded = begin
        File.expand_path(file_name)
      rescue StandardError
        # A name that cannot be expanded (e.g. nil) gets the same
        # user-facing error as a missing file, raised just below.
        nil
      end

      return expanded if expanded && File.file?(expanded) && File.readable?(expanded)

      raise "Given file '#{expanded || file_name}' is not a file or not readable. " \
            "Rake tasks take filename arguments like this: " \
            "rake wordpress:full_import['/path/to/my_file']"
    end

    # Returns the item nodes whose wp:post_type equals post_type.
    def items_of_type(post_type)
      doc.xpath("//item[wp:post_type = '#{post_type}']")
    end
  end
end