2009-11-17 02:18:17 +00:00
|
|
|
require 'nokogiri'
|
|
|
|
require 'nanoc3'
|
2009-11-19 08:42:22 +00:00
|
|
|
require 'uri'
|
2009-11-17 02:18:17 +00:00
|
|
|
|
|
|
|
module Importer
|
2009-11-18 02:02:37 +00:00
|
|
|
|
2009-11-17 02:18:17 +00:00
|
|
|
class Wordpress
|
|
|
|
|
2009-11-19 08:42:22 +00:00
|
|
|
# Builds an importer for one WordPress export.
#
# wordpress_export_path - path of the WordPress XML export to read.
# nanoc_site_path       - path handed to Nanoc3::Site.new.
# rewrite_map_path      - path the rewrite map is written to by
#                         write_rewrite_map.
#
# The export is parsed immediately, and the category and tag tables are
# loaded before the constructor returns.
def initialize(wordpress_export_path, nanoc_site_path, rewrite_map_path)
  @export_file = File.open(wordpress_export_path)
  begin
    @export = Nokogiri::XML(@export_file)
  ensure
    # The handle is only needed while parsing; close it so it is not leaked
    # for the lifetime of the importer.
    @export_file.close
  end

  @site = Nanoc3::Site.new(nanoc_site_path)

  # Pairs of [old permalink, new item identifier], filled in by add_item.
  @rewrite_map = []
  @rewrite_map_path = rewrite_map_path

  load_categories
  load_tags
end
|
|
|
|
|
|
|
|
# Reads every <wp:category> element under //rss/channel into @categories,
# a hash keyed by category name whose values are hashes with :slug, :name
# and :parent. :parent is nil when the export gives no parent (element
# missing or empty).
def load_categories
  puts "Loading categories"

  @categories = {}
  @export.xpath('//rss/channel/wp:category').each do |category|
    name = get(category, 'wp:cat_name')

    # get returns nil when the element is absent, so guard before empty?.
    parent = get(category, 'wp:category_parent')
    parent = nil if parent && parent.empty?

    @categories[name] = {
      :slug   => get(category, 'wp:category_nicename'),
      :name   => name,
      :parent => parent
    }
  end
end
|
|
|
|
|
|
|
|
# Reads every <wp:tag> element under //rss/channel into @tags, a hash keyed
# by tag slug whose values are hashes with :slug and :name.
def load_tags
  puts "Loading tags"

  @tags = {}
  tag_nodes = @export.xpath('//rss/channel/wp:tag')
  tag_nodes.each do |node|
    tag_slug = get(node, 'wp:tag_slug')
    tag_name = get(node, 'wp:tag_name')
    @tags[tag_slug] = { :slug => tag_slug, :name => tag_name }
  end
end
|
|
|
|
|
|
|
|
# Walks up the :parent links of a category hash (as stored in @categories)
# and returns the highest ancestor that can be reached.
#
# category - a hash with at least a :parent key, or nil.
#
# Returns nil when given nil. If a parent name is not present in
# @categories, or the parent links form a cycle, the walk stops and the
# category reached so far is returned instead of raising or looping forever.
def find_topmost_category(category)
  return nil if category.nil?

  visited = {}
  current = category
  until current[:parent].nil?
    parent_name = current[:parent]
    break if visited[parent_name] # cycle in the parent chain

    visited[parent_name] = true
    parent = @categories[parent_name]
    break if parent.nil? # dangling parent reference

    current = parent
  end
  current
end
|
|
|
|
|
|
|
|
# Runs the import: dispatches every //rss/channel/item element to the
# handler for its wp:post_type, then writes the rewrite map.
#
# Items whose type is not 'post', 'page' or 'attachment' (including items
# with no wp:post_type element at all) are reported and skipped.
def run
  @export.xpath('//rss/channel/item').each do |item|
    # get is nil-safe, so an item without a post type falls through to the
    # "unknown" branch instead of raising.
    item_type = get(item, 'wp:post_type')

    case item_type
    when 'post'
      process_post(item)
    when 'page'
      process_page(item)
    when 'attachment'
      process_attachment(item)
    else
      puts "Unknown post type: #{item_type}"
    end
  end

  write_rewrite_map
end
|
|
|
|
|
|
|
|
protected
|
|
|
|
|
2009-11-18 02:02:37 +00:00
|
|
|
# Returns the content of the first element matching +xpath+ relative to
# +node+, or nil when nothing matches.
def get(node, xpath)
  match = node.at_xpath(xpath)
  return nil unless match

  match.content
end
|
|
|
|
|
2009-11-17 02:18:17 +00:00
|
|
|
# Converts one export <item> of type 'post' into a nanoc item.
#
# Collects the post's tags, categories and metadata into an attributes hash,
# derives the item identifier as /<section>/<year>/<month>/<slug>/ and hands
# everything to add_item.
#
# post - the <item> node for the post.
def process_post(post)
  title = get(post, 'title')
  puts "Processing post: #{title}"

  content    = get(post, 'content:encoded')
  categories = extract_post_categories(post)
  created_at = get(post, 'wp:post_date_gmt')
  post_date  = parse_post_date(created_at)

  attributes = {
    :tags       => extract_post_tags(post),
    :categories => categories,
    :permalink  => get(post, 'link'),
    :status     => get(post, 'wp:status'),
    :slug       => get(post, 'wp:post_name'),
    :post_id    => get(post, 'wp:post_id').to_i,
    :section    => section_slug_for(categories),
    :title      => title,
    :created_at => created_at
  }
  attributes[:kind] = attributes[:status] == 'publish' ? 'article' : 'draft'

  # get returns nil for a missing element, so check for nil as well as ''.
  if attributes[:slug].nil? || attributes[:slug].empty?
    puts "WARNING: Error post #{attributes[:post_id]} has no slug, generating one"
    attributes[:slug] = generate_post_slug(title, attributes[:post_id])
  end

  path = ['', attributes[:section], post_date.year, '%02d' % post_date.month, attributes[:slug], ''].join('/')

  add_item(content, attributes, path)
end

# Returns the unique tag slugs of a post: the 'nicename' attribute when the
# <category domain="tag"> element has one, otherwise its downcased text.
def extract_post_tags(post)
  post.css('category[domain=tag]').map { |tag| tag['nicename'] || tag.text.downcase }.uniq
end

# Returns the unique category names (element text) of a post.
def extract_post_categories(post)
  post.css('category[domain=category]').map { |category| category.text }.uniq
end

# Parses a "YYYY-MM-DD HH:MM:SS" string into a Date, falling back to today
# when the value is missing (TypeError) or not a valid date (ArgumentError).
def parse_post_date(raw)
  Date.strptime(raw, "%Y-%m-%d %H:%M:%S")
rescue ArgumentError, TypeError
  Date.today
end

# Returns the slug of the topmost ancestor of the post's first category, or
# 'uncategorized' when the post has no category known to @categories.
def section_slug_for(categories)
  category = @categories[categories.first]
  topmost = category && find_topmost_category(category)
  topmost ? topmost[:slug] : 'uncategorized'
end

# Derives a slug from the title: lowercased, every run of characters outside
# 0-9a-z replaced by a single hyphen, hyphens trimmed from both ends. Falls
# back to "post-<id>" when nothing usable remains (nil or symbol-only title).
def generate_post_slug(title, post_id)
  slug = title.to_s.downcase.gsub(/[^0-9a-z]+/, '-').gsub(/\A-+|-+\z/, '')
  slug.empty? ? "post-#{post_id}" : slug
end
|
2009-11-18 02:02:37 +00:00
|
|
|
|
2009-11-17 02:18:17 +00:00
|
|
|
# Handles an export <item> of type 'page'. Pages are not imported; a warning
# naming the page is printed instead.
#
# page - the <item> node for the page.
def process_page(page)
  # at_css returns nil when the item has no <title>; do not call text on it.
  title_node = page.at_css('title')
  title = title_node ? title_node.text : '(untitled)'
  puts "WARNING: Skipping page: #{title}"
end
|
|
|
|
|
|
|
|
# Handles an export <item> of type 'attachment'.
#
# Attachments are not needed for now, so this is deliberately a no-op that
# returns nil.
def process_attachment(attachment)
  nil
end
|
|
|
|
|
2009-11-18 02:02:37 +00:00
|
|
|
# Creates an item in the site's first data source and, for published items,
# records the old permalink against the new identifier in @rewrite_map.
#
# content    - the item body passed through to create_item.
# attributes - the attribute hash; :status and :permalink are read here.
# identifier - the identifier (path) of the new item.
def add_item(content, attributes, identifier)
  published = attributes[:status] == 'publish'
  @rewrite_map << [attributes[:permalink], identifier] if published

  data_source = @site.data_sources.first
  data_source.create_item(content, attributes, identifier)
  puts "Added item at #{identifier}"
end
|
|
|
|
|
2009-11-19 08:42:22 +00:00
|
|
|
# Writes @rewrite_map to @rewrite_map_path, one "<old path>\t<new path>"
# line per entry, and echoes each mapping to stdout.
#
# Only the path component of the old URL is written. Entries whose old URL
# is missing, cannot be parsed, or has no path are reported and skipped so
# that one bad permalink does not abort the whole map.
def write_rewrite_map
  File.open(@rewrite_map_path, 'w') do |f|
    @rewrite_map.each do |old_url, new_path|
      begin
        old_path = URI.parse(old_url.to_s).path
      rescue URI::InvalidURIError
        old_path = nil
      end

      # path is nil for opaque URIs and '' when the URL has no path at all.
      if old_path.nil? || old_path.empty?
        puts "WARNING: Skipping rewrite for unusable URL: #{old_url.inspect}"
        next
      end

      f.puts "#{old_path}\t#{new_path}"
      puts "#{old_path} => #{new_path}"
    end
  end
end
|
|
|
|
|
2009-11-17 02:18:17 +00:00
|
|
|
end
|
2009-11-18 02:02:37 +00:00
|
|
|
|
2009-11-17 02:18:17 +00:00
|
|
|
end
|