From d5f28162047750ef3271454d0984e163dc07b9ad Mon Sep 17 00:00:00 2001
From: Wesley Moore
Date: Wed, 18 Nov 2009 13:02:37 +1100
Subject: [PATCH] Implement post importing

---
Notes (not part of the commit message):
  - Line breaks and indentation were restored from a whitespace-collapsed
    copy of this patch; whitespace-only lines are a best-effort
    reconstruction.
  - process_post referenced the undefined locals post_id, slug and
    identifier; it now reads attributes[:post_id] / attributes[:slug] and
    assigns the identifier it passes to add_item.
  - An empty slug is now rejected as well as a missing one.
  - The usage message names import.rb and spells "wordpress" correctly.
  - The blob ids on the "index" lines are those of the original commit.

 importer/import.rb    |    7 ++++-
 importer/wordpress.rb |   64 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/importer/import.rb b/importer/import.rb
index 121c5bd..2dbbbe0 100644
--- a/importer/import.rb
+++ b/importer/import.rb
@@ -2,6 +2,11 @@
 require 'rubygems'
 require 'wordpress'
 
-i = Importer::Wordpress.new('wezm.net.2009-11-17.xml')
+if ARGV.size < 2
+  puts "Usage: import.rb wordpress-export.xml /path/to/nanoc/site"
+  exit 3
+end
+
+i = Importer::Wordpress.new(ARGV[0], ARGV[1])
 
 i.run
diff --git a/importer/wordpress.rb b/importer/wordpress.rb
index abf8fe1..bfc7be5 100644
--- a/importer/wordpress.rb
+++ b/importer/wordpress.rb
@@ -2,12 +2,13 @@ require 'nokogiri'
 require 'nanoc3'
 
 module Importer
-  
+
   class Wordpress
 
-    def initialize(wordpress_export_path)
+    def initialize(wordpress_export_path, nanoc_site_path)
       @export_file = File.open(wordpress_export_path)
       @export = Nokogiri::XML(@export_file)
+      @site = Nanoc3::Site.new(nanoc_site_path)
     end
 
     def run
@@ -29,10 +30,49 @@ module Importer
 
     protected
 
+    # Returns the text content of the first node matching +xpath+ under
+    # +node+, or nil when nothing matches.
+    def get(node, xpath)
+      elem = node.xpath(xpath).first
+      elem ? elem.content : nil
+    end
+
+    # Collects the content, tags, categories and WordPress metadata of one
+    # exported post and adds it to the site as an item under /articles/.
+    # Posts without a slug are reported and skipped.
     def process_post(post)
       puts "Processing post: #{post.css('title').first.text}"
+      content = get(post, 'content:encoded')
+
+      # Prefer the tag's nicename; fall back to the lower-cased tag text.
+      tags = post.css('category[domain=tag]').map do |tag|
+        tag['nicename'] || tag.text.downcase
+      end
+
+      categories = post.css('category[domain=category]').map do |category|
+        category.text
+      end
+
+      attributes = {
+        :tags => tags.uniq,
+        :categories => categories.uniq,
+        :permalink => get(post, 'link'),
+        :status => get(post, 'wp:status'),
+        :slug => get(post, 'wp:post_name'),
+        :post_id => get(post, 'wp:post_id').to_i,
+        :post_date => get(post, 'wp:post_date_gmt'),
+      }
+
+      # A missing or empty slug cannot produce a usable identifier.
+      if attributes[:slug].to_s.empty?
+        puts "Error post #{attributes[:post_id]} has no slug"
+        return
+      end
+
+      identifier = "/articles/#{attributes[:slug]}"
+      add_item(content, attributes, identifier)
     end
-    
+
     def process_page(page)
       puts "Processing page: #{page.css('title').first.text}"
     end
@@ -41,6 +81,22 @@ module Importer
       puts "Processing attachment"
     end
 
+    # Creates an item with the given content, attributes and identifier in
+    # the site's first data source and reports the identifier.
+    def add_item(content, attributes, identifier)
+      # content = row['post_content']
+      # attributes = {
+      #   :title => row['post_title'],
+      #   :published_on => row['post_date_gmt'],
+      #   :modified_on => row['post_modified_gmt'],
+      #   :status => row['post_status'],
+      #   :excerpt => row['post_excerpt']
+      # }
+      # identifier = '/posts/' + post_date.year.to_s + '/' + post_date.month.to_s + '/' + post_name + '/'
+      @site.data_sources.first.create_item(content, attributes, identifier)
+      puts "Added item at #{identifier}"
+    end
+
   end
-  
+
 end