)
// NOTE(review): the ")" above is the tail of a statement that begins before
// the visible part of this file; it is kept exactly as found.
$tableposts = $wpdb->posts;
$tableusers = $wpdb->users;
$tablecomments = $wpdb->comments;
$tablecategories = $wpdb->categories;

/***********************************************************************
/ POSTS
/**********************************************************************/

/**
 * Fetches one feed document and pushes it through a SAX (expat) parser.
 *
 * Shared by readXML() and readCXML(), which differ only in the handler
 * callbacks they install and in the message shown when the feed cannot be
 * opened. The transport is chosen by $mydata['mechanism'] ('curl' or
 * 'fopen'); with any other value nothing is parsed.
 *
 * @param string $xmlFile      URL of the feed to read.
 * @param string $startHandler Name of the element-start callback.
 * @param string $endHandler   Name of the element-end callback.
 * @param string $dataHandler  Name of the character-data callback.
 * @param string $openError    Message handed to die() when fopen() fails.
 */
function ara_blogger_import_parseFeed($xmlFile, $startHandler, $endHandler, $dataHandler, $openError) {
	global $mydata;

	$xmlParser = xml_parser_create();
	xml_parser_set_option($xmlParser, XML_OPTION_CASE_FOLDING, false);
	// Handlers are passed as strings: bare words are undefined constants.
	xml_set_element_handler($xmlParser, $startHandler, $endHandler);
	xml_set_character_data_handler($xmlParser, $dataHandler);

	switch($mydata['mechanism']) {
		case 'curl':
			$filedata = ara_blogger_import_getCURL($xmlFile);
			// curl_exec() yields false on failure; only parse a real body.
			if(is_string($filedata)) {
				xml_parse($xmlParser, $filedata, true);
			}
			break;
		case 'fopen':
			$fp = fopen($xmlFile, "r") or die($openError);
			$finalSent = false;
			while(!$finalSent) {
				$data = fread($fp, 8192);
				if($data === false || $data === '') {
					// Nothing left to read: tell the parser the document is complete.
					xml_parse($xmlParser, '', true);
					break;
				}
				// Compare against ''/false above rather than truthiness so a
				// chunk consisting of "0" does not end the loop early.
				$finalSent = feof($fp);
				xml_parse($xmlParser, $data, $finalSent);
			}
			fclose($fp);
			break;
	}
	xml_parser_free($xmlParser);
}

/**
 * Parses a posts feed. The installed handlers collect the children of every
 * "item" element into the globals $postsArray / $itemCount.
 *
 * @param string $xmlFile URL of the posts feed.
 */
function readXML($xmlFile) {
	ara_blogger_import_parseFeed(
		$xmlFile,
		'startElement',
		'endElement',
		'characterData',
		'Unable to extract post from '.$xmlFile
	);
}

// Pushes the opened element onto the element hierarchy and counts "item" elements
function startElement($parser, $name, $attrs) {
	global $currentElements, $itemCount;
	if(!is_array($currentElements)) {
		$currentElements = array();
	}
	array_push($currentElements, $name);
	if($name == "item") {
		$itemCount += 1;
	}
}

// Appends character data of a direct child of "item" to the current post.
// "category" text additionally gets a '#cat#' marker after every chunk.
function characterData($parser, $data) {
	global $currentElements, $postsArray, $itemCount;
	$depth = is_array($currentElements) ? count($currentElements) : 0;
	if($depth < 2) {
		// No parent element yet, so this text cannot belong to an item.
		return;
	}
	$parentElement = $currentElements[$depth-2];
	$thisElement = $currentElements[$depth-1];
	if($parentElement != "item") {
		return;
	}
	$index = $itemCount-1;
	if(!isset($postsArray[$index][$thisElement])) {
		// Start from an empty string so the append below never hits an unset key.
		$postsArray[$index][$thisElement] = '';
	}
	if($thisElement == 'category')
		$postsArray[$index][$thisElement] .= $data . '#cat#';
	else
		$postsArray[$index][$thisElement] .= $data;
}

// If the XML element has ended, it is popped off the hierarchy
function endElement($parser, $name) {
	global $currentElements;
	$depth = is_array($currentElements) ? count($currentElements) : 0;
	if($depth > 0 && $currentElements[$depth-1] == $name) {
		array_pop($currentElements);
	}
}

/***********************************************************************
/ COMMENTS
/**********************************************************************/

/**
 * Parses a comments feed. The installed handlers collect the children of
 * every "item" element into the globals $commentsArray / $itemCCount.
 *
 * @param string $xmlFile URL of the comments feed.
 */
function readCXML($xmlFile) {
	ara_blogger_import_parseFeed(
		$xmlFile,
		'startCElement',
		'endCElement',
		'characterCData',
		'
ERROR: Unable to extract comments from '.$xmlFile
	);
}

// Pushes the opened element onto the comment element hierarchy and counts "item" elements
function startCElement($parser, $name, $attrs) {
	global $currentCElements, $itemCCount;
	if(!is_array($currentCElements)) {
		$currentCElements = array();
	}
	array_push($currentCElements, $name);
	if($name == "item") {
		$itemCCount += 1;
	}
}

// Appends character data of a direct child of "item" to the current comment
function characterCData($parser, $data) {
	global $currentCElements, $commentsArray, $itemCCount;
	$depth = is_array($currentCElements) ? count($currentCElements) : 0;
	if($depth < 2) {
		// No parent element yet, so this text cannot belong to an item.
		return;
	}
	$parentElement = $currentCElements[$depth-2];
	$thisElement = $currentCElements[$depth-1];
	if($parentElement != "item") {
		return;
	}
	$index = $itemCCount-1;
	if(!isset($commentsArray[$index][$thisElement])) {
		$commentsArray[$index][$thisElement] = '';
	}
	$commentsArray[$index][$thisElement] .= $data;
}

// If the XML element has ended, it is popped off the hierarchy
function endCElement($parser, $name) {
	global $currentCElements;
	$depth = is_array($currentCElements) ? count($currentCElements) : 0;
	if($depth > 0 && $currentCElements[$depth-1] == $name) {
		array_pop($currentCElements);
	}
}

/**
 * Downloads a URL with curl.
 *
 * @param string $url Address to fetch.
 * @return mixed Whatever curl_exec() returns with CURLOPT_RETURNTRANSFER set
 *               and the response header excluded.
 */
function ara_blogger_import_getCURL($url) {
	$ch = curl_init();
	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	curl_setopt($ch, CURLOPT_HEADER, 0);
	$filedata = curl_exec($ch);
	curl_close($ch);
	return $filedata;
}

// Prints the closing thank-you text shown once the import has finished
function ara_blogger_import_showthanks() {
	global $mydata;
	echo '

Thanks for using this plugin.

';
	echo '

Some points worth noting:

';
	echo '';
	echo '

If you think of anything else that can be listed here, do let me know.

';
}

// Prints the current import settings and, unless $skip is true, runs one import cycle
function ara_blogger_import_process($skip = false) {
	global $wpdb, $tableposts, $tablecomments, $mydata, $postsArray, $currentElements, $itemCount; //, $commentsArray, $currentCElements, $itemCCount;
	echo '
';
	// NOTE(review): many literals echoed in this function are empty or hold only
	// whitespace/newlines; presumably markup is missing from this copy of the
	// file - confirm against the original before relying on the output.
	echo '

Blogger RSS Import v1.2.1

';
	// Summary of the import settings currently held in $mydata.
	print '

Blogger ID: ['.$mydata['bloggerId'].']';
	print '
Number of posts: ['.$mydata['totalPosts'].']';
	// Cycle counters are only printed when 'allatonce' is blank.
	if(strlen(trim($mydata['allatonce'])) == 0) {
		print '
Number of cycles: ['.$mydata['numLoop'].']';
		print '
Current cycle: ['.($mydata['currLoop']).']';
		print '
Number of cycles to go: ['.($mydata['currLoop']-1).']';
	}
	print '
Download mechanism: ['.$mydata['mechanism'].']';
	print '
Haloscan friendly: ['.(strlen($mydata['importhalo']) > 0 ? $mydata['importhalo'] : 'false').']';
	print '
Do not import comments: ['.(strlen($mydata['nocomments']) > 0 ? $mydata['nocomments'] : 'false').']';
	print '
Maintain permalinks: ['.(strlen($mydata['maintainperma']) > 0 ? $mydata['maintainperma'] : 'false').']';
	print '
Import all entries at once: ['.(strlen($mydata['allatonce']) > 0 ? $mydata['allatonce'] : 'false').']';
	print '
Match categories: ['.(strlen($mydata['matchcats']) > 0 ? $mydata['matchcats'] : 'false').']';
	print '
Match authors: ['.(strlen($mydata['matchauthors']) > 0 ? $mydata['matchauthors'] : 'false').']';
	print '
';
	if(!$skip) :
		// The value appended to post_url for a cycle is
		// currLoop * numposts - (numposts - 1); cycles count down towards 1.
		if(strlen($mydata['allatonce']) > 0) {
			// All-at-once mode: read the feed once per remaining cycle, then
			// set currLoop to 1 so the "done" branch below is taken.
			for($l=$mydata['currLoop'];$l>0;$l--) {
				readXML($mydata['post_url'].($mydata['currLoop'] * $mydata['numposts'] - ($mydata['numposts'] - 1)),$mydata['mechanism']);
				$mydata['currLoop']--;
			}
			$mydata['currLoop'] = 1;
		}
		else
			readXML($mydata['post_url'].($mydata['currLoop'] * $mydata['numposts'] - ($mydata['numposts'] - 1)),$mydata['mechanism']);
		// Posts are inserted in the reverse of the order they were parsed in.
		$postsArray = array_reverse($postsArray);

		// Now that we have the data
		for($x=0;$x<$itemCount;$x++) {
			global $commentsArray, $currentCElements, $itemCCount;
			unset($to_insert);
			// Reset the comment-parser globals so this post starts with no comments.
			$commentsArray = array();
			$currentCElements = array();
			$itemCCount = 0;
			$to_insert = array();

			// Get blogpost ID - useful to retrieve comments later
			// (everything in the guid after the '.post-' marker)
			$postsArray[$x]['blogpost-id'] = substr($postsArray[$x]['guid'],strpos($postsArray[$x]['guid'],'.post-')+6);

			// Debugging display
			if($mydata['currLoop'] == $mydata['numLoop'] || strlen($mydata['allatonce']) > 0)
				print "
Post ".($mydata['totalPosts'] - $x)." read from XML: ".$postsArray[$x]['title']; //." - ".$postsArray[$x]['blogpost-id'];
			else
				print "
Post ".($mydata['numposts'] * $mydata['currLoop'] - $x)." read from XML: ".$postsArray[$x]['title']; //." - ".$postsArray[$x]['blogpost-id'];

			// Cleanup of javascript onblur thingies from images
			$postsArray[$x]['description'] = preg_replace('/\sonblur=\"(\w*\{*\}*\s*\.*\(*\)*\;*)*\"/','',$postsArray[$x]['description']);

			// Wed, 13 Dec 2006 15:27:00 +0000
			$datetime = strtotime($postsArray[$x]['pubDate']);
			$postDate = date("Y-m-d H:i:s",$datetime);
			$postDateGMT = gmdate("Y-m-d H:i:s",$datetime);

			if(strlen($mydata['matchcats']) > 0) {
				unset($cat_IDs);
				// characterData() appends '#cat#' after each piece of category text.
				$exploded_cats = explode('#cat#', $postsArray[$x]['category']);
				// NOTE(review): the next statement is visibly truncated in this copy
				// (loop condition, lookup of $result and the start of the echoed
				// literal are missing); it is left untouched - restore from the
				// original file.
				for($c=0;$c     New category '.ucwords($exploded_cats[$c]).' found'; } else $cat_IDs[] = $result; }
			}

			if(strlen($mydata['matchauthors']) > 0) {
				// Reuse the user with this login, or create one named after the feed author.
				$uid = username_exists($postsArray[$x]['author']);
				if(empty($uid) or is_null($uid)) {
					$user_insert['user_login'] = $postsArray[$x]['author'];
					$user_insert['user_nicename'] = $postsArray[$x]['author'];
					$user_insert['display_name'] = $postsArray[$x]['author'];
					$uid = wp_insert_user($user_insert);
					echo '
     New user '.$postsArray[$x]['author'].' found';
				}
			}

			// Fall back to the configured default category / user when matching
			// is off (or, for categories, produced nothing).
			$to_insert['post_category'] = (strlen($mydata['matchcats']) > 0 && count($cat_IDs) > 0 ? $cat_IDs : $mydata['category']);
			$to_insert['post_author'] = (strlen($mydata['matchauthors']) > 0 ? $uid : $mydata['user']);
			$to_insert['post_date'] = $postDate;
			$to_insert['post_date_gmt'] = $postDateGMT;
			// NOTE(review): both branches below are identical apart from an empty
			// prefix, and every search string is a bare newline; presumably the
			// prefix and the search strings lost their content - confirm.
			if(strlen($mydata['importhalo']) > 0)
				$to_insert['post_content'] = '' . str_replace(array('
','
','
','
','
','
'), "\n", addslashes($postsArray[$x]['description']));
			else
				$to_insert['post_content'] = str_replace(array('
','
','
','
','
','
'), "\n", addslashes($postsArray[$x]['description']));
			$to_insert['post_title'] = addslashes($postsArray[$x]['title']);
			$to_insert['post_status'] = 'publish';
			if(strlen($mydata['maintainperma']) > 0) {
				// Get post name from blogger
				// (strip the leading host/number/number/ part of the link and the .html/.htm suffix)
				$link = preg_replace('/http:\/\/\w+(-|_*\w*)*.\w+.\w+\/\d+\/\d+\//','',$postsArray[$x]['link']);
				$link = str_replace('.html','',$link);
				$link = str_replace('.htm','',$link);
				$to_insert['post_name'] = $link;
				echo '
     Permalink captured: '.$link.'';
			}
			else
				$to_insert['post_name'] = sanitize_title_with_dashes($postsArray[$x]['title']);

			$Post_ID = wp_insert_post($to_insert);

			if(strlen($mydata['nocomments']) == 0) {
				// Look for the number of comments in this post
				$number_comments = ara_blogger_import_getnumcomments($postsArray[$x]['blogpost-id']);
				// Setting number of loops, by default take 100 comments at a time (hopefully won't be a problem)
				$numCLoop = ceil($number_comments/100);
				// Extract comments based on post ID
				for($cc=0;$cc<$numCLoop;$cc++){
					readCXML(str_replace('XXXXXXXXXXXX',$postsArray[$x]['blogpost-id'],$mydata['comment_url']).(($cc * 100) + 1),$mydata['mechanism']);
				}
				// Insert every parsed comment against the post created above.
				for($y=0;$y<$itemCCount;$y++) {
					$datetime = strtotime($commentsArray[$y]['pubDate']);
					$commentDate = date("Y-m-d H:i:s",$datetime);
					$commentDateGMT = gmdate("Y-m-d H:i:s",$datetime);
					$comment_insert['comment_post_ID'] = $Post_ID;
					$comment_insert['comment_author'] = addslashes($commentsArray[$y]['author']);
					$comment_insert['comment_author_email'] = '';
					$comment_insert['comment_author_url'] = (isset($commentsArray[$y]['uri']) ? $commentsArray[$y]['uri'] : '');
					$comment_insert['comment_date'] = $commentDate;
					$comment_insert['comment_date_gmt'] = $commentDateGMT;
					$comment_insert['comment_content'] = addslashes($commentsArray[$y]['description']);
					$comment_insert['comment_approved'] = '1';
					$comment_insert['user_id'] = 0;
					wp_insert_comment($comment_insert);
				}
				// $y equals the number of comments inserted once the loop has ended.
				echo "
      $y comments captured along...";
			}
		}
	else:
		echo '

Cycle skipped

';
	endif;

	// currLoop == 1 means this was the last cycle.
	if($mydata['currLoop'] == 1) {
		echo '

Blogger RSS Import v1.2.1

';
		echo '

You\'re done!

';
		ara_blogger_import_showthanks();
		echo '

Now if you want... you can map these comment authors to their website and emails, below.

';
		// Now look for comments author if user wants to
		$comment_authors = $wpdb->get_results("SELECT comment_author,count(comment_author) AS count,comment_author_email,comment_author_url FROM $tablecomments GROUP BY comment_author ORDER BY count DESC");
		$numrows = 0;
		echo '
';
		echo '';
		echo '';
		// NOTE(review): as visible here the loop only echoes empty strings and the
		// per-author text is echoed once after it; this looks scrambled in this
		// copy - confirm against the original file.
		foreach ($comment_authors as $author) {
			echo '';
			echo '';
			echo '';
			echo '';
			echo '';
			echo '';
			$numrows++;
		}
		echo '
AuthorNo of commentsEmailWebsite (please include http://)
'.$author->comment_author.''.$author->count.'
';
		echo '';
		echo '';
		echo '
';
	}
	else {
		echo '

You are not done, there are more posts to be imported.

';
		echo '
';
		echo '';
		echo '';
		echo '';
		echo '';
		echo '';
		echo '';
		echo ''; // this might not be needed anymore
		echo '';
		echo '';
		echo '';
		echo '';
		echo '';
		echo '';
		echo '';
		echo '';
		echo ' ';
		echo '
';
	}
}

// Downloads the start of the posts feed at $mydata['basic_url'] into $filedata
// using the configured mechanism; $post is not used in the visible code.
function ara_blogger_import_getmetadata($post) {
	global $mydata;
	switch($mydata['mechanism']) {
		case 'curl':
			$filedata = ara_blogger_import_getCURL($mydata['basic_url']);
			break;
		case 'fopen':
			$fp = fopen($mydata['basic_url'],"r") or die('
ERROR: Unable to extract initial data from '.$mydata['basic_url']);
			// The message above no longer calls mysql_error(): a failed fopen()
			// is not a database error.
			$filedata = fread($fp,8192);
			fclose($fp);
			break;
	}
	// NOTE(review): the strpos() needles below are empty strings in this copy of
	// the file; presumably the tag names being searched for were lost - restore
	// them from the original. They are left untouched here.
	$bloggerId = substr($filedata, strpos($filedata,'')+9, strpos($filedata,'')-strpos($filedata,'')-9);
	$mydata['bloggerId'] = preg_replace('/tag:\w+\.\w+\,\d+:blog-/','',$bloggerId);
	// Too lazy to parse XML
	$mydata['totalPosts'] = substr($filedata, strpos($filedata,'')+25, strpos($filedata,'')-strpos($filedata,'')-25);
	// Number of cycles = total posts / posts per cycle, rounded up. A missing,
	// non-numeric or non-positive operand yields 0 cycles instead of a fatal
	// division error.
	$totalPosts = is_numeric($mydata['totalPosts']) ? $mydata['totalPosts'] : 0;
	$perCycle = is_numeric($mydata['numposts']) ? $mydata['numposts'] : 0;
	$numLoop = ($perCycle > 0) ? ceil($totalPosts / $perCycle) : 0;
	$mydata['numLoop'] = $numLoop;
	$mydata['currLoop'] = $numLoop;
}

/**
 * Reads the comment count of one post from its Blogger comments feed.
 *
 * Requests the feed with max-results=1 using the configured mechanism and
 * cuts the count out of the first 8192 bytes (fopen) or the whole body (curl).
 *
 * @param string $Post_ID Blogger post id substituted into the feed URL.
 * @return mixed The extracted count as found in the feed, or 0 when the feed
 *               could not be fetched or the extracted text is not numeric.
 */
function ara_blogger_import_getnumcomments($Post_ID) {
	global $mydata;
	$comment_basic_url = 'http://'.$mydata['blogger'].'.blogspot.com/feeds/XXXXXXXXXXXX/comments/full?alt=rss&max-results=1&start-index=1';
	$comment_basic_url = str_replace('XXXXXXXXXXXX',$Post_ID,$comment_basic_url);
	// Stays empty when the mechanism is neither 'curl' nor 'fopen'.
	$filedata = '';
	switch($mydata['mechanism']) {
		case 'curl':
			$filedata = ara_blogger_import_getCURL($comment_basic_url);
			break;
		case 'fopen':
			// Report the URL actually requested; $mydata has no 'comment_basic_url' entry.
			$fp = fopen($comment_basic_url,"r") or die('
ERROR: Unable to get comment information from '.$comment_basic_url);
			$filedata = fread($fp,8192);
			fclose($fp);
			break;
	}
	if(!is_string($filedata) || $filedata === '') {
		// Nothing was downloaded, so there is nothing to count.
		return 0;
	}
	// Too lazy to parse XML
	// NOTE(review): the strpos() needles are empty strings in this copy of the
	// file; presumably the tag name was lost - restore it from the original.
	$numComments = substr($filedata, strpos($filedata,'')+25, strpos($filedata,'')-strpos($filedata,'')-25);
	// The caller divides this value, so never hand back non-numeric text.
	return is_numeric($numComments) ? $numComments : 0;
}

// Entry point of the import page. This function continues beyond this point in
// the file; the part below is reproduced unchanged.
function ara_blogger_import_page() {
	global $mydata, $wpdb, $tablecomments;
	if (isset($_POST['blogger']) && !isset($_POST['next'])) {
		$mydata['allatonce'] = $_POST['allatonce'];
		if(strlen($mydata['allatonce']) > 0)
			$mydata['numposts'] = 100;
		else
			$mydata['numposts'] = $_POST['numposts'];
		// Get general variables
		$basic_url = 'http://'.$_POST['blogger'].'.blogspot.com/feeds/posts/full?alt=rss&max-results=1&start-index=1';
		$post_url = 'http://'.$_POST['blogger'].'.blogspot.com/feeds/posts/full?alt=rss&max-results='.$mydata['numposts'].'&start-index=';
		$comment_url = 'http://'.$_POST['blogger'].'.blogspot.com/feeds/XXXXXXXXXXXX/comments/full?alt=rss&max-results=100&start-index=';
		// Decide which file mechanism to use - 1. fopen, 2. curl
		if(ini_get('allow_url_fopen')) {
			$mechanism = 'fopen';
		}
		else if(function_exists('curl_init')) {
			$mechanism = 'curl';
		}
		else {
			$mechanism = 'null';
			echo '

Blogger RSS Import v1.2.1

'; echo 'Sorry, fopen is not allowed to open URLs (allow_url_fopen), and no curl extension is found on your host. Process aborted.'; exit; } $mydata['blogger'] = $_POST['blogger']; $mydata['basic_url'] = $basic_url; $mydata['post_url'] = $post_url; $mydata['comment_url'] = $comment_url; $mydata['mechanism'] = $mechanism; $mydata['importhalo'] = $_POST['importhalo']; $mydata['maintainperma'] = $_POST['maintainperma']; $mydata['matchcats'] = $_POST['matchcats']; $mydata['matchauthors'] = $_POST['matchauthors']; $mydata['nocomments'] = $_POST['nocomments']; ara_blogger_import_getmetadata($_POST); if(isset($_POST['skip']) && strlen($_POST['skip']) > 0) ara_blogger_import_process(true); else ara_blogger_import_process(); } else if (isset($_POST['next'])) { global $mydata; // Manipulate mydata first $mydata['blogger'] = $_POST['blogger']; $mydata['bloggerId'] = $_POST['bloggerId']; $mydata['numposts'] = $_POST['numposts']; $mydata['totalPosts'] = $_POST['totalPosts']; $mydata['numLoop'] = $_POST['numLoop']; $mydata['currLoop'] = $_POST['currLoop']-1; $mydata['basic_url'] = $_POST['basic_url']; // this might not be needed anymore $mydata['post_url'] = $_POST['post_url']; $mydata['comment_url'] = $_POST['comment_url']; $mydata['mechanism'] = $_POST['mechanism']; $mydata['importhalo'] = $_POST['importhalo']; $mydata['maintainperma'] = $_POST['maintainperma']; $mydata['matchcats'] = $_POST['matchcats']; $mydata['matchauthors'] = $_POST['matchauthors']; $mydata['nocomments'] = $_POST['nocomments']; ara_blogger_import_process(); } else if (isset($_POST['skip'])) { global $mydata; // Manipulate mydata first $mydata['blogger'] = $_POST['blogger']; $mydata['bloggerId'] = $_POST['bloggerId']; $mydata['numposts'] = $_POST['numposts']; $mydata['totalPosts'] = $_POST['totalPosts']; $mydata['numLoop'] = $_POST['numLoop']; $mydata['currLoop'] = $_POST['currLoop']-1; $mydata['basic_url'] = $_POST['basic_url']; // this might not be needed anymore $mydata['post_url'] = $_POST['post_url']; 
$mydata['comment_url'] = $_POST['comment_url']; $mydata['mechanism'] = $_POST['mechanism']; $mydata['importhalo'] = $_POST['importhalo']; $mydata['maintainperma'] = $_POST['maintainperma']; $mydata['matchcats'] = $_POST['matchcats']; $mydata['matchauthors'] = $_POST['matchauthors']; $mydata['nocomments'] = $_POST['nocomments']; ara_blogger_import_process(true); } else if(isset($_POST['email0'])) { echo '
'; echo '

Blogger RSS Import v1.2.1

'; for($x=0;$x<$_POST['commentcount'];$x++) { if($_POST['email'.$x] != '' || $_POST['website'.$x] != '') { $sql = "UPDATE $tablecomments SET "; if($_POST['email'.$x] != '') $sql.= 'comment_author_email = \''.$_POST['email'.$x].'\' '; if($_POST['email'.$x] != '' && $_POST['website'.$x] != '') $sql.=','; if($_POST['website'.$x] != '') $sql.= 'comment_author_url=\''.$_POST['website'.$x].'\' '; $sql.= 'WHERE comment_author = \''.$_POST['author'.$x].'\''; $wpdb->query($sql); echo '
Updated comment author '.$_POST['author'.$x]; } } echo '

Now you\'re really done!

'; echo '
'; } else { global $wpdb, $tableusers, $tablecategories; ?>

Blogger RSS Import v1.2.1

Blogger ID:.blogspot.com
Default Author:
Match authors from my blogger to WordPress, create if not exist
Default Category:
Match categories from my blogger to WordPress, create if not exist
Number of posts per import cycle: *Change this only if you encounter problems due to bandwidth or server speed or you encounter problems with max_execution_time setting
I want to import everything all at once as my server connection to blogger is very fast and my server has big max_execution_time setting
Haloscan friendly import (to be used with this)
Do not import comments (useful if your blog only uses Haloscan, for example)
Maintain my permalinks structure (the post name)