adding untracked directories for plugins and new browser_extensions directory

Chris Pollett [2011-06-07 07:Jun:th]
adding untracked directories for plugins and new browser_extensions directory
Filename
browser_extensions/firefox/TOOLBAR INSTALL.txt
browser_extensions/firefox/yiooptoolbar/chrome.manifest
browser_extensions/firefox/yiooptoolbar/chrome/content/main.css
browser_extensions/firefox/yiooptoolbar/chrome/content/main.js
browser_extensions/firefox/yiooptoolbar/chrome/content/main.xul
browser_extensions/firefox/yiooptoolbar/chrome/content/yioop_16x16.png
browser_extensions/firefox/yiooptoolbar/chrome/content/yioop_32x32.png
browser_extensions/firefox/yiooptoolbar/install.rdf
lib/indexing_plugins/indexing_plugin.php
lib/indexing_plugins/recipe_plugin.php
lib/processors/page_processor.php
diff --git a/browser_extensions/firefox/TOOLBAR INSTALL.txt b/browser_extensions/firefox/TOOLBAR INSTALL.txt
new file mode 100644
index 000000000..f3856b08c
--- /dev/null
+++ b/browser_extensions/firefox/TOOLBAR INSTALL.txt
@@ -0,0 +1,12 @@
+Steps to install the add-on
+
+Follow the below steps to install the project
+
+   1. Download the project zip file. Save it on your desktop.
+   2. Unzip the file.
+   3. Create a plain text file named "yioop@seekquarry.com" (the name must
+      match the <em:id> in the extension's install.rdf; no file extension).
+      Put the full path of the unzipped yiooptoolbar folder in that file.
+      Eg: C:\Documents and Settings\<username>\Desktop\yiooptoolbar\
+   4. Save that file at the following location
+      C:\Documents and Settings\<username>\Application Data\Mozilla\Firefox\Profiles\<somename>.default\extensions\
+   5. Start the Firefox browser.
diff --git a/browser_extensions/firefox/yiooptoolbar/chrome.manifest b/browser_extensions/firefox/yiooptoolbar/chrome.manifest
new file mode 100644
index 000000000..d77cbcbee
--- /dev/null
+++ b/browser_extensions/firefox/yiooptoolbar/chrome.manifest
@@ -0,0 +1,5 @@
+# Registers the chrome://yiooptoolbar/content/ package from chrome/content/
+content     yiooptoolbar    chrome/content/
+# Applies main.xul as an overlay on top of the browser window
+overlay chrome://browser/content/browser.xul chrome://yiooptoolbar/content/main.xul
+# NOTE(review): the skin package is called yioop_toolbar while the content
+# package is yiooptoolbar, and no chrome/skin/ directory is added together
+# with this manifest; confirm whether this line is intended.
+skin yioop_toolbar classic/1.0 chrome/skin/
+
+
diff --git a/browser_extensions/firefox/yiooptoolbar/chrome/content/main.css b/browser_extensions/firefox/yiooptoolbar/chrome/content/main.css
new file mode 100644
index 000000000..23d5548e3
--- /dev/null
+++ b/browser_extensions/firefox/yiooptoolbar/chrome/content/main.css
@@ -0,0 +1,3 @@
+/* Icon of the toolbar button that main.xul declares with id toolbar-button */
+#toolbar-button {
+    list-style-image: url("chrome://yiooptoolbar/content/yioop_16x16.png");
+}
diff --git a/browser_extensions/firefox/yiooptoolbar/chrome/content/main.js b/browser_extensions/firefox/yiooptoolbar/chrome/content/main.js
new file mode 100644
index 000000000..f8b64d6db
--- /dev/null
+++ b/browser_extensions/firefox/yiooptoolbar/chrome/content/main.js
@@ -0,0 +1,160 @@
+
+/**
+ * Creates an XMLHttpRequest object.
+ *
+ * @return a new XMLHttpRequest, or false if constructing one throws
+ */
+function createXHR(){
+    try {
+        return new XMLHttpRequest();
+    }
+    catch (constructError) {
+        return false;
+    }
+}
+
+/**
+ * Empties the search_capture table of the user_searchcapture.sqlite
+ * database kept in the profile directory. uploadAsyc() calls this once
+ * Yioop! has answered the upload of the toolbar data with status 200.
+ */
+function deleteRows(){
+    var classes = Components.classes;
+    var interfaces = Components.interfaces;
+
+    // Locate the capture database inside the current profile directory
+    var dbFile = classes["@mozilla.org/file/directory_service;1"]
+        .getService(interfaces.nsIProperties)
+        .get("ProfD", interfaces.nsIFile);
+    dbFile.append("user_searchcapture.sqlite");
+
+    // Opening will also create the file if it does not exist
+    var connection = classes["@mozilla.org/storage/service;1"]
+        .getService(interfaces.mozIStorageService)
+        .openDatabase(dbFile);
+
+    connection.createStatement("DELETE FROM search_capture").executeAsync();
+}
+
+/**
+ * Makes a POST request to Yioop! to send the captured toolbar data.
+ * If the server answers with HTTP status 200, the locally stored rows
+ * are removed via deleteRows().
+ *
+ * @param url address of the Yioop! script to post to
+ * @param record captured data; it is converted to a string and sent
+ *      as the value of the "b" form field
+ */
+function uploadAsyc(url, record){
+    // Declared with var so it does not leak into (or clobber) the global
+    // scope of the window this script is loaded in. The payload is
+    // URL-encoded because captured links usually contain &, = and +,
+    // which would otherwise corrupt the form-urlencoded body.
+    var params = "c=toolbar&a=toolbarTraffic&b=" +
+        encodeURIComponent(record);
+    var xhr = createXHR();
+    if(!xhr){
+        // createXHR() returns false when no request object could be made
+        return;
+    }
+    xhr.onreadystatechange = function(){
+        // readyState 4: the request is done; only clear the local rows
+        // once the server has reported success
+        if(xhr.readyState == 4 && xhr.status == 200){
+            deleteRows();
+        }
+    };
+    xhr.open("POST", url, true);
+    xhr.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");
+    xhr.send(params);
+}
+
+/**
+ * Click handler for the links of the current page. Stores the clicked
+ * link in the search_capture table of user_searchcapture.sqlite in the
+ * profile folder (creating the database file and the table if needed)
+ * and then calls sendCaptureTest() to check whether enough rows have been
+ * collected to upload.
+ *
+ * @param event the click event handed over by the listener that
+ *      linkclick() registers; event.target is the clicked element
+ */
+
+function getword(event){
+    // Page language: prefer the lang attribute, fall back to xml:lang
+    var htmlElement = content.document.getElementsByTagName("html")[0];
+    var language1 = htmlElement.getAttribute("lang");
+    if(language1 == null){
+        language1 = htmlElement.getAttribute("xml:lang");
+    }
+    var file = Components.classes["@mozilla.org/file/directory_service;1"]
+        .getService(Components.interfaces.nsIProperties)
+        .get("ProfD", Components.interfaces.nsIFile);
+    file.append("user_searchcapture.sqlite");
+
+    var storageService = Components.classes["@mozilla.org/storage/service;1"]
+        .getService(Components.interfaces.mozIStorageService);
+    var mDBConn = storageService.openDatabase(file);
+    // Will also create the file if it does not exist
+
+    mDBConn.executeSimpleSQL("CREATE TABLE IF NOT EXISTS search_capture " +
+        "(word TEXT, searchurl TEXT, searchurl1 TEXT, " +
+        "timestamp TEXT, language TEXT)");
+
+    var stmt = mDBConn.createStatement("INSERT INTO search_capture " +
+        "(word,searchurl,searchurl1,timestamp,language) " +
+        "VALUES(:word1,:url1,:url2,:time1,:lang1)");
+
+    // word: markup inside the clicked link, searchurl: page the click
+    // happened on, searchurl1: address the link points to
+    stmt.params.word1 = event.target.innerHTML;
+    stmt.params.url1 = window.content.location.href;
+    stmt.params.url2 = event.target.href;
+    stmt.params.time1 = new Date();
+    stmt.params.lang1 = language1;
+    stmt.executeAsync();
+
+    sendCaptureTest();
+    // commitTransaction is not defined in this file; call it only if the
+    // hosting window provides it rather than raising a ReferenceError
+    if(typeof commitTransaction == "function"){
+        commitTransaction();
+    }
+}
+
+/**
+ * Retrieves all the rows of the search_capture table and, once the query
+ * has finished, checks whether at least 10 rows were read. If so, calls
+ * uploadAsyc() to send the toolbar data to Yioop!
+ */
+
+function sendCaptureTest(){
+    var yioopurl = "http://www.yioop.com/";
+    var file = Components.classes["@mozilla.org/file/directory_service;1"]
+        .getService(Components.interfaces.nsIProperties)
+        .get("ProfD", Components.interfaces.nsIFile);
+    file.append("user_searchcapture.sqlite");
+
+    var storageService = Components.classes["@mozilla.org/storage/service;1"]
+        .getService(Components.interfaces.mozIStorageService);
+    var mDBConn = storageService.openDatabase(file);
+    // Will also create the file if it does not exist
+
+    var colnew = new Array();
+    var statement = mDBConn.createStatement("SELECT * FROM search_capture");
+
+    statement.executeAsync({
+        handleResult: function(aResultSet) {
+            // handleResult can be invoked more than once for one query, so
+            // rows are appended instead of being written from index 0 on
+            // every call. Each row is fetched exactly once by the loop;
+            // an extra getNextRow() ahead of it would silently drop a row.
+            for (var row = aResultSet.getNextRow(); row;
+                row = aResultSet.getNextRow()) {
+                colnew.push(row.getResultByName("word") + "|:|"
+                    + row.getResultByName("searchurl") + "|:|"
+                    + row.getResultByName("searchurl1") + "|:|"
+                    + row.getResultByName("timestamp") + "|:|"
+                    + row.getResultByName("language") + "\n");
+            }
+        },
+
+        handleError: function(aError) {
+            alert("Error: " + aError.message);
+        },
+
+        handleCompletion: function(aReason) {
+            if (aReason != Components.interfaces
+                .mozIStorageStatementCallback.REASON_FINISHED) {
+                alert("Query canceled or aborted!");
+                return;
+            }
+            // Upload once, after every result chunk has been gathered
+            if(colnew.length >= 10){
+                uploadAsyc(yioopurl, colnew);
+            }
+        }
+    });
+    // commitTransaction is not defined in this file; call it only if the
+    // hosting window provides it rather than raising a ReferenceError
+    if(typeof commitTransaction == "function"){
+        commitTransaction();
+    }
+}
+
+/**
+ * Entry point: main.xul calls this whenever a load event reaches a Firefox
+ * window that has the toolbar add-on. Collects all the hyperlinks of the
+ * current web page and registers getword() as a capturing click listener
+ * on each of them, so getword() runs when the user clicks on a link.
+ */
+function linkclick() {
+    var anchors = content.document.getElementsByTagName("a");
+    var index = 0;
+    while (index < anchors.length) {
+        anchors[index].addEventListener("click", getword, true);
+        index++;
+    }
+}
diff --git a/browser_extensions/firefox/yiooptoolbar/chrome/content/main.xul b/browser_extensions/firefox/yiooptoolbar/chrome/content/main.xul
new file mode 100644
index 000000000..2a5c1ad9b
--- /dev/null
+++ b/browser_extensions/firefox/yiooptoolbar/chrome/content/main.xul
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+
+<?xml-stylesheet
+    href="chrome://yiooptoolbar/content/main.css" type="text/css" ?>
+<!-- Overlay that adds a toolbar to the browser window and wires up the
+    link click capturing code of main.js -->
+<overlay id="sample"
+        xmlns="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul" >
+
+<!-- Provides linkclick() and the functions it relies on -->
+<script type="application/x-javascript"
+    src="chrome://yiooptoolbar/content/main.js" />
+
+<!-- Calls linkclick() for each load event seen by this document; the
+    listener is registered for the capture phase (third argument true) -->
+<script>
+    document.addEventListener("load", function() { linkclick(); }, true);
+</script>
+
+<window id="main-window">
+
+<toolbox id="navigator-toolbox">
+
+<!-- Toolbar holding a single button; the button icon is set in main.css -->
+<toolbar id="tool-toolbar" toolbarname="Smartsearch Toolbar" accesskey="T"
+    class="chromeclass-toolbar" context="toolbar-context-menu"
+    hidden="false" persist="hidden">
+
+<toolbarbutton id="toolbar-button" label="SmartSearch" value="3"/>
+</toolbar>
+</toolbox>
+</window>
+</overlay>
diff --git a/browser_extensions/firefox/yiooptoolbar/chrome/content/yioop_16x16.png b/browser_extensions/firefox/yiooptoolbar/chrome/content/yioop_16x16.png
new file mode 100644
index 000000000..4a9f6b46d
Binary files /dev/null and b/browser_extensions/firefox/yiooptoolbar/chrome/content/yioop_16x16.png differ
diff --git a/browser_extensions/firefox/yiooptoolbar/chrome/content/yioop_32x32.png b/browser_extensions/firefox/yiooptoolbar/chrome/content/yioop_32x32.png
new file mode 100644
index 000000000..35d06d6ec
Binary files /dev/null and b/browser_extensions/firefox/yiooptoolbar/chrome/content/yioop_32x32.png differ
diff --git a/browser_extensions/firefox/yiooptoolbar/install.rdf b/browser_extensions/firefox/yiooptoolbar/install.rdf
new file mode 100644
index 000000000..f5d06c7ff
--- /dev/null
+++ b/browser_extensions/firefox/yiooptoolbar/install.rdf
@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+
+<!-- Install manifest of the Yioop! Toolbar add-on -->
+<RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+     xmlns:em="http://www.mozilla.org/2004/em-rdf#">
+
+<Description about="urn:mozilla:install-manifest">
+    <!-- Id, version and type of this add-on.
+        NOTE(review): type 2 presumably marks it as an extension; confirm
+        against the install manifest documentation. -->
+    <em:id>yioop@seekquarry.com</em:id>
+    <em:version>1.0</em:version>
+    <em:type>2</em:type>
+
+<!-- Target Application this extension can install into,
+    with minimum and maximum supported versions. -->
+<em:targetApplication>
+    <Description>
+    <em:id>{ec8030f7-c20a-464f-9b0e-13a3a9e97384}</em:id>
+    <em:minVersion>1.5</em:minVersion>
+    <em:maxVersion>4.0.*</em:maxVersion>
+    </Description>
+</em:targetApplication>
+
+<!-- Front End MetaData -->
+    <em:name>Yioop! Toolbar</em:name>
+    <em:creator>Seekquarry.com</em:creator>
+    <em:description>Used to send web traffic data to the Yioop!
+Search Engine</em:description>
+    <em:homepageURL>http://www.seekquarry.com/</em:homepageURL>
+    <em:iconURL>chrome://yiooptoolbar/content/yioop_32x32.png</em:iconURL>
+
+</Description>
+</RDF>
diff --git a/lib/indexing_plugins/indexing_plugin.php b/lib/indexing_plugins/indexing_plugin.php
new file mode 100644
index 000000000..00348a3db
--- /dev/null
+++ b/lib/indexing_plugins/indexing_plugin.php
@@ -0,0 +1,79 @@
+<?php
+/**
+ *  SeekQuarry/Yioop --
+ *  Open Source Pure PHP Search Engine, Crawler, and Indexer
+ *
+ *  Copyright (C) 2011 Priya Gangaraju priya.gangaraju@gmail.com
+ *
+ *  LICENSE:
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  END LICENSE
+ *
+ * @author Priya Gangaraju priya.gangaraju@gmail.com
+ * @package seek_quarry
+ * @license http://www.gnu.org/licenses/ GPL3
+ * @link http://www.seekquarry.com/
+ * @copyright 2011
+ * @filesource
+ */
+
+if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
+
+/**
+ * Indexing Component Class
+ * @author Priya Gangaraju
+ * @package seek_quarry
+ * @subpackage component
+ */
+
+/** Some models might interface with a DBMS so load the DBMS manager*/
+require_once BASE_DIR."/models/datasources/".DBMS."_manager.php";
+
+/**
+ * Base class of the indexing plugins. On construction it connects to the
+ * configured DBMS and instantiates the models a subclass lists in $models.
+ *
+ * @author Priya Gangaraju
+ * @package seek_quarry
+ * @subpackage component
+ */
+abstract class IndexingPlugin
+{
+    /**
+     * Not used by the code of this class
+     * @var array
+     */
+    var $processors = array();
+    /**
+     * Names of the models to load. For a name "foo" the constructor
+     * requires models/foo_model.php and stores a FooModel instance in
+     * $this->fooModel
+     * @var array
+     */
+    var $models = array();
+    /**
+     * Not used by the code of this class
+     * @var object
+     */
+    var $index_archive;
+    /**
+     * Instance of the manager class of the DBMS named by the DBMS constant
+     * @var object
+     */
+    var $db;
+
+    /**
+     * Creates the DBMS manager and one instance of each model listed in
+     * $this->models
+     */
+    function __construct()
+    {
+        $manager_class = ucfirst(DBMS)."Manager";
+        $this->db = new $manager_class();
+
+        require_once BASE_DIR."/models/model.php";
+
+        foreach($this->models as $model_stem) {
+            require_once BASE_DIR."/models/".$model_stem."_model.php";
+
+            $class_name = ucfirst($model_stem)."Model";
+            $field_name = lcfirst($class_name);
+            $this->$field_name = new $class_name();
+        }
+    }
+
+    /**
+     * Page processors the plugin wants to do additional processing for
+     *
+     * @return mixed NULL in this base class
+     */
+    static function getProcessors()
+    {
+        return NULL;
+    }
+
+    /**
+     * Meta words added by the plugin
+     *
+     * @return array empty in this base class
+     */
+    static function getAdditionalMetaWords()
+    {
+        return array();
+    }
+
+    /**
+     * To be implemented by each plugin
+     *
+     * @param string $index_name passed through to the implementation
+     */
+    abstract function postProcessing($index_name);
+}
+?>
diff --git a/lib/indexing_plugins/recipe_plugin.php b/lib/indexing_plugins/recipe_plugin.php
new file mode 100644
index 000000000..b1a4d9bb1
--- /dev/null
+++ b/lib/indexing_plugins/recipe_plugin.php
@@ -0,0 +1,825 @@
+<?php
+/**
+ *  SeekQuarry/Yioop --
+ *  Open Source Pure PHP Search Engine, Crawler, and Indexer
+ *
+ *  Copyright (C) 2011  Priya Gangaraju priya.gangaraju@gmail.com
+ *
+ *  LICENSE:
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  END LICENSE
+ *
+ * @author Priya Gangaraju priya.gangaraju@gmail.com
+ * @package seek_quarry
+ * @subpackage component
+ * @license http://www.gnu.org/licenses/ GPL3
+ * @link http://www.seekquarry.com/
+ * @copyright 2011
+ * @filesource
+ */
+
+if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
+
+/**
+ * Flag to say that post_processing is occurring (used to control logging in
+ * models)
+ */
+define("POST_PROCESSING", true);
+
+/**
+ * Ratio of clusters/total number of recipes seen
+ */
+define("CLUSTER_RATIO", 0.1);
+
+/** Base indexing plugin class*/
+require_once BASE_DIR."/lib/indexing_plugins/indexing_plugin.php";
+/** Used to create index shards to add ingredient: entries
+ *  to index
+ */
+require_once BASE_DIR."/lib/index_shard.php";
+/** Used to extract text from documents*/
+require_once BASE_DIR."/lib/phrase_parser.php";
+/** Get the crawlHash function */
+require_once BASE_DIR."/lib/utility.php";
+/** Loads common constants for web crawling */
+require_once BASE_DIR."/lib/crawl_constants.php";
+
+/**
+ * This class handles recipe processing.
+ * It extracts ingredients from the recipe pages while crawling.
+ * It clusters the recipes using Kruskal's minimum spanning tree
+ * algorithm after crawl is stopped. This plugin was designed by
+ * looking at what was needed to screen scrape recipes from the
+ * following sites:
+ *
+ * http://allrecipes.com/
+ * http://www.food.com/
+ * http://www.betterrecipes.com/
+ * http://www.foodnetwork.com/
+ * http://www.bettycrocker.com/
+ *
+ *
+ * @author Priya Gangaraju
+ * @package seek_quarry
+ * @subpackage component
+ */
+
+class RecipePlugin extends IndexingPlugin implements CrawlConstants
+{
+
+    /**
+     * The models used by this indexing plugin
+     * @var array
+     */
+    var $models = array("phrase", "locale", "crawl");
+
+
+    /**
+     * Lists the mime type page processors whose pages this plugin wants
+     * to do additional processing on
+     *
+     * @return array names of page processor classes
+     */
+    static function getProcessors()
+    {
+        $processors = array("HtmlProcessor");
+        return $processors;
+    }
+
+    /**
+     * Lists the meta words this plugin adds
+     *
+     * @return array meta word => maximum description length of results
+     *      allowed for that meta word (2000 here, so that whole recipes
+     *      can be described sufficiently)
+     */
+    static function getAdditionalMetaWords()
+    {
+        $max_description_len = 2000;
+        $meta_words = array();
+        $meta_words["recipe:"] = $max_description_len;
+        $meta_words["ingredient:"] = $max_description_len;
+        return $meta_words;
+    }
+
+    /**
+     * Extracts the title and the ingredients of the recipes found on a
+     * page. This is called by the PageProcessor (or subclass)
+     * handle($page, $url) method
+     *
+     * @param string $page contents of the web page
+     * @param string $url address of the page (not used by this plugin)
+     * @return array one array per recipe container found on the page, each
+     *      with a self::TITLE and a self::DESCRIPTION field, the latter
+     *      holding the ingredients separated by "||"; NULL if
+     *      HtmlProcessor::dom() gives no document for the page
+     */
+    function pageProcessing($page, $url)
+    {
+        $page = preg_replace('@<script[^>]*?>.*?</script>@si', ' ', $page);
+        // make sure text of adjacent tags does not run together
+        $page = preg_replace('/>/', '> ', $page);
+        $dom = HtmlProcessor::dom($page);
+        if($dom == NULL) return NULL;
+
+        $xpath = new DOMXPath($dom);
+        $recipes_per_page = $xpath->evaluate(
+           "/html//div[@class = 'ingredients'] |
+            /html//div[@class = 'body-text'] |
+            /html//ul[@class = 'clr'] |
+            /html//div[@class = 'recipeDetails']
+                /ul[@class='ingredient_list']");
+        $subdocs_description = array();
+        $recipes_count = $recipes_per_page->length;
+        if($recipes_count == 0) return $subdocs_description;
+
+        $titles = $xpath->evaluate(
+            "/html//div[@class='rectitle'] |
+           /html//h1[@class = 'fn'] |
+           /html//div[@class =
+            'pod about-recipe clrfix']/p |
+           /html//h1[@class = 'recipeTitle']");
+        /* This query is made against the whole document, so its result
+           is the same for every recipe; it is evaluated only once
+         */
+        $ingredients = $xpath->evaluate("/html//div[@class =
+            'ingredients']/ul/li |
+            /html//div[@class = 'body-text']
+            /ul/li[@class = 'ingredient'] |
+            /html//ul[@class = 'clr']/li |
+            /html//div[@class = 'recipeDetails']
+            /ul[@class='ingredient_list']/li |
+            /html//div[@class = 'ingredients']
+            /table/tr[@class = 'ingredient']");
+        $contents = array();
+        foreach($ingredients as $ingredient) {
+            $content = trim($ingredient->textContent);
+            if(!empty($content)) {
+                $contents[] = $content;
+            }
+        }
+        /* implode puts "||" only between ingredients. (end() does not
+           give the last node of a DOMNodeList, so comparing against its
+           result can not be used to detect the last ingredient.)
+         */
+        $ingredients_result = "";
+        if(count($contents) > 0) {
+            $ingredients_result = mb_ereg_replace(
+                "(\s)+", " ", implode("||", $contents));
+        }
+
+        for($i = 0; $i < $recipes_count; $i++) {
+            // there may be fewer title nodes than recipe containers
+            $title_node = $titles->item($i);
+            $recipe = array();
+            $recipe[self::TITLE] = ($title_node) ?
+                $title_node->textContent : "";
+            $recipe[self::DESCRIPTION] = $ingredients_result;
+            $subdocs_description[] = $recipe;
+        }
+
+        return $subdocs_description;
+    }
+
+
+    /**
+     * Implements post processing of recipes. Recipes are extracted,
+     * ingredients are scrubbed and recipes are clustered. The clustered
+     * recipes are added back to the index.
+     *
+     * The steps are: (1) collect the documents listed under the meta word
+     * recipe:all, (2) split each description on "||" and reduce every
+     * ingredient with getIngredientName(), (3) compute a weight for every
+     * pair of recipes, (4) hand the weights to kruskalClustering(), and
+     * (5) add the clustered recipes under an "ingredient:" meta word to a
+     * new shard which is added to the index archive.
+     *
+     * NOTE(review): $weights, $distinct_ingredients, $doc_keys,
+     * $recipes_summary and $summary_offsets are never initialized, so they
+     * are undefined if the loops filling them do not run (for instance
+     * $weights when fewer than two recipes survive the filtering).
+     * NOTE(review): $doc_keys and $recipes_summary are keyed by recipe
+     * title, so recipes with the same title overwrite one another.
+     *
+     * @param string $index_name  index name of the current crawl.
+     */
+    function postProcessing($index_name)
+    {
+        $this->phraseModel->index_name = $index_name;
+        $this->crawlModel->index_name = $index_name;
+
+        $index_archive_name = self::index_data_base_name . $index_name;
+        $index_archive = new IndexArchiveBundle(
+            CRAWL_DIR.'/cache/'.$index_archive_name);
+        // walk through the documents found for the meta word recipe:all
+        $query_iterator = new WordIterator(crawlHash("recipe:all"),
+            $index_archive);
+        $raw_recipes = array();
+        while(is_array($next_docs = $query_iterator->nextDocsWithWord())) {
+            foreach($next_docs as $doc_key => $doc_info) {
+                // fold the summary fields into the doc info, remembering
+                // the doc key and the crawl time alongside them
+                $summary = & $doc_info[CrawlConstants::SUMMARY];
+                $summary['KEY'] = $doc_key;
+                $tmp = unserialize($query_iterator->getIndex(
+                    $doc_key)->description);
+                $doc_info[self::CRAWL_TIME] = $tmp[self::CRAWL_TIME];
+                unset($doc_info[CrawlConstants::SUMMARY]);
+                if(is_array($summary)) {
+                    $raw_recipes[] = array_merge($doc_info, $summary);
+                }
+            }
+
+        }
+        // only cluster if would make more than one cluster
+        if(count($raw_recipes) * CLUSTER_RATIO > 1 ) {
+            /* $recipes entries: [0] title, [1] ingredients, [2] doc key,
+               [3] the merged doc info/summary the recipe came from.
+               Descriptions that have no "||" in them are skipped.
+             */
+            $recipes = array();
+            $i = 0;
+            foreach($raw_recipes as $raw_recipe) {
+                $description = $raw_recipe[self::DESCRIPTION];
+                $ingredients = explode("||", $description);
+                if(is_array($ingredients) && count($ingredients) > 1) {
+                    $recipes[$i][0]= $raw_recipe[self::TITLE];
+                    $recipes[$i][1] = $ingredients;
+                    $recipes[$i][2] = $raw_recipe['KEY'];
+                    $recipes[$i][3] = $raw_recipe;
+                    $i++;
+                }
+            }
+
+            /* Replace each ingredient by its main ingredient name; drop
+               empty ones, ones ending in ":" and ones whose main
+               ingredient name comes out empty.
+               ($recipes_ingredients is not used afterwards.)
+             */
+            $recipes_ingredients = array();
+            $count = count($recipes);
+            foreach($recipes as $key => $recipe) {
+                foreach($recipe[1] as $index => $ingredient) {
+                    if(strlen($ingredient) != 0 && (
+                            substr($ingredient,
+                                strlen($ingredient) - 1) != ":")) {
+                        $mainIngredient =
+                            $this->getIngredientName((string)$ingredient);
+                        if(strlen($mainIngredient) != 0) {
+                            $recipe[1][$index] = $mainIngredient;
+                        } else {
+                            unset($recipe[1][$index]);
+                        }
+                    } else {
+                        unset($recipe[1][$index]);
+                    }
+                }
+                    $recipes[$key] = $recipe;
+            }
+            $count = count($recipes);
+            $k = 0;
+            // ingredients in this list are never picked as the main
+            // ingredient of a recipe
+            $basic_ingredients = array(
+               'onion','oil','cheese','pepper','sauce',
+               'salt','milk','butter','flour','cake',
+               'garlic','cream','soda','honey','powder',
+               'sauce','water','vanilla','pepper','bread',
+               'sugar','vanillaextract','celery',
+               'seasoning','syrup','skewers','egg',
+               'muffin','ginger','basil','oregano',
+               'cinammon','cumin','mayonnaise','mayo',
+               'chillipowder','lemon','greens','yogurt',
+               'margarine','asparagus','halfhalf',
+               'pancakemix','coffee','cookies','lime',
+               'chillies','cilantro','rosemary',
+               'vanillaextract','vinegar','shallots',
+               'wine','cornmeal','nonstickspray');
+
+            /* For every pair of recipes ($i < $j) store in $weights[$k]
+               the two titles and an edge weight. Both recipes are turned
+               into 0/1 vectors over the union of their ingredients. With
+               d the number of ingredients that occur in only one of the
+               two recipes, the weight is sqrt(d) * (1 + d), multiplied by
+               1000 if the main ingredients of the two recipes differ.
+               The main ingredient is the last non-basic ingredient that
+               occurs in the lower-cased title ("" if there is none).
+             */
+            for($i = 0; $i < $count; $i++) {
+                $recipe1_main_ingredient = "";
+                $recipe1 = $recipes[$i][1];
+                $recipe_name = $recipes[$i][0];
+                $recipe1_title = strtolower($recipes[$i][0]);
+                $distinct_ingredients[$recipe_name] = $recipes[$i][1];
+                $doc_keys[$recipe_name] = $recipes[$i][2];
+                $recipes_summary[$recipe_name] = $recipes[$i][3];
+
+                for($j = $i + 1; $j < $count; $j++) {
+                    $recipe2_main_ingredient = "";
+                    $recipe2 = $recipes[$j][1];
+                    $recipe2_title = strtolower($recipes[$j][0]);
+                    $weights[$k][0] = $recipes[$i][0];
+                    $weights[$k][1] = $recipes[$j][0];
+                    $merge_array = array_merge($recipe1, $recipe2);
+                    $vector_array = array_unique($merge_array);
+                    sort($vector_array);
+                    $recipe1_vector = array_fill_keys($vector_array, 0);
+                    $recipe2_vector = array_fill_keys($vector_array, 0);
+                    foreach($recipe1 as $ingredient){
+                        if($ingredient != "" &&
+                            !in_array($ingredient,$basic_ingredients)) {
+                                if(strstr($recipe1_title,$ingredient)) {
+                                    $recipe1_main_ingredient = $ingredient;
+                                }
+                        }
+                        $recipe1_vector[$ingredient] = 1;
+                    }
+                    foreach($recipe2 as $ingredient) {
+                        if($ingredient != ""&& !
+                            in_array($ingredient,$basic_ingredients)) {
+                                if(strstr($recipe2_title,$ingredient))  {
+                                    $recipe2_main_ingredient = $ingredient;
+                                }
+                        }
+                        $recipe2_vector[$ingredient] = 1;
+                    }
+                    $edge_weight = 0;
+                    // despite its name, $matches ends up as one plus the
+                    // number of positions where the two vectors differ
+                    $matches = 1;
+                    foreach($vector_array as $vector) {
+                        $diff = $recipe1_vector[$vector] -
+                                    $recipe2_vector[$vector];
+                        $vector_diff[$vector] = (pow($diff, 2));
+                        if(abs($diff) == 1)
+                            $matches += 1;
+                        $edge_weight += $vector_diff[$vector];
+                    }
+                    $main_ingredient_match = 1;
+                    if($recipe1_main_ingredient != $recipe2_main_ingredient)
+                        $main_ingredient_match = 1000;
+                    $edge_weight = sqrt($edge_weight)*
+                                    $matches * $main_ingredient_match;
+                    $weights[$k][2] = $edge_weight;
+                    $k++;
+                }
+            }
+
+            // kruskalClustering is not defined in the visible part of this
+            // file. The loop below treats each cluster as an array holding
+            // recipe titles at indices 0 .. count - 2 plus an "ingredient"
+            // entry -- TODO confirm against kruskalClustering
+            $clusters = kruskalClustering($weights,
+                $count, $distinct_ingredients);
+            $index_shard = new IndexShard("cluster_shard");
+            $word_counts = array();
+            $recipe_sites = array();
+
+            foreach($clusters as $cluster) {
+                $count = count($cluster);
+                for($i = 0; $i < $count - 1; $i++) {
+                    $meta_ids = array();
+                    $summary = array();
+                    $recipe = $cluster[$i];
+                    $doc_key = $doc_keys[$recipe];
+                    // copy over the summary fields of the recipe
+                    $summary[self::URL] =
+                        $recipes_summary[$recipe][self::URL];
+                    $summary[self::TITLE] =
+                        $recipes_summary[$recipe][self::TITLE];
+                    $summary[self::DESCRIPTION] =
+                        $recipes_summary[$recipe][self::DESCRIPTION];
+                    $summary[self::TIMESTAMP] =
+                        $recipes_summary[$recipe][self::TIMESTAMP];
+                    $summary[self::ENCODING] =
+                        $recipes_summary[$recipe][self::ENCODING];
+                    $summary[self::HASH] =
+                        $recipes_summary[$recipe][self::HASH];
+                    $summary[self::TYPE] =
+                        $recipes_summary[$recipe][self::TYPE];
+                    $summary[self::HTTP_CODE] =
+                        $recipes_summary[$recipe][self::HTTP_CODE];
+                    $recipe_sites[] = $summary;
+                    // the only thing added for the document is the meta
+                    // word for its cluster ($word_counts stays empty)
+                    $meta_ids[] = "ingredient:".$cluster["ingredient"];
+                    $index_shard->addDocumentWords($doc_key,
+                        self::NEEDS_OFFSET_FLAG,
+                        $word_counts, $meta_ids, true, false);
+                    // NOTE(review): the shard is saved once per recipe
+                    $index_shard->save(true);
+                }
+
+            }
+
+            $dir = CRAWL_DIR."/cache/".self::index_data_base_name.$index_name;
+            $index_archive = new IndexArchiveBundle($dir, false);
+            $generation = $index_archive->initGenerationToAdd($index_shard);
+            // $recipe_sites is always set at this point
+            if(isset($recipe_sites)) {
+                $index_archive->addPages($generation,
+                    self::SUMMARY_OFFSET, $recipe_sites, 0);
+            }
+            /* Map each document's key to the offset of its summary.
+               Presumably addPages() stores self::SUMMARY_OFFSET into the
+               entries of $recipe_sites -- TODO confirm.
+               ($k and $recipe are not used in this loop.)
+             */
+            $k = 0;
+            foreach($recipe_sites as $site) {
+                $recipe = $site[self::TITLE];
+                $hash = crawlHash($site[self::URL], true).
+                    $site[self::HASH] .
+                    crawlHash("link:".$site[self::URL], true);
+                $summary_offsets[$hash] =
+                    array($site[self::SUMMARY_OFFSET], null);
+            }
+            $index_shard->changeDocumentOffsets($summary_offsets);
+            $index_archive->addIndexData($index_shard);
+            $index_archive->saveAndAddCurrentShardDictionary();
+            $index_archive->dictionary->mergeAllTiers();
+            $this->db->setWorldPermissionsRecursive(
+                CRAWL_DIR.'/cache/'.
+                self::index_data_base_name.$index_name);
+        }
+    }
+
+
+
+    /**
+     * Reduces an ingredient line (quantity, unit, adjectives, ...) to the
+     * name of its main ingredient.
+     *
+     * @param string $text ingredient line of a recipe
+     * @return string $name main ingredient, made up only of the letters
+     *      a-z and A-Z (may be empty)
+     */
+    function getIngredientName($text)
+    {
+        $varieties = array('apple','bread','cheese','chicken','shrimp',
+            'tilapia','salmon','butter','chocolate','sugar','pepper','water',
+            'mustard','cream','lettuce','sauce','crab','garlic','mushrooms',
+            'tortilla','potatoes','steak','rice','vinegar','carrots',
+            'marshmellows','onion','oil','ham','parsley','cilantro','broth',
+            'stock','flour','seasoning','banana','pasta','noodles','pork',
+            'bacon','olives','spinach','yogurt','celery','beans','egg',
+            'apricot','whiskey','wine','milk','mango','tomato','lemon',
+            'salsa','herbs','sourdough','prosciutto','seasoning','syrup',
+            'honey','skewers','muffin','beef','cinammon','thyme','asparagus',
+            'turkey','pumpkin');
+
+        $measurements = array('cup','cups','ounces','teaspoon','teaspoons',
+            'tablespoon','tablespoons','pound','pounds','tbsp','tsp','lbs',
+            'inch','pinch','oz','lb','tbs','can','bag','C','c','tb');
+
+        $sizes = array('small','large','thin','less','thick','bunch');
+
+        $prepositions = array('into', 'for', 'by','to','of');
+
+        $misc = array('hot','cold','room','temperature','plus','stick','pieces',
+            "confectioners",'semisweet','white','all-purpose','bittersweet',
+            'cut','whole','or','and','french','wedges','package','pkg','shells',
+            'cartilege','clean','hickory','fillets','fillet','plank','planks',
+            'cedar','taste','spicy','glaze','crunchy','sharp','chips','juice',
+            'optional','fine','regular','dash','overnight','soaked','classic',
+            'firm','delicious','prefer','plain');
+
+        $attributes = array('boneless','skinless','breast','legs','thighs',
+            'washington','fresh','flat','leaf','ground','extra','virgin','dry',
+            'cloves','lean','ground','roma','all purpose','light','brown',
+            'idaho','kosher','frozen','garnish');
+
+        // words which are never part of an ingredient name
+        $ignored_words = array_merge($measurements, $sizes, $prepositions,
+            $misc, $attributes);
+
+        $endings = array('/\,/','/\./','/\+/','/\*/',"/'/","/\(/","/\)/");
+
+        // numbers and slashes are turned into word separators
+        $ingredient = strtolower(
+            preg_replace(array('/\d+/','/\\//'), " ", $text));
+
+        // a line mentioning a known variety is replaced by that variety
+        foreach($varieties as $variety) {
+            if(strpos($ingredient, $variety) !== false) {
+                $ingredient = $variety;
+            }
+        }
+
+        $nouns = array();
+        foreach(explode(' ', $ingredient) as $word) {
+            if($word == '') {
+                continue;
+            }
+            $word = strtolower($word);
+            foreach($varieties as $variety) {
+                if(strpos($word, $variety) !== false) {
+                    $word = $variety;
+                }
+            }
+            $word = preg_replace($endings, "", $word);
+            if(in_array($word, $ignored_words)) {
+                continue;
+            }
+            // skip words that look like adverbs, participles or gerunds
+            if(substr($word, -2) == 'ly' || substr($word, -2) == 'ed'
+                || substr($word, -3) == 'ing') {
+                continue;
+            }
+            $nouns[] = $word;
+        }
+
+        return preg_replace('/[^a-zA-Z]/', "", implode(" ", $nouns));
+    }
+
+}
+/**
+ * Gets the language tag (for instance, en_US for American English) of the
+ * locale that is currently being used.
+ *
+ * The function is only defined here when no getLocaleTag() already exists,
+ * so a definition loaded earlier takes precedence over this one.
+ *
+ * @return string  "en_US" since for now the recipe plugin only works
+ *      with English recipes
+ */
+if(!function_exists("getLocaleTag")) {
+    function getLocaleTag()
+    {
+        return "en_US";
+    }
+}
+
+/**
+ * A graph node: a label plus a flag recording whether a traversal has
+ * already reached the node.
+ */
+class Vertex
+{
+    /** @var mixed label supplied at construction */
+    private $label;
+    /** @var bool becomes true once visited() has been called */
+    private $visited = false;
+
+    /**
+     * @param mixed $label identifier of this vertex
+     */
+    public function __construct($label)
+    {
+        $this->label = $label;
+    }
+
+    /**
+     * @return mixed the label given to the constructor
+     */
+    public function getLabel()
+    {
+        return $this->label;
+    }
+
+    /**
+     * Marks this vertex as visited.
+     */
+    public function visited()
+    {
+        $this->visited = true;
+    }
+
+    /**
+     * @return bool whether visited() has been called on this vertex
+     */
+    public function isVisited()
+    {
+        return $this->visited;
+    }
+}
+/**
+ * A weighted edge. The two endpoint labels passed to the constructor are
+ * each wrapped in a new Vertex object.
+ */
+class Edge
+{
+    /** @var Vertex vertex built from the first label */
+    private $start_vertex;
+    /** @var Vertex vertex built from the second label */
+    private $end_vertex;
+    /** @var mixed weight of the edge */
+    private $cost;
+
+    /**
+     * @param mixed $vertex1 label of the start vertex
+     * @param mixed $vertex2 label of the end vertex
+     * @param mixed $cost weight of the edge
+     */
+    public function __construct($vertex1, $vertex2, $cost)
+    {
+        $this->cost = $cost;
+        $this->start_vertex = new Vertex($vertex1);
+        $this->end_vertex = new Vertex($vertex2);
+    }
+
+    /**
+     * @return Vertex the start vertex
+     */
+    public function getStartVertex()
+    {
+        return $this->start_vertex;
+    }
+
+    /**
+     * @return Vertex the end vertex
+     */
+    public function getEndVertex()
+    {
+        return $this->end_vertex;
+    }
+
+    /**
+     * @return mixed the weight given to the constructor
+     */
+    public function getCost()
+    {
+        return $this->cost;
+    }
+}
+
+/**
+ * Holds a minimum spanning tree (MST) and cuts it into clusters.
+ * constructMST records the tree's edges in a heap and an adjacency matrix.
+ * formCluster forms clusters by deleting the most expensive edges and then
+ * walking what is left with a breadth-first search. findCommonIngredient
+ * labels each cluster with its most frequent non-basic ingredient.
+ */
+class Tree
+{
+    /** @var Cluster heap of the tree's edges, most expensive on top */
+    private $cluster_heap;
+    /** @var array vertex label => Vertex object */
+    private $vertices;
+    /** @var array label => (neighbour label => neighbour label, or -1
+     *      once the connecting edge has been removed) */
+    private $adjMatrix;
+
+    function __construct(){
+        $this->cluster_heap = new Cluster();
+        $this->vertices = array();
+        $this->adjMatrix = array();
+    }
+
+   /**
+    * constructs the adjacency matrix for the MST.
+    *
+    * @param object array $edges vertices and edge weights of MST
+    */
+    function constructMST($edges)
+    {
+        if(empty($edges)) {
+            return;
+        }
+        foreach($edges as $edge) {
+            $this->cluster_heap->insert($edge);
+            $vertex1 = $edge->getStartVertex();
+            $vertex2 = $edge->getEndVertex();
+            $label1 = $vertex1->getLabel();
+            $label2 = $vertex2->getLabel();
+            $this->adjMatrix[$label1][$label2] = $label2;
+            $this->adjMatrix[$label2][$label1] = $label1;
+            // vertices are keyed by label, so a key lookup is enough to
+            // tell whether one is already registered
+            if(!isset($this->vertices[$label1])) {
+                $this->vertices[$label1] = $vertex1;
+            }
+            if(!isset($this->vertices[$label2])) {
+                $this->vertices[$label2] = $vertex2;
+            }
+        }
+    }
+
+   /**
+    * forms the clusters by removing maximum weighted edges.
+    * performs breadth-first search to cluster the recipes.
+    *
+    * At most ($size * CLUSTER_RATIO) - 1 edges are removed; removal stops
+    * early if the heap runs out of edges. Every endpoint of a removed edge
+    * seeds a search. When no edge is removed no search is started and the
+    * result is an empty array.
+    *
+    * @param int $k queue size
+    * @param int $size number of recipes.
+    * @return array $cluster clusters of recipes.
+    */
+    function formCluster($k, $size)
+    {
+        $nodeQueue = new Queue($k);
+        $cluster_count = $size * CLUSTER_RATIO;
+        $cluster = array();
+        for($j = 0; $j < $cluster_count - 1; $j++) {
+            // extract() throws on an empty heap, so stop cutting instead
+            if($this->cluster_heap->isEmpty()) {
+                break;
+            }
+            $max_edge = $this->cluster_heap->extract();
+            $cluster1_start = $max_edge->getStartVertex()->getLabel();
+            $cluster2_start = $max_edge->getEndVertex()->getLabel();
+            $this->adjMatrix[$cluster1_start][$cluster2_start] = -1;
+            $this->adjMatrix[$cluster2_start][$cluster1_start] = -1;
+            $nodeQueue->enqueue($cluster1_start);
+            $nodeQueue->enqueue($cluster2_start);
+        }
+        $queue = new Queue($k);
+        $i = 0;
+        while(!$nodeQueue->isEmpty()) {
+            $node = $nodeQueue->dequeue();
+            if($this->vertices[$node]->isVisited() == false) {
+                $this->vertices[$node]->visited();
+                $cluster[$i][] = $this->vertices[$node]->getLabel();
+                $queue->enqueue($this->vertices[$node]->getLabel());
+                while(!$queue->isEmpty()) {
+                    $node = $queue->dequeue();
+                    while(($nextnode = $this->getNextVertex($node))
+                        !== -1) {
+                        $this->vertices[$nextnode]->visited();
+                        $cluster[$i][] =
+                            $this->vertices[$nextnode]->getLabel();
+                        $queue->enqueue(
+                            $this->vertices[$nextnode]->getLabel());
+                    }
+                }
+            }
+            // advanced for every seed, visited or not, so cluster keys
+            // may have gaps
+            $i++;
+        }
+        return $cluster;
+    }
+
+   /**
+    * gets the next vertex  from the adjacency matrix for a given vertex
+    *
+    * @param string $vertex vertex
+    * @return adjacent vertex if it has otherwise -1.
+    */
+    function getNextVertex($vertex)
+    {
+        if(!isset($this->adjMatrix[$vertex])) {
+            return -1;
+        }
+        foreach($this->adjMatrix[$vertex] as $value) {
+            // integer -1 marks a removed edge; the strict test keeps a
+            // label such as "-1" from being mistaken for the marker
+            if($value !== -1
+                && ($this->vertices[$value]->isVisited() == false)) {
+                return $value;
+            }
+        }
+        return -1;
+    }
+
+   /**
+    * Finds the common ingredient for each of the clusters.
+    *
+    * Basic ingredients are ignored. Each recipe counts an ingredient at
+    * most once, and the first ingredient reaching the highest count is
+    * chosen. A cluster with no non-basic ingredient gets NULL.
+    *
+    * @param array $clusters clusters of recipes.
+    * @param array $ingredients array of ingredients of recipes.
+    * @return array $new_clusters clusters with common ingredient appended.
+    */
+    function findCommonIngredient($clusters,$ingredients)
+    {
+        $new_clusters = array();
+        $basic_ingredients = array("onion","oil","cheese","pepper","sauce",
+            "salt","milk","butter",'flour','cake','garlic','cream','soda',
+            'honey','powder','sauce','water','vanilla','pepper','bread',
+            'sugar','vanillaextract','celery','seasoning','syrup','skewers',
+            'egg','muffin','ginger','basil','oregano','cinammon','cumin',
+            'mayonnaise','mayo','chillipowder','lemon','greens','yogurt',
+            'margarine','asparagus','halfhalf','pancakemix','coffee',
+            'cookies','lime','chillies','cilantro','rosemary','vanillaextract',
+            'vinegar','shallots','wine','cornmeal','nonstickspray');
+        foreach($clusters as $cluster) {
+            $cluster_recipe_ingredients = array();
+            foreach($cluster as $recipe_name) {
+                // a recipe without an ingredient list contributes nothing
+                if(!isset($ingredients[$recipe_name])
+                    || !is_array($ingredients[$recipe_name])) {
+                    continue;
+                }
+                $main_ingredients =
+                    array_diff($ingredients[$recipe_name],$basic_ingredients);
+                $cluster_recipe_ingredients = array_merge(
+                    $cluster_recipe_ingredients,
+                    array_unique($main_ingredients));
+            }
+            $cluster_ingredient = NULL;
+            // max() must not be called on an empty array
+            if(!empty($cluster_recipe_ingredients)) {
+                $ingredient_occurrence =
+                    array_count_values($cluster_recipe_ingredients);
+                $max = max($ingredient_occurrence);
+                foreach($ingredient_occurrence as $key=>$value) {
+                    if($max == $value
+                        && !in_array($key, $basic_ingredients)) {
+                        $cluster_ingredient = $key;
+                        break;
+                    }
+                }
+            }
+            $cluster["ingredient"] = $cluster_ingredient;
+            $new_clusters[] = $cluster;
+        }
+        return $new_clusters;
+    }
+}
+/**
+ * Heap of edges ordered by cost with the most expensive edge on top;
+ * used to maintain the MST.
+ */
+class Cluster extends SplHeap
+{
+    /**
+     * Orders two edges by their cost.
+     *
+     * @param object $edge1 first edge, must offer getCost()
+     * @param object $edge2 second edge, must offer getCost()
+     * @return int 0 for equal costs, -1 if $edge1 is cheaper, 1 otherwise
+     */
+    public function compare($edge1,$edge2)
+    {
+        $first_cost = $edge1->getCost();
+        $second_cost = $edge2->getCost();
+        if($first_cost == $second_cost) {
+            return 0;
+        }
+        if($first_cost < $second_cost) {
+            return -1;
+        }
+        return 1;
+    }
+}
+/**
+ * Heap of edges ordered by cost with the cheapest edge on top;
+ * used to maintain the tree.
+ */
+class TreeCluster extends SplHeap
+{
+    /**
+     * Orders two edges by their cost, cheaper edges ranking higher.
+     *
+     * @param object $edge1 first edge, must offer getCost()
+     * @param object $edge2 second edge, must offer getCost()
+     * @return int 0 for equal costs, -1 if $edge1 is dearer, 1 otherwise
+     */
+    public function compare($edge1,$edge2)
+    {
+        $first_cost = $edge1->getCost();
+        $second_cost = $edge2->getCost();
+        if($first_cost == $second_cost) {
+            return 0;
+        }
+        if($first_cost > $second_cost) {
+            return -1;
+        }
+        return 1;
+    }
+}
+
+/**
+ * First-in first-out queue for the BFS traversal.
+ *
+ * Items are stored at ever increasing positions and released from the
+ * front, so the queue is empty exactly when the front position has passed
+ * the rear position. The size given to the constructor is kept as a
+ * capacity hint only: the queue never reports itself empty while it still
+ * holds items and never overwrites an item that has not been dequeued.
+ */
+class Queue
+{
+    /** @var int capacity hint passed to the constructor */
+    private $size;
+    /** @var array position => queued item */
+    private $queArray;
+    /** @var int position of the next item to dequeue */
+    private $front;
+    /** @var int position of the most recently enqueued item */
+    private $rear;
+
+    /**
+     * @param int $size expected maximum number of queued items
+     */
+    function __construct($size){
+        $this->queArray = array();
+        $this->front = 0;
+        $this->rear = -1;
+        $this->size = $size;
+    }
+
+    /**
+     * Appends an item to the back of the queue.
+     *
+     * @param mixed $i item to store
+     */
+    function enqueue($i){
+        $this->queArray[++$this->rear] = $i;
+    }
+
+    /**
+     * Removes and returns the item at the front of the queue.
+     *
+     * @return mixed the oldest queued item, or NULL if the queue is empty
+     */
+    function dequeue(){
+        if($this->isEmpty()) {
+            return NULL;
+        }
+        $temp = $this->queArray[$this->front];
+        // release the slot so consumed items do not accumulate
+        unset($this->queArray[$this->front]);
+        $this->front++;
+        return $temp;
+    }
+
+    /**
+     * @return bool true when no item is waiting to be dequeued
+     */
+    function isEmpty(){
+        return $this->front > $this->rear;
+    }
+
+}
+/**
+ * creates tree from the input and apply Kruskal's algorithm to find MST.
+ *
+ * Each input row is array(start label, end label, cost). Every vertex
+ * starts in a component of its own; edges are taken cheapest first and an
+ * edge is kept only when it joins two different components, which are then
+ * merged under the smaller component number. The search ends once
+ * (number of vertices - 1) edges are kept or no edges are left.
+ *
+ * NOTE(review): the last row of $edges is never read (the loop stops at
+ * count($edges) - 1). This is kept as found because the caller is not
+ * visible here -- confirm whether the final row is a sentinel or whether
+ * this is an off-by-one.
+ *
+ * @param object array $edges recipes with distances between them.
+ * @return object array $min_edges MST, empty when no edge could be used
+ */
+function construct_tree($edges) {
+    $vertices = array();
+    $min_edges = array();
+    $tree_heap = new TreeCluster();
+    $vertice_no = 1;
+    $edge_limit = count($edges) - 1;
+    for($i = 0; $i < $edge_limit; $i++) {
+        $edge1 = new Edge($edges[$i][0], $edges[$i][1], $edges[$i][2]);
+        $tree_heap->insert($edge1);
+        $vertex1 = $edge1->getStartVertex();
+        $vertex2 = $edge1->getEndVertex();
+        // component numbers start at 1, so empty() means "not seen yet"
+        if(empty($vertices[$vertex1->getLabel()])) {
+            $vertices[$vertex1->getLabel()] = $vertice_no;
+            $vertice_no++;
+        }
+        if(empty($vertices[$vertex2->getLabel()])) {
+            $vertices[$vertex2->getLabel()] = $vertice_no;
+            $vertice_no++;
+        }
+    }
+    $k = 0;
+    $needed = count($vertices) - 1;
+    // extract() throws on an empty heap, so also stop when edges run out
+    while($k < $needed && !$tree_heap->isEmpty()) {
+        $min_edge = $tree_heap->extract();
+        $vertex1 = $min_edge->getStartVertex()->getLabel();
+        $vertex2 = $min_edge->getEndVertex()->getLabel();
+        if($vertices[$vertex1] != $vertices[$vertex2]) {
+            // relabel the higher numbered component with the lower number
+            $m = max($vertices[$vertex1], $vertices[$vertex2]);
+            $n = min($vertices[$vertex1], $vertices[$vertex2]);
+            foreach($vertices as $vertex => $no) {
+                if($no == $m) {
+                    $vertices[$vertex] = $n;
+                }
+            }
+            $min_edges[] = $min_edge;
+            $k++;
+        }
+    }
+    return $min_edges;
+}
+
+/**
+ * Clusters the recipes by applying Kruskal's algorithm.
+ *
+ * The MST of the weighted recipe graph is built first, then loaded into a
+ * Tree, cut into clusters, and finally each cluster is tagged with its
+ * common ingredient.
+ *
+ * @param array $edges recipes and distances between them.
+ * @param int $count number of recipes.
+ * @param array $distinct_ingredients recipe names with ingredients.
+ * @return clusters of recipes.
+ */
+function kruskalClustering($edges, $count, $distinct_ingredients)
+{
+    $spanning_edges = construct_tree($edges);
+    $tree = new Tree();
+    $tree->constructMST($spanning_edges);
+    return $tree->findCommonIngredient(
+        $tree->formCluster(count($spanning_edges), $count),
+        $distinct_ingredients);
+}
+?>
diff --git a/lib/processors/page_processor.php b/lib/processors/page_processor.php
new file mode 100644
index 000000000..95a929b7f
--- /dev/null
+++ b/lib/processors/page_processor.php
@@ -0,0 +1,135 @@
+<?php
+/**
+ *  SeekQuarry/Yioop --
+ *  Open Source Pure PHP Search Engine, Crawler, and Indexer
+ *
+ *  Copyright (C) 2009, 2010, 2011  Chris Pollett chris@pollett.org
+ *
+ *  LICENSE:
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *  END LICENSE
+ *
+ * @author Chris Pollett chris@pollett.org
+ * @package seek_quarry
+ * @subpackage processor
+ * @license http://www.gnu.org/licenses/ GPL3
+ * @link http://www.seekquarry.com/
+ * @copyright 2009, 2010, 2011
+ * @filesource
+ */
+
+if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}
+
+/**
+ * Used by subclasses, so have succinct access (i.e., can use self:: rather
+ * than CrawlConstants::) to constants like:
+ * CrawlConstants::TITLE, CrawlConstants::DESCRIPTION, etc.
+ */
+require_once BASE_DIR."/lib/crawl_constants.php";
+
+/**
+ * Base class common to all processors of web page data
+ *
+ * @author Chris Pollett
+ * @package seek_quarry
+ * @subpackage processor
+ */
+abstract class PageProcessor implements CrawlConstants
+{
+    /**
+     * Names of the indexing plugins which might be used with the current
+     * processor
+     *
+     * @var array
+     */
+    var $indexing_plugins;
+
+    /**
+     *  Sets up any indexing plugins associated with this page processor.
+     *  For each plugin name an instance of the class with that name
+     *  (first letter upper-cased) is created and stored on this object
+     *  under the same name with its first letter lower-cased.
+     *
+     *  @param array $plugins an array of indexing plugins which might
+     *      do further processing on the data handled by this page
+     *      processor
+     */
+    function __construct($plugins = array())
+    {
+        $this->indexing_plugins = $plugins;
+        foreach($plugins as $plugin) {
+            $property_name = lcfirst($plugin);
+            $class_name = ucfirst($plugin);
+            $this->$property_name = new $class_name();
+        }
+    }
+
+    /**
+     *  Method used to handle processing data for a web page. It makes
+     *  a summary for the page (via the process() function which should
+     *  be subclassed) and then lets every associated plugin examine the
+     *  page to create sub-documents.
+     *
+     * @param string $page string of a web document
+     * @param string $url location the document came from
+     *
+     * @return array a summary of (title, description, links, and content)
+     *      of the information in $page. It also has a subdocs array
+     *      containing any subdocuments returned from a plugin. A
+     *      subdocument might be things like recipes that appeared in a
+     *      page or tweets, etc.
+     */
+    function handle($page, $url)
+    {
+        $summary = $this->process($page, $url);
+        if($summary == NULL || !isset($this->indexing_plugins) ||
+            !is_array($this->indexing_plugins)) {
+            return $summary;
+        }
+
+        $summary[self::SUBDOCS] = array();
+        $suffix_length = strlen("Plugin");
+        foreach($this->indexing_plugins as $plugin) {
+            $instance_name = lcfirst($plugin);
+            $descriptions =
+                $this->$instance_name->pageProcessing($page, $url);
+            if(!is_array($descriptions) || count($descriptions) == 0) {
+                continue;
+            }
+            // plugin name without its trailing "Plugin", first letter
+            // lower-cased
+            $subdoc_type = lcfirst(substr($plugin, 0, -$suffix_length));
+            foreach($descriptions as $description) {
+                $summary[self::SUBDOCS][] = array(
+                    self::TITLE => $description[self::TITLE],
+                    self::DESCRIPTION => $description[self::DESCRIPTION],
+                    self::LANG => $summary[self::LANG],
+                    self::LINKS => $summary[self::LINKS],
+                    self::PAGE => $page,
+                    self::SUBDOCTYPE => $subdoc_type
+                );
+            }
+        }
+        return $summary;
+    }
+
+    /**
+     * Should be implemented to compute a summary based on a
+     * text string of a document. This method is called from
+     * @see handle($page, $url)
+     *
+     * @param string $page string of a document
+     * @param string $url location the document came from
+     *
+     * @return array a summary of (title, description,links, and content) of
+     *      the information in $page
+     */
+    abstract function process($page, $url);
+}
+
+?>
ViewGit