Tanmayee_commit

unknown [2011-10-22 17:Oct:nd]
Tanmayee_commit

Signed-off-by: Chris Pollett <chris@pollett.org>
Filename
bin/fetcher.php
lib/crawl_constants.php
lib/fetch_url.php
diff --git a/bin/fetcher.php b/bin/fetcher.php
index db42db84b..d2482a904 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -871,6 +871,12 @@ class Fetcher implements CrawlConstants

         foreach($site_pages as $site) {
             $response_code = $site[self::HTTP_CODE];
+
+            // Short-URL services answer with a redirect: crawl the target
+            // named by the Location header, ignoring an empty header value.
+            if(!empty($site[self::LOCATION])) {
+                $site[self::URL] = $site[self::LOCATION];
+            }

             //process robot.txt files separately
             if(isset($site[self::ROBOT_PATHS])) {
diff --git a/lib/crawl_constants.php b/lib/crawl_constants.php
index f0fb00e1b..4001c9c71 100644
--- a/lib/crawl_constants.php
+++ b/lib/crawl_constants.php
@@ -165,7 +165,8 @@ interface CrawlConstants
     const DOMAIN_WEIGHTS = 'bm';
     const POSITION_LIST = 'bn';
     const PROXIMITY = 'bo';
-
+    const LOCATION = 'bp';
+
     const NEEDS_OFFSET_FLAG = 0x7FFFFFFF;

 }
diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 5cc7a3745..3bb840f31 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -281,6 +281,10 @@ class FetchUrl implements CrawlConstants
                 $site[CrawlConstants::MODIFIED] =
                     strtotime(@trim($line_parts[1]));
             }
+            if(stristr($line, 'Location:')) { // redirect target header
+                $line_parts = preg_split('/Location:/i', $line, 2);
+                $site[CrawlConstants::LOCATION] = trim($line_parts[1]);
+            }

         }
         if(!isset($site[CrawlConstants::ENCODING]) ) {
ViewGit