Tanmayee_commit
Tanmayee_commit
Signed-off-by: Chris Pollett <chris@pollett.org>
diff --git a/bin/fetcher.php b/bin/fetcher.php
index db42db84b..d2482a904 100755
--- a/bin/fetcher.php
+++ b/bin/fetcher.php
@@ -871,6 +871,12 @@ class Fetcher implements CrawlConstants
foreach($site_pages as $site) {
$response_code = $site[self::HTTP_CODE];
+
+ //if a Location header was recorded for this page (e.g., a short URL
+ //redirect), store the page under the redirect target instead
+ if(isset($site[self::LOCATION]) && $site[self::LOCATION] !== '') {
+ $site[self::URL]=$site[self::LOCATION];
+ }
//process robot.txt files separately
if(isset($site[self::ROBOT_PATHS])) {
diff --git a/lib/crawl_constants.php b/lib/crawl_constants.php
index f0fb00e1b..4001c9c71 100644
--- a/lib/crawl_constants.php
+++ b/lib/crawl_constants.php
@@ -165,7 +165,8 @@ interface CrawlConstants
const DOMAIN_WEIGHTS = 'bm';
const POSITION_LIST = 'bn';
const PROXIMITY = 'bo';
-
+ const LOCATION = 'bp';
+
const NEEDS_OFFSET_FLAG = 0x7FFFFFFF;
}
diff --git a/lib/fetch_url.php b/lib/fetch_url.php
index 5cc7a3745..3bb840f31 100755
--- a/lib/fetch_url.php
+++ b/lib/fetch_url.php
@@ -281,6 +281,10 @@ class FetchUrl implements CrawlConstants
$site[CrawlConstants::MODIFIED] =
strtotime(@trim($line_parts[1]));
}
+ if(stristr($line,'Location:')){ //redirect target header
+ $line_parts=preg_split('/Location:/i',$line,2); //any case, as stristr
+ $site[CrawlConstants::LOCATION]=trim($line_parts[1]);
+ }
}
if(!isset($site[CrawlConstants::ENCODING]) ) {