diff --git a/src/configs/Createdb.php b/src/configs/Createdb.php
index ab55e516b..ee3bacb15 100755
--- a/src/configs/Createdb.php
+++ b/src/configs/Createdb.php
@@ -442,22 +442,19 @@ foreach ($new_user_activities as $new_activity) {
}
}
$db->execute("INSERT INTO MEDIA_SOURCE VALUES ('1342634195',
- 'YouTube', 'video', 'http://www.youtube.com/watch?v={}',
+ 'YouTube', 'video', 'https://www.youtube.com/watch?v={}',
'http://i1.ytimg.com/vi/{}/default.jpg', '')");
$db->execute("INSERT INTO MEDIA_SOURCE VALUES ('1342634196',
- 'MetaCafe', 'video', 'http://www.metacafe.com/watch/{}',
+ 'MetaCafe', 'video', 'https://www.metacafe.com/watch/{}',
'/resources/blank.png?{}', '')");
$db->execute("INSERT INTO MEDIA_SOURCE VALUES ('1342634197',
- 'DailyMotion', 'video', 'http://www.dailymotion.com/video/{}',
+ 'DailyMotion', 'video', 'https://www.dailymotion.com/video/{}',
'/resources/blank.png?{}', '')");
$db->execute("INSERT INTO MEDIA_SOURCE VALUES ('1342634198',
- 'Vimeo', 'video', 'http://player.vimeo.com/video/{}',
+ 'Vimeo', 'video', 'https://player.vimeo.com/video/{}',
'/resources/blank.png?{}', '')");
-$db->execute("INSERT INTO MEDIA_SOURCE VALUES ('1342634199',
- 'Break.com', 'video', '/resources/blank.png?{}', '" .
- NAME_SERVER . "/resources/blank.png?{}', '')");
$db->execute("INSERT INTO MEDIA_SOURCE VALUES ('1342634200',
- 'Yahoo News', 'rss', 'http://news.yahoo.com/rss/',
+ 'Yahoo News', 'rss', 'https://news.yahoo.com/rss/',
'//content/@url', 'en-US')");
$db->execute("INSERT INTO CRAWL_MIXES VALUES (2, 'images', ".ROOT_ID.", -1)");
$db->execute("INSERT INTO MIX_FRAGMENTS VALUES(2, 0, 1)");
diff --git a/src/configs/default_crawl.ini b/src/configs/default_crawl.ini
index 1e965e531..f99f28ba0 100644
--- a/src/configs/default_crawl.ini
+++ b/src/configs/default_crawl.ini
@@ -22,13 +22,13 @@
;
[general]
crawl_order = 'ad';
-summarizer_option = 'dl';
+summarizer_option = 'dk';
crawl_type = 'ax';
crawl_index = '1483056689';
arc_dir = "";
arc_type = "";
page_recrawl_frequency = '-1';
-page_range_request = '50000';
+page_range_request = '100000';
max_description_len = '10000';
cache_pages = true;
restrict_sites_by_url = false;
diff --git a/src/controllers/Controller.php b/src/controllers/Controller.php
index 874d35f91..27e8bf1c6 100755
--- a/src/controllers/Controller.php
+++ b/src/controllers/Controller.php
@@ -378,7 +378,7 @@ abstract class Controller
* Method to perform a 302 redirect to $location in both under web server
* and CLI setting
*
- * @param string $location url to redirect to
+ * @param string $location url to redirect to
*/
public function redirectLocation($location)
{
diff --git a/src/data/public_default.db b/src/data/public_default.db
index 454d377f6..ad9f2d368 100644
Binary files a/src/data/public_default.db and b/src/data/public_default.db differ
diff --git a/src/executables/QueueServer.php b/src/executables/QueueServer.php
index cc87316cc..692b671cd 100755
--- a/src/executables/QueueServer.php
+++ b/src/executables/QueueServer.php
@@ -308,6 +308,7 @@ class QueueServer implements CrawlConstants, Join
$this->indexing_plugins = [];
$this->indexing_plugins_data = [];
$this->video_sources = [];
+ $this->waiting_hosts = [];
$this->server_name = "IndexerAndScheduler";
$this->process_name = "QueueServer";
}
diff --git a/src/library/FetchUrl.php b/src/library/FetchUrl.php
index a6fbaf903..e249a3dc4 100755
--- a/src/library/FetchUrl.php
+++ b/src/library/FetchUrl.php
@@ -74,15 +74,7 @@ class FetchUrl implements CrawlConstants
$post_data = null, $follow = false, $tor_proxy = "",
$proxy_servers=[])
{
- static $agent_handler = null;
- static $handler_time = 0;
- if (empty($agent_handler)) {
- /* try to keep handler around between calls to allow for connection
- reuse
- */
- $agent_handler = curl_multi_init();
- $handler_time = microtime(true);
- }
+ $agent_handler = curl_multi_init();
$active = null;
$start_time = microtime(true);
if (!$minimal && $temp_dir == null) {
@@ -420,11 +412,8 @@ class FetchUrl implements CrawlConstants
crawlLog(" Get Page Content time ".
(changeInMicrotime($start_time)));
}
- if (microtime(true) - $handler_time > C\PAGE_TIMEOUT) {
- if (!empty($agent_handler)) {
- curl_multi_close($agent_handler);
- }
- $agent_handler = null;
+ if (!empty($agent_handler)) {
+ curl_multi_close($agent_handler);
}
return $sites;
}