seek_quarry
[ class tree: seek_quarry ] [ index: seek_quarry ] [ all elements ]

Class: CrawlConstants

Source Location: /lib/crawl_constants.php

Interface Overview


Shared constants and enums used by the components involved in the crawling process.


Author(s):

  • Chris Pollett

Constants



Class Details

[line 45]
Shared constants and enums used by the components involved in the crawling process.



Tags:

author:  Chris Pollett


[ Top ]


Class Constants

AGENT_LIST =  "bz"

[line 201]


[ Top ]

ALLOWED_SITES =  'aa'

[line 149]


[ Top ]

archive_base_name =  "Archive"

[line 61]


[ Top ]

ARCHIVE_CRAWL =  'ay'

[line 174]


[ Top ]

ARC_DATA =  "cd"

[line 205]


[ Top ]

ARC_DIR =  "cb"

[line 203]


[ Top ]

ARC_TYPE =  "cc"

[line 204]


[ Top ]

AVERAGE_DESCRIPTION_LENGTH =  'B'

[line 122]


[ Top ]

AVERAGE_TITLE_LENGTH =  'A'

[line 121]


[ Top ]

AVERAGE_TOTAL_LINK_TEXT_LENGTH =  'C'

[line 123]


[ Top ]

BOOST =  'av'

[line 170]


[ Top ]

BOTH =  0

[line 50]

Value used to indicate what kind of queue_server this is.


[ Top ]

BREADTH_FIRST =  'ac'

[line 151]


[ Top ]

CACHE_PAGES =  'cn'

[line 215]


[ Top ]

CACHE_PAGE_PARTITION =  'ap'

[line 165]


[ Top ]

CONTINUE_STATE =  1

[line 86]


[ Top ]

CRAWL_DELAY =  'v'

[line 115]


[ Top ]

CRAWL_INDEX =  'ba'

[line 176]


[ Top ]

CRAWL_ORDER =  'Y'

[line 147]


[ Top ]

CRAWL_TIME =  'b'

[line 92]


[ Top ]

CRAWL_TYPE =  'az'

[line 175]


[ Top ]

CURRENT_SERVER =  "bv"

[line 197]


[ Top ]

DATA =  'bt'

[line 195]


[ Top ]

data_base_name =  "At"

[line 76]


[ Top ]

DESCRIPTION =  't'

[line 113]


[ Top ]

DESCRIPTION_LENGTH =  'E'

[line 125]


[ Top ]

DESCRIPTION_WORDS =  'H'

[line 128]


[ Top ]

DESCRIPTION_WORD_SCORE =  'K'

[line 131]


[ Top ]

DISALLOWED_SITES =  'ab'

[line 150]


[ Top ]

DNS_TIME =  "by"

[line 200]


[ Top ]

DOC_DEPTH =  'M'

[line 133]


[ Top ]

DOC_ID = 'am'

[line 162]


[ Top ]

DOC_INFO =  'r'

[line 111]


[ Top ]

DOC_LEN =  'bi'

[line 184]


[ Top ]

DOC_RANK =  'N'

[line 134]


[ Top ]

DOMAIN_WEIGHTS =  'bm'

[line 188]


[ Top ]

DUMMY =  'V'

[line 143]


[ Top ]

ENCODING =  'f'

[line 97]


[ Top ]

END_ITERATOR =  'ct'

[line 221]


[ Top ]

fetch_archive_iterator =  "FetchArchiveIterator"

[line 63]


[ Top ]

fetch_batch_name =  "FetchBatch"

[line 73]


[ Top ]

fetch_closed_name =  "FetchClosed"

[line 75]


[ Top ]

fetch_crawl_info =  "FetchInfo"

[line 74]


[ Top ]

FILETYPE =  'ag'

[line 156]


[ Top ]

GENERATION =  'aq'

[line 166]


[ Top ]

GOT_ROBOT_TXT =  'p'

[line 109]


[ Top ]

HASH =  'o'

[line 108]


[ Top ]

HASH_SEEN_URLS = 'aj'

[line 159]


[ Top ]

HASH_SUM_SCORE =  'ar'

[line 167]


[ Top ]

HASH_URL =  'T'

[line 141]


[ Top ]

HASH_URL_COUNT =  'as'

[line 168]


[ Top ]

HEADER =  'bb'

[line 177]


[ Top ]

HTTP_CODE =  'c'

[line 94]


[ Top ]

INDEX =  'z'

[line 119]


[ Top ]

INDEXED_FILE_TYPES =  'bq'

[line 192]


[ Top ]

INDEXER =  "Indexer"

[line 54]

Value used to indicate what kind of queue_server this is.


[ Top ]

INDEXING_PLUGINS =  'bl'

[line 187]


[ Top ]

index_closed_name =  "IndexClosed"

[line 72]


[ Top ]

index_data_base_name =  "IndexData"

[line 68]


[ Top ]

INI =  'cq'

[line 218]


[ Top ]

INLINKS =  'P'

[line 136]


[ Top ]

INVERTED_INDEX =  'i'

[line 101]


[ Top ]

IP_ADDRESSES =  'au'

[line 171]


[ Top ]

IS_DOC =  'at'

[line 169]


[ Top ]

IS_FEED =  'ch'

[line 209]


[ Top ]

JUST_METAS =  'aw'

[line 172]


[ Top ]

KEY =  "ce"

[line 206]


[ Top ]

KEYWORD_LINKS =  'cs'

[line 220]


[ Top ]

LANG =  'bg'

[line 182]


[ Top ]

LINKS =  'w'

[line 116]


[ Top ]

LINK_LENGTH =  'F'

[line 126]


[ Top ]

LINK_SEEN_URLS =  'cj'

[line 211]


[ Top ]

LINK_WORDS =  'I'

[line 129]


[ Top ]

LINK_WORD_SCORE =  'L'

[line 132]


[ Top ]

LOCATION =  'bp'

[line 191]


[ Top ]

LOGGING =  'cl'

[line 213]


[ Top ]

MACHINE =  'h'

[line 100]


[ Top ]

MACHINE_ID =  'cf'

[line 207]


[ Top ]

MACHINE_URI =  'ae'

[line 154]


[ Top ]

MAX =  1

[line 82]


[ Top ]

MEMORY_USAGE = 'al'

[line 161]


[ Top ]

META_WORDS =  'cm'

[line 214]


[ Top ]

MIN =  -1

[line 83]


[ Top ]

mirror_table_name =  "mirror_table.txt"

[line 79]


[ Top ]

MODIFIED =  'bf'

[line 181]


[ Top ]

name_archive_iterator =  "NameArchiveIterator"

[line 62]


[ Top ]

NEEDS_OFFSET_FLAG =  0x7FFFFFFF

[line 223]


[ Top ]

network_base_name =  "Network"

[line 69]


[ Top ]

network_crawllist_base_name =  "NetworkCrawlList"

[line 70]


[ Top ]

NEW_CRAWL =  'Q'

[line 138]


[ Top ]

NO_DATA_STATE =  2

[line 87]


[ Top ]

OFFSET =  'R'

[line 139]


[ Top ]

OPERATING_SYSTEM =  'be'

[line 180]


[ Top ]

PAGE =  'q'

[line 110]


[ Top ]

PAGE_IMPORTANCE =  'ad'

[line 152]


[ Top ]

PAGE_RANGE_REQUEST =  'br'

[line 193]


[ Top ]

PAGE_RECRAWL_FREQUENCY =  'bs'

[line 194]


[ Top ]

PAGE_RULES = 'ao'

[line 164]


[ Top ]

PATHS =  'S'

[line 140]


[ Top ]

POSITION_LIST =  'bn'

[line 189]


[ Top ]

POST_MAX_SIZE =  'ck'

[line 212]


[ Top ]

PROXIMITY =  'bo'

[line 190]


[ Top ]

queue_base_name =  "QueueBundle"

[line 60]


[ Top ]

QUEUE_SERVERS =  "bu"

[line 196]


[ Top ]

RECENT_URLS = 'ak'

[line 160]


[ Top ]

REDO_STATE =  4

[line 89]


[ Top ]

RELEVANCE = 'an'

[line 163]


[ Top ]

RESTRICT_SITES_BY_URL =  'Z'

[line 148]


[ Top ]

robot_data_base_name =  "RobotData"

[line 67]


[ Top ]

ROBOT_INSTANCE =  'bh'

[line 183]


[ Top ]

ROBOT_METAS =  "ca"

[line 202]


[ Top ]

ROBOT_PATHS =  'n'

[line 107]


[ Top ]

robot_table_name =  "robot_table.txt"

[line 78]


[ Top ]

ROBOT_TXT =  'x'

[line 117]


[ Top ]

SAVED_CRAWL_TIMES =  'j'

[line 103]


[ Top ]

save_point =  "SavePoint"

[line 64]


[ Top ]

SCHEDULER =  "Scheduler"

[line 58]

Value used to indicate what kind of queue_server this is.


[ Top ]

schedule_data_base_name =  "ScheduleData"

[line 65]


[ Top ]

schedule_name =  "FetchSchedule"

[line 66]


[ Top ]

schedule_start_name =  "StartCrawlSchedule.txt"

[line 77]


[ Top ]

SCHEDULE_TIME =  'k'

[line 104]


[ Top ]

SCORE =  'X'

[line 145]


[ Top ]

SEEN_URLS =  'g'

[line 99]


[ Top ]

SERVER =  'bc'

[line 178]


[ Top ]

SERVER_VERSION =  'bd'

[line 179]


[ Top ]

SITES =  'W'

[line 144]


[ Top ]

SITE_INFO =  'af'

[line 155]


[ Top ]

SIZE =  "bw"

[line 198]


[ Top ]

SOURCE_NAME =  'ci'

[line 210]


[ Top ]

START_PARTITION =  'cp'

[line 217]


[ Top ]

statistics_base_name =  "Statistics"

[line 71]


[ Top ]

STATUS =  'a'

[line 91]


[ Top ]

STOP_STATE =  -1

[line 85]


[ Top ]

SUBDOCS =  'bj'

[line 185]


[ Top ]

SUBDOCTYPE =  'bk'

[line 186]


[ Top ]

SUMMARY =  'ah'

[line 157]


[ Top ]

SUMMARY_OFFSET =  'U'

[line 142]


[ Top ]

THUMB =  'u'

[line 114]


[ Top ]

TIMESTAMP =  'd'

[line 95]


[ Top ]

TITLE =  's'

[line 112]


[ Top ]

TITLE_LENGTH =  'D'

[line 124]


[ Top ]

TITLE_WORDS =  'G'

[line 127]


[ Top ]

TITLE_WORD_SCORE =  'J'

[line 130]


[ Top ]

TOTAL_TIME =  "bx"

[line 199]


[ Top ]

TO_CRAWL =  'y'

[line 118]


[ Top ]

TYPE =  'e'

[line 96]


[ Top ]

UI_FLAGS =  'cr'

[line 219]


[ Top ]

URL =  'l'

[line 105]


[ Top ]

URL_INFO =  'ai'

[line 158]


[ Top ]

URL_WEIGHT =  'O'

[line 135]


[ Top ]

VIDEO_SOURCES =  'cg'

[line 208]


[ Top ]

WAITING_START_MESSAGE_STATE =  3

[line 88]


[ Top ]

WARC_ID =  'co'

[line 216]


[ Top ]

WEB_CRAWL =  'ax'

[line 173]


[ Top ]

WEIGHT =  'm'

[line 106]


[ Top ]



Documentation generated by phpDocumentor 1.4.3