set search_path to listhunter; ------------------------------------------------------------- -- Pg index for table "UrlTask" ------------------------------------------------------------- create index url_task_order_by_id_idx on "UrlTask"(id asc); create index url_task_stat_idx on "UrlTask"(status); create index url_task_pri_idx on "UrlTask"(priority); create table "Link" ( id serial primary key, parent_id integer references "UrlTask"(id) not null, url text not null, label text, unique (parent_id, url) ); create table control ( id serial primary key, firefox_food integer not null default 0, firefox_enabled boolean not null default true prefetcher_enabled boolean not null default true ); insert into control (firefox_food, firefox_enabled, prefetcher_enabled) values (0, true, true); ------------------------------------------------------------- -- Pg functions for the prefetchers ------------------------------------------------------------- create sequence prefetcher_serial; grant select, update on sequence prefetcher_serial to anonymous; create type prefetcher_task as (id integer, url text); create or replace function prefetcher_get_tasks(count integer) returns setof prefetcher_task as $$ declare r prefetcher_task%rowtype; ff_food integer; enabled boolean; begin select prefetcher_enabled, firefox_food into enabled, ff_food from control; -- raise notice 'enabled is %b', enabled; if not enabled or ff_food > 20000 then return; end if; for r in select id, url from "UrlTask" where status = 'I' order by priority desc, id asc offset (nextval('prefetcher_serial') - 1) * count limit count loop return next r; end loop; if not found then -- raise notice 'Not found any rows...resetting the sequence...'; perform setval('prefetcher_serial', 1, true); for r in select id, url from "UrlTask" where status = 'I' order by priority desc, id asc offset 0 limit count loop return next r; end loop; end if; return; end $$ language 'plpgsql'; create or replace function reset_prefetcher_queue_pointer () returns integer as $$ declare rv integer; begin perform setval('prefetcher_serial', 1, true); select currval('prefetcher_serial') into rv; return rv; end $$ language 'plpgsql'; create or replace function prefetcher_submit(ok_ids integer[], nok_ids integer[], proxy integer) returns integer as $$ declare new_P_count integer; new_E_count integer; begin update "UrlTask" set status = 'P', proxy_id = proxy where status = 'I' and id = any(ok_ids); get diagnostics new_P_count = row_count; update control set firefox_food = firefox_food + new_P_count; update "UrlTask" set status = 'E', completed_time = now() where status = 'I' and id = any(nok_ids); get diagnostics new_E_count = row_count; return new_P_count + new_E_count; end $$ language 'plpgsql'; ------------------------------------------------------------- -- Pg functions for the firefox (ff) crawlers ------------------------------------------------------------- create sequence firefox_serial; grant select, update on sequence firefox_serial to anonymous; create type firefox_task as (id integer, url text, proxy integer, extract boolean); create or replace function firefox_get_tasks(count integer) returns setof firefox_task as $$ declare r firefox_task%rowtype; food integer; enabled boolean; begin select firefox_enabled, firefox_food into enabled, food from control; if not enabled or food < 7000 then return; end if; for r in select * from (select id, url, proxy_id, extract from "UrlTask" where status = 'P' order by id asc offset (nextval('firefox_serial') - 1) * count limit count) as res order by proxy_id loop return next r; end loop; if not found then -- raise notice 'Not found any rows...resetting the sequence...'; perform setval('firefox_serial', 1, true); for r in select * from (select id, url, proxy_id, extract from "UrlTask" where status = 'P' order by id asc offset 0 limit count) as res order by proxy_id loop return next r; end loop; end if; return; end $$ language 'plpgsql'; create or replace function firefox_submit(list_page_ids integer[], text_page_ids integer[]) returns integer as $$ declare new_L_count integer; new_T_count integer; total integer; begin -- raise notice 'status: %', statuses[i]; update "UrlTask" set status = 'L', completed_time = now() where id = any(list_page_ids) and status = 'P'; get diagnostics new_L_count = row_count; update "UrlTask" set status = 'T', completed_time = now() where id = any(text_page_ids) and status = 'P'; get diagnostics new_T_count = row_count; total := new_L_count + new_T_count; update control set firefox_food = firefox_food - total; return total; end $$ language 'plpgsql'; grant update on "UrlTask" to anonymous; grant select,update on control to anonymous; grant select,update on "Link" to anonymous; select setval('prefetcher_serial', 1, false); select setval('firefox_serial', 1, false);