Skip to content

Commit

Permalink
Pull request #846: Feature/AG-31712 Simplify RuleStorage indexing
Browse files Browse the repository at this point in the history
Merge in ADGUARD-FILTERS/tsurlfilter from feature/AG-31712 to master

Squashed commit of the following:

commit 75ffd0b
Author: Vladimir Zhelvis <[email protected]>
Date:   Wed Apr 17 18:00:21 2024 +0300

    update comments and changelog

commit c066761
Author: Vladimir Zhelvis <[email protected]>
Date:   Tue Apr 16 15:56:16 2024 +0300

    update RuleStorageScanner comment

commit 811246d
Author: Vladimir Zhelvis <[email protected]>
Date:   Tue Apr 16 15:52:12 2024 +0300

    add comments, make rule scanner list id property private

commit 9077fe8
Author: Vladimir Zhelvis <[email protected]>
Date:   Tue Apr 16 15:07:44 2024 +0300

    delete useless diff

commit 14aae8c
Author: Vladimir Zhelvis <[email protected]>
Date:   Tue Apr 16 15:04:22 2024 +0300

    fix id matching for last added rule list

commit 2d9a550
Author: Vladimir Zhelvis <[email protected]>
Date:   Tue Apr 16 14:30:42 2024 +0300

    add comments

commit 52ff790
Merge: a9a5b27 f425553
Author: Vladimir Zhelvis <[email protected]>
Date:   Tue Apr 16 13:48:31 2024 +0300

    Merge branch 'master' into feature/AG-31712

commit a9a5b27
Author: Vladimir Zhelvis <[email protected]>
Date:   Tue Apr 16 13:45:24 2024 +0300

    delete useless diff

commit b4a38f6
Author: Vladimir Zhelvis <[email protected]>
Date:   Tue Apr 16 13:38:27 2024 +0300

    new rule indexing

commit afb26d1
Author: scripthunter7 <[email protected]>
Date:   Mon Apr 8 18:06:40 2024 +0200

    update tests, still wrong

commit 2329646
Author: Vladimir Zhelvis <[email protected]>
Date:   Mon Apr 8 17:36:46 2024 +0300

    rework storage indexing
  • Loading branch information
zhelvis committed Apr 17, 2024
1 parent f425553 commit f2952e1
Show file tree
Hide file tree
Showing 10 changed files with 157 additions and 88 deletions.
3 changes: 3 additions & 0 deletions packages/tsurlfilter/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- Ability to allowlist scriptlets by name [#377].
- New rule indexing algorithm. The storage index is now an integer representing
the rule position in the concatenated filter list text.
The list id is determined by the pre-stored filter list offset during the scan.

[#377]: https://github.com/AdguardTeam/Scriptlets/issues/377

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,9 @@ export class BufferLineReader implements ILineReader {
public getCurrentPos(): number {
return this.currentIndex;
}

/**
 * Returns the length of the underlying buffer, i.e. the total amount of data
 * this reader operates on, regardless of the current read position.
 *
 * @returns Value of `buffer.length`.
 */
public getDataLength(): number {
return this.buffer.length;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,9 @@ export class FileLineReader implements ILineReader {
getCurrentPos(): number {
return this.innerReader.getCurrentPos();
}

/**
 * Returns the data length reported by the wrapped inner reader.
 *
 * @returns Result of `innerReader.getDataLength()`.
 */
getDataLength(): number {
return this.innerReader.getDataLength();
}
}
5 changes: 5 additions & 0 deletions packages/tsurlfilter/src/filterlist/reader/line-reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,9 @@ export interface ILineReader {
* Returns the current position of this line reader.
*/
getCurrentPos(): number;

/**
 * Returns the total length of the data this reader reads from.
 *
 * @returns Data length.
 */
getDataLength(): number;
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,9 @@ export class StringLineReader implements ILineReader {
public getCurrentPos(): number {
return this.currentIndex;
}

/**
 * Returns the length of the underlying text, i.e. the total amount of data
 * this reader operates on, regardless of the current read position.
 *
 * @returns Value of `text.length`.
 */
public getDataLength(): number {
return this.text.length;
}
}
63 changes: 18 additions & 45 deletions packages/tsurlfilter/src/filterlist/rule-storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import { NetworkRule } from '../rules/network-rule';
import { HostRule } from '../rules/host-rule';
import { ScannerType } from './scanner/scanner-type';
import { RuleFactory } from '../rules/rule-factory';
import { ListCache } from './list-cache';
import { logger } from '../utils/logger';

/**
Expand Down Expand Up @@ -35,11 +34,14 @@ export class RuleStorage {
private readonly listsMap: Map<number, IRuleList>;

/**
* Cache with the rules which were retrieved.
* We use double layer map in order to achieve better performance. The reason is a fact that a map with number
* keys is much faster than a map with string keys. So we have a structure like Map<number, Map<number, IRule>>.
* Cache of the rules that have already been retrieved, keyed by storage index.
*/
private readonly cache: Map<number, ListCache>;
private readonly cache: Map<number, IRule>;

/**
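* Storage scanner created by {@link createRuleStorageScanner}; used to map a storage index
* back to a list id and a rule position. Not assigned until that method has been called.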
*/
declare private scanner: RuleStorageScanner;

/**
* Constructor
Expand All @@ -51,7 +53,7 @@ export class RuleStorage {
constructor(lists: IRuleList[]) {
this.lists = lists;
this.listsMap = new Map<number, IRuleList>();
this.cache = new Map<number, ListCache>();
this.cache = new Map<number, IRule>();

this.lists.forEach((list) => {
const filterListId = list.getId();
Expand All @@ -71,7 +73,8 @@ export class RuleStorage {
*/
createRuleStorageScanner(scannerType: ScannerType): RuleStorageScanner {
const scanners: RuleScanner[] = this.lists.map((list) => list.newScanner(scannerType));
return new RuleStorageScanner(scanners);
this.scanner = new RuleStorageScanner(scanners);
return this.scanner;
}

/**
Expand All @@ -81,30 +84,31 @@ export class RuleStorage {
* @param ignoreHost rules could be retrieved as host rules
*/
retrieveRule(storageIdx: number, ignoreHost = true): IRule | null {
const [listId, ruleIdx] = RuleStorageScanner.storageIdxToRuleListIdx(storageIdx);

const rule = this.getFromCache(listId, ruleIdx);
const rule = this.cache.get(storageIdx);
if (rule) {
return rule;
}

const [listId, ruleId] = this.scanner.getIds(storageIdx);

const list = this.listsMap.get(listId);

if (!list) {
logger.warn(`Failed to retrieve list ${listId}, should not happen in normal operation`);

return null;
}

const ruleText = list.retrieveRuleText(ruleIdx);
const ruleText = list.retrieveRuleText(ruleId);
if (!ruleText) {
logger.warn(`Failed to retrieve rule ${ruleIdx}, should not happen in normal operation`);
logger.warn(`Failed to retrieve rule ${ruleId}, should not happen in normal operation`);

return null;
}

const result = RuleFactory.createRule(ruleText, listId, false, false, ignoreHost);
if (result) {
this.saveToCache(listId, ruleIdx, result);
this.cache.set(storageIdx, result);
}

return result;
Expand Down Expand Up @@ -152,37 +156,6 @@ export class RuleStorage {
* Returns the size of the cache.
*/
getCacheSize(): number {
return Array.from(this.cache.values())
.reduce((acc, listCache) => acc + listCache.getSize(), 0);
}

/**
* Saves rule to cache
*
* @param listId
* @param ruleIdx
* @param rule
*/
private saveToCache(listId: number, ruleIdx: number, rule: IRule): void {
let listCache = this.cache.get(listId);
if (!listCache) {
listCache = new ListCache();
this.cache.set(listId, listCache);
}
listCache.set(ruleIdx, rule);
}

/**
* Retrieves rule from cache
*
* @param listId
* @param ruleIdx
*/
private getFromCache(listId: number, ruleIdx: number): IRule | undefined {
const listCache = this.cache.get(listId);
if (!listCache) {
return undefined;
}
return listCache.get(ruleIdx);
return this.cache.size;
}
}
18 changes: 18 additions & 0 deletions packages/tsurlfilter/src/filterlist/scanner/rule-scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,24 @@ export class RuleScanner {
return null;
}

/**
 * Returns the filter list id held by this scanner.
 *
 * @returns List id.
 */
public getListId(): number {
return this.listId;
}

/**
 * Returns the data length reported by the underlying line reader.
 *
 * This simply delegates to `reader.getDataLength()`; presumably that is the
 * reader's total data length rather than the amount consumed by the scanner
 * so far — verify against the concrete reader implementation.
 *
 * @returns Data length.
 */
public getDataLength(): number {
return this.reader.getDataLength();
}

/**
* Reads the next line and returns it.
*
Expand Down
101 changes: 79 additions & 22 deletions packages/tsurlfilter/src/filterlist/scanner/rule-storage-scanner.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
import { RuleScanner } from './rule-scanner';
import { IndexedStorageRule } from '../../rules/rule';
import { LIST_ID_MAX_VALUE } from '../rule-list';

/**
* RuleStorageScanner scans multiple RuleScanner instances
* The rule index is built from the rule index in the list + the list ID
* In the index number we consider decimal part as listId and integer part as ruleId
* RuleStorageScanner scans multiple RuleScanner instances.
* The storage index is built from the rule position in the list
* + cumulative offset value of filter lists.
*/
export class RuleStorageScanner {
/**
* Scanners is the list of list scanners backing this combined scanner
*/
private readonly scanners: RuleScanner[];
declare private readonly scanners: RuleScanner[];

/**
* Cursor for the {@link listOffsetEntries}
*/
declare private listOffsetEntriesCursor: number;

/**
* Mapping between list ID and the offset in the storage
*/
declare private listOffsetEntries: Uint32Array;

/**
* Current scanner
Expand All @@ -23,13 +32,20 @@ export class RuleStorageScanner {
*/
private currentScannerIdx = -1;

/**
* Rule position in the concatenated filter lists data
*/
private storageOffset = 0;

/**
 * Constructor
 *
 * Stores the scanners, resets the write cursor and allocates two slots per
 * scanner in the offsets array; the slots are filled in later by
 * `setListOffset` as (list id, storage offset) pairs.
 *
 * @param scanners List scanners backing this combined scanner.
 */
constructor(scanners: RuleScanner[]) {
this.scanners = scanners;
this.listOffsetEntriesCursor = 0;
// Zero-initialised typed array laid out as [listId0, offset0, listId1, offset1, ...].
// NOTE(review): Uint32Array coerces stored values to unsigned 32-bit integers —
// confirm list ids and offsets always fit.
this.listOffsetEntries = new Uint32Array(scanners.length * 2);
}

/**
Expand All @@ -47,6 +63,7 @@ export class RuleStorageScanner {
if (!this.currentScanner) {
this.currentScannerIdx = 0;
this.currentScanner = this.scanners[this.currentScannerIdx];
this.setListOffset(this.currentScanner.getListId(), this.storageOffset);
}

while (true) {
Expand All @@ -59,15 +76,22 @@ export class RuleStorageScanner {
return false;
}

// Accumulate the length of the current scanner before moving to the next one
this.storageOffset += this.currentScanner.getDataLength();

// Take the next scanner
this.currentScannerIdx += 1;
this.currentScanner = this.scanners[this.currentScannerIdx];

// Store the offset for the next scanner
this.setListOffset(this.currentScanner.getListId(), this.storageOffset);
}
}

/**
* Returns the most recent rule generated by a call to scan, together with its storage index.
* The storage index is the storage offset of the rule's filter list plus the rule's index within that list.
*/
*/
public getRule(): IndexedStorageRule | null {
if (!this.currentScanner) {
return null;
Expand All @@ -78,33 +102,66 @@ export class RuleStorageScanner {
return null;
}

const index = RuleStorageScanner.ruleListIdxToStorageIdx(rule.rule.getFilterListId(), rule.index);
const offset = this.getListOffset(rule.rule.getFilterListId())!;
const index = offset + rule.index;
return new IndexedStorageRule(rule.rule, index);
}

/**
* ruleListIdxToStorageIdx converts pair of listID and rule list index
* to "storage index" number
* Match storage index with range of filter list offset
* and return list id and local filter list position.
*
* @param listId
* @param ruleIdx
* @param storageIdx rule position in concatenated filter lists data
* @returns tuple of list id and local filter list position
*/
private static ruleListIdxToStorageIdx(listId: number, ruleIdx: number): number {
return listId / LIST_ID_MAX_VALUE + ruleIdx;
public getIds(storageIdx: number): [number, number] {
// Entries are flat pairs: even slots hold list ids, odd slots hold the list offsets.
const listOffsetEntriesLength = this.listOffsetEntries.length;

// Single list: no range matching is needed.
if (listOffsetEntriesLength === 2) {
return [this.listOffsetEntries[0], storageIdx - this.listOffsetEntries[1]];
}

// Walk the offsets (odd slots) of every list except the last one and pick the list
// whose half-open range [offset, nextOffset) contains the storage index.
for (let i = 1; i < listOffsetEntriesLength - 2; i += 2) {
const offset = this.listOffsetEntries[i];
const nextOffset = this.listOffsetEntries[i + 2];
if (storageIdx >= offset && storageIdx < nextOffset) {
// The list id sits in the slot right before its offset.
return [this.listOffsetEntries[i - 1], storageIdx - offset];
}
}

// No range matched: attribute the index to the last pair in the array.
// NOTE(review): slots that have not been written yet are still zero, so a lookup made
// before every list offset has been recorded may resolve to the wrong list — confirm
// callers only use this after a complete scan.
return [
this.listOffsetEntries[listOffsetEntriesLength - 2],
storageIdx - this.listOffsetEntries[listOffsetEntriesLength - 1],
];
}

/**
* Converts the "storage index" to two integers:
* listID -- rule list identifier
* ruleIdx -- index of the rule in the list
* Push listID and offset to the {@link listOffsetEntries}.
*
* @param storageIdx
* @return [listId, ruleIdx]
* @param listId Filter list id.
* @param offset Filter list offset position in the storage.
*/
public static storageIdxToRuleListIdx(storageIdx: number): [number, number] {
const listId = Math.round((storageIdx % 1) * LIST_ID_MAX_VALUE);
const ruleIdx = Math.trunc(storageIdx);
private setListOffset(listId: number, offset: number): void {
// Append the pair into two consecutive slots: the list id first, then its offset,
// advancing the cursor after each write.
this.listOffsetEntries[this.listOffsetEntriesCursor] = listId;
this.listOffsetEntriesCursor += 1;
this.listOffsetEntries[this.listOffsetEntriesCursor] = offset;
this.listOffsetEntriesCursor += 1;
}

/**
* Get filter list offset by list id.
*
* @param listId Filter list id.
* @returns offset position in the storage.
* @throws Error if listId is not found.
*/
private getListOffset(listId: number): number {
for (let i = 0; i < this.listOffsetEntries.length; i += 2) {
if (this.listOffsetEntries[i] === listId) {
return this.listOffsetEntries[i + 1];
}
}

return [listId, ruleIdx];
throw new Error(`listId ${listId} not found in the storage`);
}
}
Loading

0 comments on commit f2952e1

Please sign in to comment.