Skip to content

Commit

Permalink
Search for WITH operator in loose classifier
Browse files Browse the repository at this point in the history
Until now, the "loose" classifier handled licensing exceptions
by matching the end of the licence string against a list of acceptable
suffixes, each of those containing a variation of the WITH operator -
like " WITH " or "-with-". This was rather inelegant.

This commit changes the behaviour of the classifier to search for the
presence of the WITH operator within the licence string. Should it be
found, the string is split into the licence name and exception name.
Then, the exception name can be checked against a (slightly shorter)
list of acceptable suffixes.
  • Loading branch information
suve committed Jan 5, 2024
1 parent bea8cee commit f049433
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 52 deletions.
118 changes: 72 additions & 46 deletions src/classifier-loose.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* vrms-rpm - list non-free packages on an rpm-based Linux distribution
* Copyright (C) 2018, 2020-2023 suve (a.k.a. Artur Frenszek-Iwicki)
* Copyright (C) 2018, 2020-2024 suve (a.k.a. Artur Frenszek-Iwicki)
* Copyright (C) 2018 Marcin "dextero" Radomski
*
* This program is free software: you can redistribute it and/or modify
Expand Down Expand Up @@ -31,56 +31,82 @@ struct LooseClassifier {
struct ReBuffer *nodeBuf;
};

// TODO: Convert the "suffix" code here to work similarly to the SPDX classifier.
// Instead of trying all suffixes against the end of the string,
// search for " WITH " or "-with-" first, slice the string
// and then check if the suffix is on the list.
// Try to find the WITH operator. Each letter of the operator may be given
// in either upper or lower case, and the operator must be surrounded
// on both sides by a separator: either a space or a hyphen.
//
// The string is traversed in reverse because otherwise input like:
//   "Licence name with spaces with extra permissions"
// would be split into "Licence name" and "spaces with extra permissions"
// instead of "Licence name with spaces" and "extra permissions".
//
// Returns a pointer to the separator character directly preceding the
// right-most occurrence of the operator, or NULL if the operator
// does not appear in the string (this includes the empty string).
static char* find_WITH_operator(char *licence) {
	// The operator is spelled backwards, since the string is walked in reverse.
	const char matchChars[] = " HTIW ";
	const char altChars[] = "-htiw-";
	const size_t opLen = sizeof(matchChars) - 1; // Do not count the NUL terminator

	size_t charsMatched = 0;
	size_t pos = strlen(licence);
	while(pos > 0) {
		--pos;

		const char c = licence[pos];
		if((c == matchChars[charsMatched]) || (c == altChars[charsMatched])) {
			++charsMatched;
			if(charsMatched == opLen) return licence + pos;
		} else {
			// The character that broke the current partial match may itself be
			// the trailing separator of the operator, e.g. the first of the two
			// spaces in "A WITH  B". Restart from it instead of discarding it.
			charsMatched = ((c == matchChars[0]) || (c == altChars[0])) ? 1 : 0;
		}
	}
	return NULL;
}

static int is_free(const struct LicenceData *data, char *licence) {
// TODO: Multi-word suffixes appear in the list twice: first using
// a space-separated form, and then a hyphen-separated one.
// Devise some mechanism to get rid of this duplication.
const char *suffixes[] = {
" with acknowledgement",
" with advertising",
" with additional permissions",
" with attribution",
" with exception",
" with exceptions",
" with font exception",
" with GCC exception",
" with linking exception",
" with plugin exception",
"-with-acknowledgement",
"-with-advertising",
"-with-additional-permissions",
"-with-attribution",
"-with-exception",
"-with-exceptions",
"-with-font-exception",
"-with-GCC-exception",
"-with-linking-exception",
"-with-plugin-exception",
"acknowledgement",
"advertising",
"additional permissions",
"additional-permissions",
"attribution",
"exception",
"exceptions",
"font exception",
"font-exception",
"GCC exception",
"GCC-exception",
"linking exception",
"linking-exception",
"plugin exception",
"plugin-exception",
(const char*)NULL
};
int search = licences_find(data, licence);
if(search >= 0) return 1;

// See if the licence ends with an acceptable suffix.
// This allows us some flexibility when it comes to classifying licences.
for(const char **suf = suffixes; *suf != NULL; ++suf) {
char *sufpos = str_ends_with(licence, *suf);
if(sufpos == NULL) continue;

const char oldchar = *sufpos;
*sufpos = '\0';

search = licences_find(data, licence);
*sufpos = oldchar;

// It's not possible for a licence string to have two valid suffixes,
// so we can return now, without looking through the rest of the suffixes.
return search >= 0;

char *const with = find_WITH_operator(licence);
if(with != NULL) {
char *const past_with = with + 6; // Skip " WITH "
for(const char **suf = suffixes; *suf != NULL; ++suf) {
if(strcmp(past_with, *suf) != 0) continue;

// Store the character appearing before the "WITH" operator.
// We allow both spaces and hyphens, so we must remember which one it was.
const char oldChar = *with;

*with = '\0'; // Trim the licence string before lookup
const int search = licences_find(data, licence);
*with = oldChar; // Restore old char

// It's not possible for a licence string to have two valid suffixes,
// so we can return now, without looking through the rest of the suffixes.
return search >= 0;
}
}
return 0;

return licences_find(data, licence) >= 0;
}

static int is_opening_paren(const char *str) {
Expand Down
17 changes: 11 additions & 6 deletions test/classifier-loose.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* vrms-rpm - list non-free packages on an rpm-based Linux distribution
* Copyright (C) 2021-2023 suve (a.k.a. Artur Frenszek-Iwicki)
* Copyright (C) 2021-2024 suve (a.k.a. Artur Frenszek-Iwicki)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 3,
Expand Down Expand Up @@ -421,11 +421,8 @@ void test__looseClassifier_acceptable_suffixes(void **state) {
make_ltn_simple(expected, 1, "Awesome with additional permissions");
test_licence("Awesome with additional permissions", expected);
}
{
struct LicenceTreeNode *expected;
make_ltn_simple(expected, 1, "Long name with spaces with linking exception");
test_licence("Long name with spaces with linking exception", expected);
}

// Bad licences with licensing exceptions are still bad.
{
struct LicenceTreeNode *expected;
make_ltn_simple(expected, 0, "Bad with acknowledgement");
Expand All @@ -441,6 +438,14 @@ void test__looseClassifier_acceptable_suffixes(void **state) {
make_ltn_simple(expected, 0, "Bad with linking exception");
test_licence("Bad with linking exception", expected);
}

// If the licence name contains the word "with", the split into
// licence and exception parts must not occur at the initial "with".
{
struct LicenceTreeNode *expected;
make_ltn_simple(expected, 1, "Long name with spaces with linking exception");
test_licence("Long name with spaces with linking exception", expected);
}
}

// Test some licence strings with mismatched parentheses.
Expand Down

0 comments on commit f049433

Please sign in to comment.