Skip to content

Commit

Permalink
Merge branch 'rs/pcre2-utf'
Browse files Browse the repository at this point in the history
"git grep --perl-regexp" failed to match UTF-8 characters with
a wildcard when the pattern consisted only of ASCII characters; this has
been corrected.

* rs/pcre2-utf:
  grep/pcre2: factor out literal variable
  grep/pcre2: use PCRE2_UTF even with ASCII patterns
  • Loading branch information
gitster committed Jan 5, 2022
2 parents 5d522cd + 32e3e8b commit c91b0b7
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
4 changes: 2 additions & 2 deletions grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
int jitret;
int patinforet;
size_t jitsizearg;
int literal = !opt->ignore_case && (p->fixed || p->is_fixed);

/*
* Call pcre2_general_context_create() before calling any
Expand All @@ -382,8 +383,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
}
options |= PCRE2_CASELESS;
}
- if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) &&
- !(!opt->ignore_case && (p->fixed || p->is_fixed)))
+ if (!opt->ignore_locale && is_utf8_locale() && !literal)
options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);

#ifdef GIT_PCRE2_VERSION_10_36_OR_HIGHER
Expand Down
6 changes: 6 additions & 0 deletions t/t7812-grep-icase-non-ascii.sh
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,10 @@ test_expect_success GETTEXT_LOCALE,LIBPCRE2,PCRE2_MATCH_INVALID_UTF 'PCRE v2: gr
test_cmp invalid-0xe5 actual
'

test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep non-literal ASCII from UTF-8' '
git grep --perl-regexp -h -o -e ll. file >actual &&
echo "lló" >expected &&
test_cmp expected actual
'

test_done

0 comments on commit c91b0b7

Please sign in to comment.