From a97a74686d70a318cd802003498054cc1e8b0ae2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 9 Oct 2009 12:21:57 +0200 Subject: [PATCH 01/14] Introduce commit notes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit notes are blobs which are shown together with the commit message. These blobs are taken from the notes ref, which you can configure by the config variable core.notesRef, which in turn can be overridden by the environment variable GIT_NOTES_REF. The notes ref is a branch which contains "files" whose names are the names of the corresponding commits (i.e. the SHA-1). The rationale for putting this information into a ref is this: we want to be able to fetch and possibly union-merge the notes, maybe even look at the date when a note was introduced, and we want to store them efficiently together with the other objects. This patch has been improved by the following contributions: - Thomas Rast: fix core.notesRef documentation - Tor Arne Vestbø: fix printing of multi-line notes - Alex Riesen: Using char array instead of char pointer costs less BSS - Johan Herland: Plug leak when msg is good, but msglen or type causes return Signed-off-by: Johannes Schindelin Signed-off-by: Thomas Rast Signed-off-by: Tor Arne Vestbø Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano get_commit_notes(): Plug memory leak when 'if' triggers, but not because of read_sha1_file() failure --- Documentation/config.txt | 13 ++++++++ Makefile | 2 ++ cache.h | 4 +++ commit.c | 1 + config.c | 5 +++ environment.c | 1 + notes.c | 70 ++++++++++++++++++++++++++++++++++++++++ notes.h | 7 ++++ pretty.c | 5 +++ 9 files changed, 108 insertions(+) create mode 100644 notes.c create mode 100644 notes.h diff --git a/Documentation/config.txt b/Documentation/config.txt index cd1781498eb92d..57d64e4d990bc6 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -454,6 +454,19 @@ On some file system/operating system combinations, this is unreliable. Set this config setting to 'rename' there; However, This will remove the check that makes sure that existing object files will not get overwritten. +core.notesRef:: + When showing commit messages, also show notes which are stored in + the given ref. This ref is expected to contain files named + after the full SHA-1 of the commit they annotate. ++ +If such a file exists in the given ref, the referenced blob is read, and +appended to the commit message, separated by a "Notes:" line. If the +given ref itself does not exist, it is not an error, but means that no +notes should be printed. ++ +This setting defaults to "refs/notes/commits", and can be overridden by +the `GIT_NOTES_REF` environment variable. + add.ignore-errors:: Tells 'git-add' to continue adding files when some files cannot be added due to indexing errors. Equivalent to the '--ignore-errors' diff --git a/Makefile b/Makefile index fea237bc80978c..9a6a729868d891 100644 --- a/Makefile +++ b/Makefile @@ -432,6 +432,7 @@ LIB_H += ll-merge.h LIB_H += log-tree.h LIB_H += mailmap.h LIB_H += merge-recursive.h +LIB_H += notes.h LIB_H += object.h LIB_H += pack.h LIB_H += pack-refs.h @@ -516,6 +517,7 @@ LIB_OBJS += match-trees.o LIB_OBJS += merge-file.o LIB_OBJS += merge-recursive.o LIB_OBJS += name-hash.o +LIB_OBJS += notes.o LIB_OBJS += object.o LIB_OBJS += pack-check.o LIB_OBJS += pack-refs.o diff --git a/cache.h b/cache.h index a5eeead1e27552..4f0ddeca11b4bd 100644 --- a/cache.h +++ b/cache.h @@ -372,6 +372,8 @@ static inline enum object_type object_type(unsigned int mode) #define GITATTRIBUTES_FILE ".gitattributes" #define INFOATTRIBUTES_FILE "info/attributes" #define ATTRIBUTE_MACRO_PREFIX "[attr]" +#define GIT_NOTES_REF_ENVIRONMENT "GIT_NOTES_REF" +#define GIT_NOTES_DEFAULT_REF "refs/notes/commits" extern int is_bare_repository_cfg; extern int is_bare_repository(void); @@ -567,6 +569,8 @@ enum object_creation_mode { extern enum object_creation_mode object_creation_mode; +extern char *notes_ref_name; + extern int grafts_replace_parents; #define GIT_REPO_VERSION 0 diff --git a/commit.c b/commit.c index fedbd5e5267ec4..5ade8edf81cb88 100644 --- a/commit.c +++ b/commit.c @@ -5,6 +5,7 @@ #include "utf8.h" #include "diff.h" #include "revision.h" +#include "notes.h" int save_commit_buffer = 1; diff --git a/config.c b/config.c index c64406113686e8..51f22088e7e20c 100644 --- a/config.c +++ b/config.c @@ -467,6 +467,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.notesref")) { + notes_ref_name = xstrdup(value); + return 0; + } + if (!strcmp(var, "core.pager")) return git_config_string(&pager_program, var, value); diff --git a/environment.c b/environment.c index 5de683784039f2..571ab56b7e58e2 100644 --- a/environment.c +++ b/environment.c @@ -49,6 +49,7 @@ enum push_default_type push_default = PUSH_DEFAULT_MATCHING; #define OBJECT_CREATION_MODE OBJECT_CREATION_USES_HARDLINKS #endif enum object_creation_mode object_creation_mode = OBJECT_CREATION_MODE; +char *notes_ref_name; int grafts_replace_parents = 1; /* Parallel index stat data preload? */ diff --git a/notes.c b/notes.c new file mode 100644 index 00000000000000..66379ffd22ea2d --- /dev/null +++ b/notes.c @@ -0,0 +1,70 @@ +#include "cache.h" +#include "commit.h" +#include "notes.h" +#include "refs.h" +#include "utf8.h" +#include "strbuf.h" + +static int initialized; + +void get_commit_notes(const struct commit *commit, struct strbuf *sb, + const char *output_encoding) +{ + static const char utf8[] = "utf-8"; + struct strbuf name = STRBUF_INIT; + unsigned char sha1[20]; + char *msg, *msg_p; + unsigned long linelen, msglen; + enum object_type type; + + if (!initialized) { + const char *env = getenv(GIT_NOTES_REF_ENVIRONMENT); + if (env) + notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT); + else if (!notes_ref_name) + notes_ref_name = GIT_NOTES_DEFAULT_REF; + if (notes_ref_name && read_ref(notes_ref_name, sha1)) + notes_ref_name = NULL; + initialized = 1; + } + + if (!notes_ref_name) + return; + + strbuf_addf(&name, "%s:%s", notes_ref_name, + sha1_to_hex(commit->object.sha1)); + if (get_sha1(name.buf, sha1)) + return; + + if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen || + type != OBJ_BLOB) { + free(msg); + return; + } + + if (output_encoding && *output_encoding && + strcmp(utf8, output_encoding)) { + char *reencoded = reencode_string(msg, output_encoding, utf8); + if (reencoded) { + free(msg); + msg = reencoded; + msglen = strlen(msg); + } + } + + /* we will end the annotation by a newline anyway */ + if (msglen && msg[msglen - 1] == '\n') + msglen--; + + strbuf_addstr(sb, "\nNotes:\n"); + + for (msg_p = msg; msg_p < msg + msglen; msg_p += linelen + 1) { + linelen = strchrnul(msg_p, '\n') - msg_p; + + strbuf_addstr(sb, " "); + strbuf_add(sb, msg_p, linelen); + strbuf_addch(sb, '\n'); + } + + free(msg); +} diff --git a/notes.h b/notes.h new file mode 100644 index 00000000000000..79d21b65f56d23 --- /dev/null +++ b/notes.h @@ -0,0 +1,7 @@ +#ifndef NOTES_H +#define NOTES_H + +void get_commit_notes(const struct commit *commit, struct strbuf *sb, + const char *output_encoding); + +#endif diff --git a/pretty.c b/pretty.c index f5983f8baa98b6..e25db81eaa531b 100644 --- a/pretty.c +++ b/pretty.c @@ -6,6 +6,7 @@ #include "string-list.h" #include "mailmap.h" #include "log-tree.h" +#include "notes.h" #include "color.h" static char *user_format; @@ -975,5 +976,9 @@ void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit, */ if (fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body) strbuf_addch(sb, '\n'); + + if (fmt != CMIT_FMT_ONELINE) + get_commit_notes(commit, sb, encoding); + free(reencoded); } From 65d9fb487f36d4a12a169dc18cbbb5225337c085 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 9 Oct 2009 12:21:58 +0200 Subject: [PATCH 02/14] Add a script to edit/inspect notes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The script 'git notes' allows you to edit and show commit notes, by calling either git notes show or git notes edit This patch has been improved by the following contributions: - Tor Arne Vestbø: fix printing of multi-line notes - Michael J Gruber: test and handle empty notes gracefully - Thomas Rast: - only clean up message file when editing - use GIT_EDITOR and core.editor over VISUAL/EDITOR - t3301: fix confusing quoting in test for valid notes ref - t3301: use test_must_fail instead of ! - refuse to edit notes outside refs/notes/ - Junio C Hamano: tests: fix "export var=val" - Christian Couder: documentation: fix 'linkgit' macro in "git-notes.txt" - Johan Herland: minor cleanup and bugfixing in git-notes.sh (v2) Signed-off-by: Johannes Schindelin Signed-off-by: Tor Arne Vestbø Signed-off-by: Michael J Gruber Signed-off-by: Thomas Rast Signed-off-by: Christian Couder Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- .gitignore | 1 + Documentation/git-notes.txt | 46 +++++++++++++++ Makefile | 1 + command-list.txt | 1 + git-notes.sh | 73 +++++++++++++++++++++++ t/t3301-notes.sh | 114 ++++++++++++++++++++++++++++++++++++ 6 files changed, 236 insertions(+) create mode 100644 Documentation/git-notes.txt create mode 100755 git-notes.sh create mode 100755 t/t3301-notes.sh diff --git a/.gitignore b/.gitignore index 51a37b1af7ab92..cbafa64b44905e 100644 --- a/.gitignore +++ b/.gitignore @@ -86,6 +86,7 @@ git-mktag git-mktree git-name-rev git-mv +git-notes git-pack-redundant git-pack-objects git-pack-refs diff --git a/Documentation/git-notes.txt b/Documentation/git-notes.txt new file mode 100644 index 00000000000000..7136016c7410b2 --- /dev/null +++ b/Documentation/git-notes.txt @@ -0,0 +1,46 @@ +git-notes(1) +============ + +NAME +---- +git-notes - Add/inspect commit notes + +SYNOPSIS +-------- +[verse] +'git-notes' (edit | show) [commit] + +DESCRIPTION +----------- +This command allows you to add notes to commit messages, without +changing the commit. To discern these notes from the message stored +in the commit object, the notes are indented like the message, after +an unindented line saying "Notes:". + +To disable commit notes, you have to set the config variable +core.notesRef to the empty string. Alternatively, you can set it +to a different ref, something like "refs/notes/bugzilla". This setting +can be overridden by the environment variable "GIT_NOTES_REF". + + +SUBCOMMANDS +----------- + +edit:: + Edit the notes for a given commit (defaults to HEAD). + +show:: + Show the notes for a given commit (defaults to HEAD). + + +Author +------ +Written by Johannes Schindelin + +Documentation +------------- +Documentation by Johannes Schindelin + +GIT +--- +Part of the linkgit:git[7] suite diff --git a/Makefile b/Makefile index 9a6a729868d891..8d7cec79d9097c 100644 --- a/Makefile +++ b/Makefile @@ -321,6 +321,7 @@ SCRIPT_SH += git-merge-one-file.sh SCRIPT_SH += git-merge-resolve.sh SCRIPT_SH += git-mergetool.sh SCRIPT_SH += git-mergetool--lib.sh +SCRIPT_SH += git-notes.sh SCRIPT_SH += git-parse-remote.sh SCRIPT_SH += git-pull.sh SCRIPT_SH += git-quiltimport.sh diff --git a/command-list.txt b/command-list.txt index fb03a2ebb5d51f..4296941b68f981 100644 --- a/command-list.txt +++ b/command-list.txt @@ -74,6 +74,7 @@ git-mktag plumbingmanipulators git-mktree plumbingmanipulators git-mv mainporcelain common git-name-rev plumbinginterrogators +git-notes mainporcelain git-pack-objects plumbingmanipulators git-pack-redundant plumbinginterrogators git-pack-refs ancillarymanipulators diff --git a/git-notes.sh b/git-notes.sh new file mode 100755 index 00000000000000..f06c2549c48971 --- /dev/null +++ b/git-notes.sh @@ -0,0 +1,73 @@ +#!/bin/sh + +USAGE="(edit | show) [commit]" +. git-sh-setup + +test -n "$3" && usage + +test -z "$1" && usage +ACTION="$1"; shift + +test -z "$GIT_NOTES_REF" && GIT_NOTES_REF="$(git config core.notesref)" +test -z "$GIT_NOTES_REF" && GIT_NOTES_REF="refs/notes/commits" + +COMMIT=$(git rev-parse --verify --default HEAD "$@") || +die "Invalid commit: $@" + +case "$ACTION" in +edit) + if [ "${GIT_NOTES_REF#refs/notes/}" = "$GIT_NOTES_REF" ]; then + die "Refusing to edit notes in $GIT_NOTES_REF (outside of refs/notes/)" + fi + + MSG_FILE="$GIT_DIR/new-notes-$COMMIT" + GIT_INDEX_FILE="$MSG_FILE.idx" + export GIT_INDEX_FILE + + trap ' + test -f "$MSG_FILE" && rm "$MSG_FILE" + test -f "$GIT_INDEX_FILE" && rm "$GIT_INDEX_FILE" + ' 0 + + GIT_NOTES_REF= git log -1 $COMMIT | sed "s/^/#/" > "$MSG_FILE" + + CURRENT_HEAD=$(git show-ref "$GIT_NOTES_REF" | cut -f 1 -d ' ') + if [ -z "$CURRENT_HEAD" ]; then + PARENT= + else + PARENT="-p $CURRENT_HEAD" + git read-tree "$GIT_NOTES_REF" || die "Could not read index" + git cat-file blob :$COMMIT >> "$MSG_FILE" 2> /dev/null + fi + + core_editor="$(git config core.editor)" + ${GIT_EDITOR:-${core_editor:-${VISUAL:-${EDITOR:-vi}}}} "$MSG_FILE" + + grep -v ^# < "$MSG_FILE" | git stripspace > "$MSG_FILE".processed + mv "$MSG_FILE".processed "$MSG_FILE" + if [ -s "$MSG_FILE" ]; then + BLOB=$(git hash-object -w "$MSG_FILE") || + die "Could not write into object database" + git update-index --add --cacheinfo 0644 $BLOB $COMMIT || + die "Could not write index" + else + test -z "$CURRENT_HEAD" && + die "Will not initialise with empty tree" + git update-index --force-remove $COMMIT || + die "Could not update index" + fi + + TREE=$(git write-tree) || die "Could not write tree" + NEW_HEAD=$(echo Annotate $COMMIT | git commit-tree $TREE $PARENT) || + die "Could not annotate" + git update-ref -m "Annotate $COMMIT" \ + "$GIT_NOTES_REF" $NEW_HEAD $CURRENT_HEAD +;; +show) + git rev-parse -q --verify "$GIT_NOTES_REF":$COMMIT > /dev/null || + die "No note for commit $COMMIT." + git show "$GIT_NOTES_REF":$COMMIT +;; +*) + usage +esac diff --git a/t/t3301-notes.sh b/t/t3301-notes.sh new file mode 100755 index 00000000000000..73e53be40b1a94 --- /dev/null +++ b/t/t3301-notes.sh @@ -0,0 +1,114 @@ +#!/bin/sh +# +# Copyright (c) 2007 Johannes E. Schindelin +# + +test_description='Test commit notes' + +. ./test-lib.sh + +cat > fake_editor.sh << \EOF +echo "$MSG" > "$1" +echo "$MSG" >& 2 +EOF +chmod a+x fake_editor.sh +VISUAL=./fake_editor.sh +export VISUAL + +test_expect_success 'cannot annotate non-existing HEAD' ' + (MSG=3 && export MSG && test_must_fail git notes edit) +' + +test_expect_success setup ' + : > a1 && + git add a1 && + test_tick && + git commit -m 1st && + : > a2 && + git add a2 && + test_tick && + git commit -m 2nd +' + +test_expect_success 'need valid notes ref' ' + (MSG=1 GIT_NOTES_REF=/ && export MSG GIT_NOTES_REF && + test_must_fail git notes edit) && + (MSG=2 GIT_NOTES_REF=/ && export MSG GIT_NOTES_REF && + test_must_fail git notes show) +' + +test_expect_success 'refusing to edit in refs/heads/' ' + (MSG=1 GIT_NOTES_REF=refs/heads/bogus && + export MSG GIT_NOTES_REF && + test_must_fail git notes edit) +' + +test_expect_success 'refusing to edit in refs/remotes/' ' + (MSG=1 GIT_NOTES_REF=refs/remotes/bogus && + export MSG GIT_NOTES_REF && + test_must_fail git notes edit) +' + +# 1 indicates caught gracefully by die, 128 means git-show barked +test_expect_success 'handle empty notes gracefully' ' + git notes show ; test 1 = $? +' + +test_expect_success 'create notes' ' + git config core.notesRef refs/notes/commits && + MSG=b1 git notes edit && + test ! -f .git/new-notes && + test 1 = $(git ls-tree refs/notes/commits | wc -l) && + test b1 = $(git notes show) && + git show HEAD^ && + test_must_fail git notes show HEAD^ +' + +cat > expect << EOF +commit 268048bfb8a1fb38e703baceb8ab235421bf80c5 +Author: A U Thor +Date: Thu Apr 7 15:14:13 2005 -0700 + + 2nd + +Notes: + b1 +EOF + +test_expect_success 'show notes' ' + ! (git cat-file commit HEAD | grep b1) && + git log -1 > output && + test_cmp expect output +' +test_expect_success 'create multi-line notes (setup)' ' + : > a3 && + git add a3 && + test_tick && + git commit -m 3rd && + MSG="b3 +c3c3c3c3 +d3d3d3" git notes edit +' + +cat > expect-multiline << EOF +commit 1584215f1d29c65e99c6c6848626553fdd07fd75 +Author: A U Thor +Date: Thu Apr 7 15:15:13 2005 -0700 + + 3rd + +Notes: + b3 + c3c3c3c3 + d3d3d3 +EOF + +printf "\n" >> expect-multiline +cat expect >> expect-multiline + +test_expect_success 'show multi-line notes' ' + git log -2 > output && + test_cmp expect-multiline output +' + +test_done From fd53c9eb445815696bf84c4701b9af73b5d7f50d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 9 Oct 2009 12:21:59 +0200 Subject: [PATCH 03/14] Speed up git notes lookup To avoid looking up each and every commit in the notes ref's tree object, which is very expensive, speed things up by slurping the tree object's contents into a hash_map. The idea for the hashmap singleton is from David Reiss, initial benchmarking by Jeff King. Note: the implementation allows for arbitrary entries in the notes tree object, ignoring those that do not reference a valid object. This allows you to annotate arbitrary branches, or objects. This patch has been improved by the following contributions: - Junio C Hamano: fixed an obvious error in initialize_hash_map() Signed-off-by: Johannes Schindelin Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 102 insertions(+), 10 deletions(-) diff --git a/notes.c b/notes.c index 66379ffd22ea2d..2b66723f5fe52b 100644 --- a/notes.c +++ b/notes.c @@ -4,15 +4,112 @@ #include "refs.h" #include "utf8.h" #include "strbuf.h" +#include "tree-walk.h" + +struct entry { + unsigned char commit_sha1[20]; + unsigned char notes_sha1[20]; +}; + +struct hash_map { + struct entry *entries; + off_t count, size; +}; static int initialized; +static struct hash_map hash_map; + +static int hash_index(struct hash_map *map, const unsigned char *sha1) +{ + int i = ((*(unsigned int *)sha1) % map->size); + + for (;;) { + unsigned char *current = map->entries[i].commit_sha1; + + if (!hashcmp(sha1, current)) + return i; + + if (is_null_sha1(current)) + return -1 - i; + + if (++i == map->size) + i = 0; + } +} + +static void add_entry(const unsigned char *commit_sha1, + const unsigned char *notes_sha1) +{ + int index; + + if (hash_map.count + 1 > hash_map.size >> 1) { + int i, old_size = hash_map.size; + struct entry *old = hash_map.entries; + + hash_map.size = old_size ? old_size << 1 : 64; + hash_map.entries = (struct entry *) + xcalloc(sizeof(struct entry), hash_map.size); + + for (i = 0; i < old_size; i++) + if (!is_null_sha1(old[i].commit_sha1)) { + index = -1 - hash_index(&hash_map, + old[i].commit_sha1); + memcpy(hash_map.entries + index, old + i, + sizeof(struct entry)); + } + free(old); + } + + index = hash_index(&hash_map, commit_sha1); + if (index < 0) { + index = -1 - index; + hash_map.count++; + } + + hashcpy(hash_map.entries[index].commit_sha1, commit_sha1); + hashcpy(hash_map.entries[index].notes_sha1, notes_sha1); +} + +static void initialize_hash_map(const char *notes_ref_name) +{ + unsigned char sha1[20], commit_sha1[20]; + unsigned mode; + struct tree_desc desc; + struct name_entry entry; + void *buf; + + if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) || + get_tree_entry(commit_sha1, "", sha1, &mode)) + return; + + buf = fill_tree_descriptor(&desc, sha1); + if (!buf) + die("Could not read %s for notes-index", sha1_to_hex(sha1)); + + while (tree_entry(&desc, &entry)) + if (!get_sha1(entry.path, commit_sha1)) + add_entry(commit_sha1, entry.sha1); + free(buf); +} + +static unsigned char *lookup_notes(const unsigned char *commit_sha1) +{ + int index; + + if (!hash_map.size) + return NULL; + + index = hash_index(&hash_map, commit_sha1); + if (index < 0) + return NULL; + return hash_map.entries[index].notes_sha1; +} void get_commit_notes(const struct commit *commit, struct strbuf *sb, const char *output_encoding) { static const char utf8[] = "utf-8"; - struct strbuf name = STRBUF_INIT; - unsigned char sha1[20]; + unsigned char *sha1; char *msg, *msg_p; unsigned long linelen, msglen; enum object_type type; @@ -23,17 +120,12 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb, notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT); else if (!notes_ref_name) notes_ref_name = GIT_NOTES_DEFAULT_REF; - if (notes_ref_name && read_ref(notes_ref_name, sha1)) - notes_ref_name = NULL; + initialize_hash_map(notes_ref_name); initialized = 1; } - if (!notes_ref_name) - return; - - strbuf_addf(&name, "%s:%s", notes_ref_name, - sha1_to_hex(commit->object.sha1)); - if (get_sha1(name.buf, sha1)) + sha1 = lookup_notes(commit->object.sha1); + if (!sha1) return; if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen || From a5b0c24f3e9a29f8fe496f49db82f2e3f1b687ce Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 9 Oct 2009 12:22:00 +0200 Subject: [PATCH 04/14] Add an expensive test for git-notes git-notes have the potential of being pretty expensive, so test with a lot of commits. A lot. So to make things cheaper, you have to opt-in explicitely, by setting the environment variable GIT_NOTES_TIMING_TESTS. This patch has been improved by the following contributions: - Junio C Hamano: tests: fix "export var=val" Signed-off-by: Johannes Schindelin Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- t/t3302-notes-index-expensive.sh | 98 ++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100755 t/t3302-notes-index-expensive.sh diff --git a/t/t3302-notes-index-expensive.sh b/t/t3302-notes-index-expensive.sh new file mode 100755 index 00000000000000..0ef3e959595cbf --- /dev/null +++ b/t/t3302-notes-index-expensive.sh @@ -0,0 +1,98 @@ +#!/bin/sh +# +# Copyright (c) 2007 Johannes E. Schindelin +# + +test_description='Test commit notes index (expensive!)' + +. ./test-lib.sh + +test -z "$GIT_NOTES_TIMING_TESTS" && { + say Skipping timing tests + test_done + exit +} + +create_repo () { + number_of_commits=$1 + nr=0 + parent= + test -d .git || { + git init && + tree=$(git write-tree) && + while [ $nr -lt $number_of_commits ]; do + test_tick && + commit=$(echo $nr | git commit-tree $tree $parent) || + return + parent="-p $commit" + nr=$(($nr+1)) + done && + git update-ref refs/heads/master $commit && + { + GIT_INDEX_FILE=.git/temp; export GIT_INDEX_FILE; + git rev-list HEAD | cat -n | sed "s/^[ ][ ]*/ /g" | + while read nr sha1; do + blob=$(echo note $nr | git hash-object -w --stdin) && + echo $sha1 | sed "s/^/0644 $blob 0 /" + done | git update-index --index-info && + tree=$(git write-tree) && + test_tick && + commit=$(echo notes | git commit-tree $tree) && + git update-ref refs/notes/commits $commit + } && + git config core.notesRef refs/notes/commits + } +} + +test_notes () { + count=$1 && + git config core.notesRef refs/notes/commits && + git log | grep "^ " > output && + i=1 && + while [ $i -le $count ]; do + echo " $(($count-$i))" && + echo " note $i" && + i=$(($i+1)); + done > expect && + git diff expect output +} + +cat > time_notes << \EOF + mode=$1 + i=1 + while [ $i -lt $2 ]; do + case $1 in + no-notes) + GIT_NOTES_REF=non-existing; export GIT_NOTES_REF + ;; + notes) + unset GIT_NOTES_REF + ;; + esac + git log >/dev/null + i=$(($i+1)) + done +EOF + +time_notes () { + for mode in no-notes notes + do + echo $mode + /usr/bin/time sh ../time_notes $mode $1 + done +} + +for count in 10 100 1000 10000; do + + mkdir $count + (cd $count; + + test_expect_success "setup $count" "create_repo $count" + + test_expect_success 'notes work' "test_notes $count" + + test_expect_success 'notes timing' "time_notes 100" + ) +done + +test_done From d9246d4303f441c0e30614cd3a97a80fbe9354b6 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:01 +0200 Subject: [PATCH 05/14] Teach "-m " and "-F " to "git notes edit" The "-m" and "-F" options are already the established method (in both git-commit and git-tag) to specify a commit/tag message without invoking the editor. This patch teaches "git notes edit" to respect the same options for specifying a notes message without invoking the editor. Multiple "-m" and/or "-F" options are concatenated as separate paragraphs. The patch also updates the "git notes" documentation and adds selftests for the new functionality. Unfortunately, the added selftests include a couple of lines with trailing whitespace (without these the test will fail). This may cause git to warn about "whitespace errors". This patch has been improved by the following contributions: - Thomas Rast: fix trailing whitespace in t3301 Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- Documentation/git-notes.txt | 16 +++++++++- git-notes.sh | 64 ++++++++++++++++++++++++++++++++----- t/t3301-notes.sh | 36 +++++++++++++++++++++ 3 files changed, 107 insertions(+), 9 deletions(-) diff --git a/Documentation/git-notes.txt b/Documentation/git-notes.txt index 7136016c7410b2..94cceb13193795 100644 --- a/Documentation/git-notes.txt +++ b/Documentation/git-notes.txt @@ -8,7 +8,7 @@ git-notes - Add/inspect commit notes SYNOPSIS -------- [verse] -'git-notes' (edit | show) [commit] +'git-notes' (edit [-F | -m ] | show) [commit] DESCRIPTION ----------- @@ -33,6 +33,20 @@ show:: Show the notes for a given commit (defaults to HEAD). +OPTIONS +------- +-m :: + Use the given note message (instead of prompting). + If multiple `-m` (or `-F`) options are given, their + values are concatenated as separate paragraphs. + +-F :: + Take the note message from the given file. Use '-' to + read the note message from the standard input. + If multiple `-F` (or `-m`) options are given, their + values are concatenated as separate paragraphs. + + Author ------ Written by Johannes Schindelin diff --git a/git-notes.sh b/git-notes.sh index f06c2549c48971..e642e47d9f1a31 100755 --- a/git-notes.sh +++ b/git-notes.sh @@ -1,16 +1,59 @@ #!/bin/sh -USAGE="(edit | show) [commit]" +USAGE="(edit [-F | -m ] | show) [commit]" . git-sh-setup -test -n "$3" && usage - test -z "$1" && usage ACTION="$1"; shift test -z "$GIT_NOTES_REF" && GIT_NOTES_REF="$(git config core.notesref)" test -z "$GIT_NOTES_REF" && GIT_NOTES_REF="refs/notes/commits" +MESSAGE= +while test $# != 0 +do + case "$1" in + -m) + test "$ACTION" = "edit" || usage + shift + if test "$#" = "0"; then + die "error: option -m needs an argument" + else + if [ -z "$MESSAGE" ]; then + MESSAGE="$1" + else + MESSAGE="$MESSAGE + +$1" + fi + shift + fi + ;; + -F) + test "$ACTION" = "edit" || usage + shift + if test "$#" = "0"; then + die "error: option -F needs an argument" + else + if [ -z "$MESSAGE" ]; then + MESSAGE="$(cat "$1")" + else + MESSAGE="$MESSAGE + +$(cat "$1")" + fi + shift + fi + ;; + -*) + usage + ;; + *) + break + ;; + esac +done + COMMIT=$(git rev-parse --verify --default HEAD "$@") || die "Invalid commit: $@" @@ -29,19 +72,24 @@ edit) test -f "$GIT_INDEX_FILE" && rm "$GIT_INDEX_FILE" ' 0 - GIT_NOTES_REF= git log -1 $COMMIT | sed "s/^/#/" > "$MSG_FILE" - CURRENT_HEAD=$(git show-ref "$GIT_NOTES_REF" | cut -f 1 -d ' ') if [ -z "$CURRENT_HEAD" ]; then PARENT= else PARENT="-p $CURRENT_HEAD" git read-tree "$GIT_NOTES_REF" || die "Could not read index" - git cat-file blob :$COMMIT >> "$MSG_FILE" 2> /dev/null fi - core_editor="$(git config core.editor)" - ${GIT_EDITOR:-${core_editor:-${VISUAL:-${EDITOR:-vi}}}} "$MSG_FILE" + if [ -z "$MESSAGE" ]; then + GIT_NOTES_REF= git log -1 $COMMIT | sed "s/^/#/" > "$MSG_FILE" + if [ ! -z "$CURRENT_HEAD" ]; then + git cat-file blob :$COMMIT >> "$MSG_FILE" 2> /dev/null + fi + core_editor="$(git config core.editor)" + ${GIT_EDITOR:-${core_editor:-${VISUAL:-${EDITOR:-vi}}}} "$MSG_FILE" + else + echo "$MESSAGE" > "$MSG_FILE" + fi grep -v ^# < "$MSG_FILE" | git stripspace > "$MSG_FILE".processed mv "$MSG_FILE".processed "$MSG_FILE" diff --git a/t/t3301-notes.sh b/t/t3301-notes.sh index 73e53be40b1a94..1e34f4836f4840 100755 --- a/t/t3301-notes.sh +++ b/t/t3301-notes.sh @@ -110,5 +110,41 @@ test_expect_success 'show multi-line notes' ' git log -2 > output && test_cmp expect-multiline output ' +test_expect_success 'create -m and -F notes (setup)' ' + : > a4 && + git add a4 && + test_tick && + git commit -m 4th && + echo "xyzzy" > note5 && + git notes edit -m spam -F note5 -m "foo +bar +baz" +' + +whitespace=" " +cat > expect-m-and-F << EOF +commit 15023535574ded8b1a89052b32673f84cf9582b8 +Author: A U Thor +Date: Thu Apr 7 15:16:13 2005 -0700 + + 4th + +Notes: + spam +$whitespace + xyzzy +$whitespace + foo + bar + baz +EOF + +printf "\n" >> expect-m-and-F +cat expect-multiline >> expect-m-and-F + +test_expect_success 'show -m and -F notes' ' + git log -3 > output && + test_cmp expect-m-and-F output +' test_done From a8dd2e7d2bd7472b4e02b07aeef795de9c74f3e7 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:02 +0200 Subject: [PATCH 06/14] fast-import: Add support for importing commit notes Introduce a 'notemodify' subcommand of the 'commit' command. This subcommand is similar to 'filemodify', except that no mode is supplied (all notes have mode 0644), and the path is set to the hex SHA1 of the given "comittish". This enables fast import of note objects along with their associated commits, since the notes can now be named using the mark references of their corresponding commits. The patch also includes a test case of the added functionality. Signed-off-by: Johan Herland Acked-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 45 ++++++-- fast-import.c | 88 +++++++++++++++- t/t9300-fast-import.sh | 166 ++++++++++++++++++++++++++++++ 3 files changed, 289 insertions(+), 10 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index c2f483a8d2aed8..288032c7b88f2f 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -316,7 +316,7 @@ change to the project. data ('from' SP LF)? ('merge' SP LF)? - (filemodify | filedelete | filecopy | filerename | filedeleteall)* + (filemodify | filedelete | filecopy | filerename | filedeleteall | notemodify)* LF? .... @@ -339,14 +339,13 @@ commit message use a 0 length data. Commit messages are free-form and are not interpreted by Git. Currently they must be encoded in UTF-8, as fast-import does not permit other encodings to be specified. -Zero or more `filemodify`, `filedelete`, `filecopy`, `filerename` -and `filedeleteall` commands +Zero or more `filemodify`, `filedelete`, `filecopy`, `filerename`, +`filedeleteall` and `notemodify` commands may be included to update the contents of the branch prior to creating the commit. These commands may be supplied in any order. However it is recommended that a `filedeleteall` command precede -all `filemodify`, `filecopy` and `filerename` commands in the same -commit, as `filedeleteall` -wipes the branch clean (see below). +all `filemodify`, `filecopy`, `filerename` and `notemodify` commands in +the same commit, as `filedeleteall` wipes the branch clean (see below). The `LF` after the command is optional (it used to be required). @@ -595,6 +594,40 @@ more memory per active branch (less than 1 MiB for even most large projects); so frontends that can easily obtain only the affected paths for a commit are encouraged to do so. +`notemodify` +^^^^^^^^^^^^ +Included in a `commit` command to add a new note (annotating a given +commit) or change the content of an existing note. This command has +two different means of specifying the content of the note. + +External data format:: + The data content for the note was already supplied by a prior + `blob` command. The frontend just needs to connect it to the + commit that is to be annotated. ++ +.... + 'N' SP SP LF +.... ++ +Here `` can be either a mark reference (`:`) +set by a prior `blob` command, or a full 40-byte SHA-1 of an +existing Git blob object. + +Inline data format:: + The data content for the note has not been supplied yet. + The frontend wants to supply it as part of this modify + command. ++ +.... + 'N' SP 'inline' SP LF + data +.... ++ +See below for a detailed description of the `data` command. + +In both formats `` is any of the commit specification +expressions also accepted by `from` (see above). + `mark` ~~~~~~ Arranges for fast-import to save a reference to the current object, allowing diff --git a/fast-import.c b/fast-import.c index 6faaaacb68999d..b41d29fd31e47f 100644 --- a/fast-import.c +++ b/fast-import.c @@ -22,8 +22,8 @@ Format of STDIN stream: ('author' sp name sp '<' email '>' sp when lf)? 'committer' sp name sp '<' email '>' sp when lf commit_msg - ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? - ('merge' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)* + ('from' sp committish lf)? + ('merge' sp committish lf)* file_change* lf?; commit_msg ::= data; @@ -41,15 +41,18 @@ Format of STDIN stream: file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf; file_inm ::= 'M' sp mode sp 'inline' sp path_str lf data; + note_obm ::= 'N' sp (hexsha1 | idnum) sp committish lf; + note_inm ::= 'N' sp 'inline' sp committish lf + data; new_tag ::= 'tag' sp tag_str lf - 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf + 'from' sp committish lf ('tagger' sp name sp '<' email '>' sp when lf)? tag_msg; tag_msg ::= data; reset_branch ::= 'reset' sp ref_str lf - ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)? + ('from' sp committish lf)? lf?; checkpoint ::= 'checkpoint' lf @@ -88,6 +91,7 @@ Format of STDIN stream: # stream formatting is: \, " and LF. Otherwise these values # are UTF8. # + committish ::= (ref_str | hexsha1 | sha1exp_str | idnum); ref_str ::= ref; sha1exp_str ::= sha1exp; tag_str ::= tag; @@ -2006,6 +2010,80 @@ static void file_change_cr(struct branch *b, int rename) leaf.tree); } +static void note_change_n(struct branch *b) +{ + const char *p = command_buf.buf + 2; + static struct strbuf uq = STRBUF_INIT; + struct object_entry *oe = oe; + struct branch *s; + unsigned char sha1[20], commit_sha1[20]; + uint16_t inline_data = 0; + + /* or 'inline' */ + if (*p == ':') { + char *x; + oe = find_mark(strtoumax(p + 1, &x, 10)); + hashcpy(sha1, oe->sha1); + p = x; + } else if (!prefixcmp(p, "inline")) { + inline_data = 1; + p += 6; + } else { + if (get_sha1_hex(p, sha1)) + die("Invalid SHA1: %s", command_buf.buf); + oe = find_object(sha1); + p += 40; + } + if (*p++ != ' ') + die("Missing space after SHA1: %s", command_buf.buf); + + /* */ + s = lookup_branch(p); + if (s) { + hashcpy(commit_sha1, s->sha1); + } else if (*p == ':') { + uintmax_t commit_mark = strtoumax(p + 1, NULL, 10); + struct object_entry *commit_oe = find_mark(commit_mark); + if (commit_oe->type != OBJ_COMMIT) + die("Mark :%" PRIuMAX " not a commit", commit_mark); + hashcpy(commit_sha1, commit_oe->sha1); + } else if (!get_sha1(p, commit_sha1)) { + unsigned long size; + char *buf = read_object_with_reference(commit_sha1, + commit_type, &size, commit_sha1); + if (!buf || size < 46) + die("Not a valid commit: %s", p); + free(buf); + } else + die("Invalid ref name or SHA1 expression: %s", p); + + if (inline_data) { + static struct strbuf buf = STRBUF_INIT; + + if (p != uq.buf) { + strbuf_addstr(&uq, p); + p = uq.buf; + } + read_next_command(); + parse_data(&buf); + store_object(OBJ_BLOB, &buf, &last_blob, sha1, 0); + } else if (oe) { + if (oe->type != OBJ_BLOB) + die("Not a blob (actually a %s): %s", + typename(oe->type), command_buf.buf); + } else { + enum object_type type = sha1_object_info(sha1, NULL); + if (type < 0) + die("Blob not found: %s", command_buf.buf); + if (type != OBJ_BLOB) + die("Not a blob (actually a %s): %s", + typename(type), command_buf.buf); + } + + tree_content_set(&b->branch_tree, sha1_to_hex(commit_sha1), sha1, + S_IFREG | 0644, NULL); +} + static void file_change_deleteall(struct branch *b) { release_tree_content_recursive(b->branch_tree.tree); @@ -2175,6 +2253,8 @@ static void parse_new_commit(void) file_change_cr(b, 1); else if (!prefixcmp(command_buf.buf, "C ")) file_change_cr(b, 0); + else if (!prefixcmp(command_buf.buf, "N ")) + note_change_n(b); else if (!strcmp("deleteall", command_buf.buf)) file_change_deleteall(b); else { diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 821be7ce8d92f8..b49815d10806a5 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -1088,4 +1088,170 @@ INPUT_END test_expect_success 'P: fail on blob mark in gitlink' ' test_must_fail git fast-import input < $GIT_COMMITTER_DATE +data < $GIT_COMMITTER_DATE +data < $GIT_COMMITTER_DATE +data < $GIT_COMMITTER_DATE +data <expect < $GIT_COMMITTER_DATE +committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + +first (:3) +EOF +test_expect_success \ + 'Q: verify first commit' \ + 'git cat-file commit notes-test~2 | sed 1d >actual && + test_cmp expect actual' + +cat >expect < $GIT_COMMITTER_DATE +committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + +second (:5) +EOF +test_expect_success \ + 'Q: verify second commit' \ + 'git cat-file commit notes-test^ | sed 1d >actual && + test_cmp expect actual' + +cat >expect < $GIT_COMMITTER_DATE +committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + +third (:6) +EOF +test_expect_success \ + 'Q: verify third commit' \ + 'git cat-file commit notes-test | sed 1d >actual && + test_cmp expect actual' + +cat >expect < $GIT_COMMITTER_DATE +committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + +notes (:9) +EOF +test_expect_success \ + 'Q: verify notes commit' \ + 'git cat-file commit refs/notes/foobar | sed 1d >actual && + test_cmp expect actual' + +cat >expect.unsorted <expect +test_expect_success \ + 'Q: verify notes tree' \ + 'git cat-file -p refs/notes/foobar^{tree} | sed "s/ [0-9a-f]* / /" >actual && + test_cmp expect actual' + +echo "$note1_data" >expect +test_expect_success \ + 'Q: verify note for first commit' \ + 'git cat-file blob refs/notes/foobar:$commit1 >actual && test_cmp expect actual' + +echo "$note2_data" >expect +test_expect_success \ + 'Q: verify note for second commit' \ + 'git cat-file blob refs/notes/foobar:$commit2 >actual && test_cmp expect actual' + +echo "$note3_data" >expect +test_expect_success \ + 'Q: verify note for third commit' \ + 'git cat-file blob refs/notes/foobar:$commit3 >actual && test_cmp expect actual' + test_done From 3ed24b6aaf35d6ca1eef2643cd0b9128eb152cda Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:03 +0200 Subject: [PATCH 07/14] t3302-notes-index-expensive: Speed up create_repo() Creating repos with 10/100/1000/10000 commits and notes takes a lot of time. However, using git-fast-import to do the job is a lot more efficient than using plumbing commands to do the same. This patch decreases the overall run-time of this test on my machine from ~3 to ~1 minutes. Signed-off-by: Johan Herland Acked-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- t/t3302-notes-index-expensive.sh | 74 ++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/t/t3302-notes-index-expensive.sh b/t/t3302-notes-index-expensive.sh index 0ef3e959595cbf..ee84fc4884676e 100755 --- a/t/t3302-notes-index-expensive.sh +++ b/t/t3302-notes-index-expensive.sh @@ -16,30 +16,50 @@ test -z "$GIT_NOTES_TIMING_TESTS" && { create_repo () { number_of_commits=$1 nr=0 - parent= test -d .git || { git init && - tree=$(git write-tree) && - while [ $nr -lt $number_of_commits ]; do - test_tick && - commit=$(echo $nr | git commit-tree $tree $parent) || - return - parent="-p $commit" - nr=$(($nr+1)) - done && - git update-ref refs/heads/master $commit && - { - GIT_INDEX_FILE=.git/temp; export GIT_INDEX_FILE; - git rev-list HEAD | cat -n | sed "s/^[ ][ ]*/ /g" | - while read nr sha1; do - blob=$(echo note $nr | git hash-object -w --stdin) && - echo $sha1 | sed "s/^/0644 $blob 0 /" - done | git update-index --index-info && - tree=$(git write-tree) && + ( + while [ $nr -lt $number_of_commits ]; do + nr=$(($nr+1)) + mark=$(($nr+$nr)) + notemark=$(($mark+1)) + test_tick && + cat < $GIT_COMMITTER_DATE +data <> note_commit + done && test_tick && - commit=$(echo notes | git commit-tree $tree) && - git update-ref refs/notes/commits $commit - } && + cat < $GIT_COMMITTER_DATE +data < output && - i=1 && - while [ $i -le $count ]; do - echo " $(($count-$i))" && - echo " note $i" && - i=$(($i+1)); + i=$count && + while [ $i -gt 0 ]; do + echo " commit #$i" && + echo " note for commit #$i" && + i=$(($i-1)); done > expect && - git diff expect output + test_cmp expect output } cat > time_notes << \EOF From c56fcc89b951f3e8c9240ea02676b2eef5417da6 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:04 +0200 Subject: [PATCH 08/14] Add flags to get_commit_notes() to control the format of the note string This patch adds the following flags to get_commit_notes() for adjusting the format of the produced note string: - NOTES_SHOW_HEADER: Print "Notes:" line before the notes contents - NOTES_INDENT: Indent notes contents by 4 spaces Suggested-by: Johannes Schindelin Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 8 +++++--- notes.h | 5 ++++- pretty.c | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/notes.c b/notes.c index 2b66723f5fe52b..b7d79e19005f3c 100644 --- a/notes.c +++ b/notes.c @@ -106,7 +106,7 @@ static unsigned char *lookup_notes(const unsigned char *commit_sha1) } void get_commit_notes(const struct commit *commit, struct strbuf *sb, - const char *output_encoding) + const char *output_encoding, int flags) { static const char utf8[] = "utf-8"; unsigned char *sha1; @@ -148,12 +148,14 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb, if (msglen && msg[msglen - 1] == '\n') msglen--; - strbuf_addstr(sb, "\nNotes:\n"); + if (flags & NOTES_SHOW_HEADER) + strbuf_addstr(sb, "\nNotes:\n"); for (msg_p = msg; msg_p < msg + msglen; msg_p += linelen + 1) { linelen = strchrnul(msg_p, '\n') - msg_p; - strbuf_addstr(sb, " "); + if (flags & NOTES_INDENT) + strbuf_addstr(sb, " "); strbuf_add(sb, msg_p, linelen); strbuf_addch(sb, '\n'); } diff --git a/notes.h b/notes.h index 79d21b65f56d23..7f3eed4384208a 100644 --- a/notes.h +++ b/notes.h @@ -1,7 +1,10 @@ #ifndef NOTES_H #define NOTES_H +#define NOTES_SHOW_HEADER 1 +#define NOTES_INDENT 2 + void get_commit_notes(const struct commit *commit, struct strbuf *sb, - const char *output_encoding); + const char *output_encoding, int flags); #endif diff --git a/pretty.c b/pretty.c index e25db81eaa531b..01eadd0482f234 100644 --- a/pretty.c +++ b/pretty.c @@ -978,7 +978,8 @@ void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit, strbuf_addch(sb, '\n'); if (fmt != CMIT_FMT_ONELINE) - get_commit_notes(commit, sb, encoding); + get_commit_notes(commit, sb, encoding, + NOTES_SHOW_HEADER | NOTES_INDENT); free(reencoded); } From 8b208f0213ed349ecb2ab8f7bb6c5072f8011a70 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 9 Oct 2009 12:22:05 +0200 Subject: [PATCH 09/14] Add '%N'-format for pretty-printing commit notes Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- Documentation/pretty-formats.txt | 1 + pretty.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/Documentation/pretty-formats.txt b/Documentation/pretty-formats.txt index 2a845b1e57590a..5fb10b3a1525da 100644 --- a/Documentation/pretty-formats.txt +++ b/Documentation/pretty-formats.txt @@ -123,6 +123,7 @@ The placeholders are: - '%s': subject - '%f': sanitized subject line, suitable for a filename - '%b': body +- '%N': commit notes - '%Cred': switch color to red - '%Cgreen': switch color to green - '%Cblue': switch color to blue diff --git a/pretty.c b/pretty.c index 01eadd0482f234..7f350bbdff0500 100644 --- a/pretty.c +++ b/pretty.c @@ -702,6 +702,10 @@ static size_t format_commit_item(struct strbuf *sb, const char *placeholder, case 'd': format_decoration(sb, commit); return 1; + case 'N': + get_commit_notes(commit, sb, git_log_output_encoding ? + git_log_output_encoding : git_commit_encoding, 0); + return 1; } /* For the rest we have to parse the commit header. */ From 27d57564102a98950bf4398daeeb14a15154478f Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:06 +0200 Subject: [PATCH 10/14] Teach notes code to free its internal data structures on request There's no need to be rude to memory-concious callers... This patch has been improved by the following contributions: - Junio C Hamano: avoid old-style declaration Signed-off-by: Junio C Hamano Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 7 +++++++ notes.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/notes.c b/notes.c index b7d79e19005f3c..a5d888d77250d7 100644 --- a/notes.c +++ b/notes.c @@ -105,6 +105,13 @@ static unsigned char *lookup_notes(const unsigned char *commit_sha1) return hash_map.entries[index].notes_sha1; } +void free_notes(void) +{ + free(hash_map.entries); + memset(&hash_map, 0, sizeof(struct hash_map)); + initialized = 0; +} + void get_commit_notes(const struct commit *commit, struct strbuf *sb, const char *output_encoding, int flags) { diff --git a/notes.h b/notes.h index 7f3eed4384208a..a1421e351ae0a6 100644 --- a/notes.h +++ b/notes.h @@ -1,6 +1,9 @@ #ifndef NOTES_H #define NOTES_H +/* Free (and de-initialize) the internal notes tree structure */ +void free_notes(void); + #define NOTES_SHOW_HEADER 1 #define NOTES_INDENT 2 From 23123aecf8418a6b0ec23378555ed78c438ae894 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:07 +0200 Subject: [PATCH 11/14] Teach the notes lookup code to parse notes trees with various fanout schemes The semantics used when parsing notes trees (with regards to fanout subtrees) follow Dscho's proposal fairly closely: - No concatenation/merging of notes is performed. If there are several notes objects referencing a given commit, only one of those objects are used. - If a notes object for a given commit is present in the "root" notes tree, no subtrees are consulted; the object in the root tree is used directly. - If there are more than one subtree that prefix-matches the given commit, only the subtree with the longest matching prefix is consulted. This means that if the given commit is e.g. "deadbeef", and the notes tree have subtrees "de" and "dead", then the following paths in the notes tree are searched: "deadbeef", "dead/beef". Note that "de/adbeef" is NOT searched. - Fanout directories (subtrees) must references a whole number of bytes from the SHA1 sum they subdivide. E.g. subtrees "dead" and "de" are acceptable; "d" and "dea" are not. - Multiple levels of fanout are allowed. All the above rules apply recursively. E.g. "de/adbeef" is preferred over "de/adbe/ef", etc. This patch changes the in-memory datastructure for holding parsed notes: Instead of holding all note (and subtree) entries in a hash table, a simple 16-tree structure is used instead. The tree structure consists of 16-arrays as internal nodes, and note/subtree entries as leaf nodes. The tree is traversed by indexing subsequent nibbles of the search key until a leaf node is encountered. If a subtree entry is encountered while searching for a note, the subtree is unpacked into the 16-tree structure, and the search continues into that subtree. The new algorithm performs significantly better in the cases where only a fraction of the notes need to be looked up (this is assumed to be the common case for notes lookup). The new code even performs marginally better in the worst case (where _all_ the notes are looked up). In addition to this, comes the massive performance win associated with organizing the notes tree according to some fanout scheme. Even a simple 2/38 fanout scheme is dramatically quicker to traverse (going from tens of seconds to sub-second runtimes). As for memory usage, the new code is marginally better than the old code in the worst case, but in the case of looking up only some notes from a notes tree with proper fanout, the new code uses only a small fraction of the memory needed to hold the entire notes tree. However, there is one casualty of this patch. The old notes lookup code was able to parse notes that were associated with non-SHA1s (e.g. refs). The new code requires the referenced object to be named by a SHA1 sum. Still, this is not considered a major setback, since the notes infrastructure was not originally intended to annotate objects outside the Git object database. Cc: Johannes Schindelin Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 317 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 248 insertions(+), 69 deletions(-) diff --git a/notes.c b/notes.c index a5d888d77250d7..210c4b2263810c 100644 --- a/notes.c +++ b/notes.c @@ -6,109 +6,288 @@ #include "strbuf.h" #include "tree-walk.h" -struct entry { - unsigned char commit_sha1[20]; - unsigned char notes_sha1[20]; +/* + * Use a non-balancing simple 16-tree structure with struct int_node as + * internal nodes, and struct leaf_node as leaf nodes. Each int_node has a + * 16-array of pointers to its children. + * The bottom 2 bits of each pointer is used to identify the pointer type + * - ptr & 3 == 0 - NULL pointer, assert(ptr == NULL) + * - ptr & 3 == 1 - pointer to next internal node - cast to struct int_node * + * - ptr & 3 == 2 - pointer to note entry - cast to struct leaf_node * + * - ptr & 3 == 3 - pointer to subtree entry - cast to struct leaf_node * + * + * The root node is a statically allocated struct int_node. + */ +struct int_node { + void *a[16]; }; -struct hash_map { - struct entry *entries; - off_t count, size; +/* + * Leaf nodes come in two variants, note entries and subtree entries, + * distinguished by the LSb of the leaf node pointer (see above). + * As a note entry, the key is the SHA1 of the referenced commit, and the + * value is the SHA1 of the note object. + * As a subtree entry, the key is the prefix SHA1 (w/trailing NULs) of the + * referenced commit, using the last byte of the key to store the length of + * the prefix. The value is the SHA1 of the tree object containing the notes + * subtree. + */ +struct leaf_node { + unsigned char key_sha1[20]; + unsigned char val_sha1[20]; }; -static int initialized; -static struct hash_map hash_map; +#define PTR_TYPE_NULL 0 +#define PTR_TYPE_INTERNAL 1 +#define PTR_TYPE_NOTE 2 +#define PTR_TYPE_SUBTREE 3 -static int hash_index(struct hash_map *map, const unsigned char *sha1) -{ - int i = ((*(unsigned int *)sha1) % map->size); +#define GET_PTR_TYPE(ptr) ((uintptr_t) (ptr) & 3) +#define CLR_PTR_TYPE(ptr) ((void *) ((uintptr_t) (ptr) & ~3)) +#define SET_PTR_TYPE(ptr, type) ((void *) ((uintptr_t) (ptr) | (type))) - for (;;) { - unsigned char *current = map->entries[i].commit_sha1; +#define GET_NIBBLE(n, sha1) (((sha1[n >> 1]) >> ((~n & 0x01) << 2)) & 0x0f) - if (!hashcmp(sha1, current)) - return i; +#define SUBTREE_SHA1_PREFIXCMP(key_sha1, subtree_sha1) \ + (memcmp(key_sha1, subtree_sha1, subtree_sha1[19])) - if (is_null_sha1(current)) - return -1 - i; +static struct int_node root_node; - if (++i == map->size) - i = 0; +static int initialized; + +static void load_subtree(struct leaf_node *subtree, struct int_node *node, + unsigned int n); + +/* + * To find a leaf_node: + * 1. Start at the root node, with n = 0 + * 2. Use the nth nibble of the key as an index into a: + * - If a[n] is an int_node, recurse into that node and increment n + * - If a leaf_node with matching key, return leaf_node (assert note entry) + * - If a matching subtree entry, unpack that subtree entry (and remove it); + * restart search at the current level. + * - Otherwise, we end up at a NULL pointer, or a non-matching leaf_node. + * Backtrack out of the recursion, one level at a time and check a[0]: + * - If a[0] at the current level is a matching subtree entry, unpack that + * subtree entry (and remove it); restart search at the current level. + */ +static struct leaf_node *note_tree_find(struct int_node *tree, unsigned char n, + const unsigned char *key_sha1) +{ + struct leaf_node *l; + unsigned char i = GET_NIBBLE(n, key_sha1); + void *p = tree->a[i]; + + switch(GET_PTR_TYPE(p)) { + case PTR_TYPE_INTERNAL: + l = note_tree_find(CLR_PTR_TYPE(p), n + 1, key_sha1); + if (l) + return l; + break; + case PTR_TYPE_NOTE: + l = (struct leaf_node *) CLR_PTR_TYPE(p); + if (!hashcmp(key_sha1, l->key_sha1)) + return l; /* return note object matching given key */ + break; + case PTR_TYPE_SUBTREE: + l = (struct leaf_node *) CLR_PTR_TYPE(p); + if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { + /* unpack tree and resume search */ + tree->a[i] = NULL; + load_subtree(l, tree, n); + free(l); + return note_tree_find(tree, n, key_sha1); + } + break; + case PTR_TYPE_NULL: + default: + assert(!p); + break; } + + /* + * Did not find key at this (or any lower) level. + * Check if there's a matching subtree entry in tree->a[0]. + * If so, unpack tree and resume search. + */ + p = tree->a[0]; + if (GET_PTR_TYPE(p) != PTR_TYPE_SUBTREE) + return NULL; + l = (struct leaf_node *) CLR_PTR_TYPE(p); + if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { + /* unpack tree and resume search */ + tree->a[0] = NULL; + load_subtree(l, tree, n); + free(l); + return note_tree_find(tree, n, key_sha1); + } + return NULL; } -static void add_entry(const unsigned char *commit_sha1, - const unsigned char *notes_sha1) +/* + * To insert a leaf_node: + * 1. Start at the root node, with n = 0 + * 2. Use the nth nibble of the key as an index into a: + * - If a[n] is NULL, store the tweaked pointer directly into a[n] + * - If a[n] is an int_node, recurse into that node and increment n + * - If a[n] is a leaf_node: + * 1. Check if they're equal, and handle that (abort? overwrite?) + * 2. Create a new int_node, and store both leaf_nodes there + * 3. Store the new int_node into a[n]. + */ +static int note_tree_insert(struct int_node *tree, unsigned char n, + const struct leaf_node *entry, unsigned char type) { - int index; - - if (hash_map.count + 1 > hash_map.size >> 1) { - int i, old_size = hash_map.size; - struct entry *old = hash_map.entries; - - hash_map.size = old_size ? old_size << 1 : 64; - hash_map.entries = (struct entry *) - xcalloc(sizeof(struct entry), hash_map.size); - - for (i = 0; i < old_size; i++) - if (!is_null_sha1(old[i].commit_sha1)) { - index = -1 - hash_index(&hash_map, - old[i].commit_sha1); - memcpy(hash_map.entries + index, old + i, - sizeof(struct entry)); - } - free(old); + struct int_node *new_node; + const struct leaf_node *l; + int ret; + unsigned char i = GET_NIBBLE(n, entry->key_sha1); + void *p = tree->a[i]; + assert(GET_PTR_TYPE(entry) == PTR_TYPE_NULL); + switch(GET_PTR_TYPE(p)) { + case PTR_TYPE_NULL: + assert(!p); + tree->a[i] = SET_PTR_TYPE(entry, type); + return 0; + case PTR_TYPE_INTERNAL: + return note_tree_insert(CLR_PTR_TYPE(p), n + 1, entry, type); + default: + assert(GET_PTR_TYPE(p) == PTR_TYPE_NOTE || + GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE); + l = (const struct leaf_node *) CLR_PTR_TYPE(p); + if (!hashcmp(entry->key_sha1, l->key_sha1)) + return -1; /* abort insert on matching key */ + new_node = (struct int_node *) + xcalloc(sizeof(struct int_node), 1); + ret = note_tree_insert(new_node, n + 1, + CLR_PTR_TYPE(p), GET_PTR_TYPE(p)); + if (ret) { + free(new_node); + return -1; + } + tree->a[i] = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL); + return note_tree_insert(new_node, n + 1, entry, type); } +} - index = hash_index(&hash_map, commit_sha1); - if (index < 0) { - index = -1 - index; - hash_map.count++; +/* Free the entire notes data contained in the given tree */ +static void note_tree_free(struct int_node *tree) +{ + unsigned int i; + for (i = 0; i < 16; i++) { + void *p = tree->a[i]; + switch(GET_PTR_TYPE(p)) { + case PTR_TYPE_INTERNAL: + note_tree_free(CLR_PTR_TYPE(p)); + /* fall through */ + case PTR_TYPE_NOTE: + case PTR_TYPE_SUBTREE: + free(CLR_PTR_TYPE(p)); + } } +} - hashcpy(hash_map.entries[index].commit_sha1, commit_sha1); - hashcpy(hash_map.entries[index].notes_sha1, notes_sha1); +/* + * Convert a partial SHA1 hex string to the corresponding partial SHA1 value. + * - hex - Partial SHA1 segment in ASCII hex format + * - hex_len - Length of above segment. Must be multiple of 2 between 0 and 40 + * - sha1 - Partial SHA1 value is written here + * - sha1_len - Max #bytes to store in sha1, Must be >= hex_len / 2, and < 20 + * Returns -1 on error (invalid arguments or invalid SHA1 (not in hex format). + * Otherwise, returns number of bytes written to sha1 (i.e. hex_len / 2). + * Pads sha1 with NULs up to sha1_len (not included in returned length). + */ +static int get_sha1_hex_segment(const char *hex, unsigned int hex_len, + unsigned char *sha1, unsigned int sha1_len) +{ + unsigned int i, len = hex_len >> 1; + if (hex_len % 2 != 0 || len > sha1_len) + return -1; + for (i = 0; i < len; i++) { + unsigned int val = (hexval(hex[0]) << 4) | hexval(hex[1]); + if (val & ~0xff) + return -1; + *sha1++ = val; + hex += 2; + } + for (; i < sha1_len; i++) + *sha1++ = 0; + return len; } -static void initialize_hash_map(const char *notes_ref_name) +static void load_subtree(struct leaf_node *subtree, struct int_node *node, + unsigned int n) { - unsigned char sha1[20], commit_sha1[20]; - unsigned mode; + unsigned char commit_sha1[20]; + unsigned int prefix_len; + int status; + void *buf; struct tree_desc desc; struct name_entry entry; - void *buf; + + buf = fill_tree_descriptor(&desc, subtree->val_sha1); + if (!buf) + die("Could not read %s for notes-index", + sha1_to_hex(subtree->val_sha1)); + + prefix_len = subtree->key_sha1[19]; + assert(prefix_len * 2 >= n); + memcpy(commit_sha1, subtree->key_sha1, prefix_len); + while (tree_entry(&desc, &entry)) { + int len = get_sha1_hex_segment(entry.path, strlen(entry.path), + commit_sha1 + prefix_len, 20 - prefix_len); + if (len < 0) + continue; /* entry.path is not a SHA1 sum. Skip */ + len += prefix_len; + + /* + * If commit SHA1 is complete (len == 20), assume note object + * If commit SHA1 is incomplete (len < 20), assume note subtree + */ + if (len <= 20) { + unsigned char type = PTR_TYPE_NOTE; + struct leaf_node *l = (struct leaf_node *) + xcalloc(sizeof(struct leaf_node), 1); + hashcpy(l->key_sha1, commit_sha1); + hashcpy(l->val_sha1, entry.sha1); + if (len < 20) { + l->key_sha1[19] = (unsigned char) len; + type = PTR_TYPE_SUBTREE; + } + status = note_tree_insert(node, n, l, type); + assert(!status); + } + } + free(buf); +} + +static void initialize_notes(const char *notes_ref_name) +{ + unsigned char sha1[20], commit_sha1[20]; + unsigned mode; + struct leaf_node root_tree; if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) || get_tree_entry(commit_sha1, "", sha1, &mode)) return; - buf = fill_tree_descriptor(&desc, sha1); - if (!buf) - die("Could not read %s for notes-index", sha1_to_hex(sha1)); - - while (tree_entry(&desc, &entry)) - if (!get_sha1(entry.path, commit_sha1)) - add_entry(commit_sha1, entry.sha1); - free(buf); + hashclr(root_tree.key_sha1); + hashcpy(root_tree.val_sha1, sha1); + load_subtree(&root_tree, &root_node, 0); } static unsigned char *lookup_notes(const unsigned char *commit_sha1) { - int index; - - if (!hash_map.size) - return NULL; - - index = hash_index(&hash_map, commit_sha1); - if (index < 0) - return NULL; - return hash_map.entries[index].notes_sha1; + struct leaf_node *found = note_tree_find(&root_node, 0, commit_sha1); + if (found) + return found->val_sha1; + return NULL; } void free_notes(void) { - free(hash_map.entries); - memset(&hash_map, 0, sizeof(struct hash_map)); + note_tree_free(&root_node); + memset(&root_node, 0, sizeof(struct int_node)); initialized = 0; } @@ -127,7 +306,7 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb, notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT); else if (!notes_ref_name) notes_ref_name = GIT_NOTES_DEFAULT_REF; - initialize_hash_map(notes_ref_name); + initialize_notes(notes_ref_name); initialized = 1; } From 0057c0917d35f9f21e01f2122e7f2b9f169a8b02 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:08 +0200 Subject: [PATCH 12/14] Add selftests verifying that we can parse notes trees with various fanouts Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- t/t3303-notes-subtrees.sh | 104 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100755 t/t3303-notes-subtrees.sh diff --git a/t/t3303-notes-subtrees.sh b/t/t3303-notes-subtrees.sh new file mode 100755 index 00000000000000..cbb9d35909565d --- /dev/null +++ b/t/t3303-notes-subtrees.sh @@ -0,0 +1,104 @@ +#!/bin/sh + +test_description='Test commit notes organized in subtrees' + +. ./test-lib.sh + +number_of_commits=100 + +start_note_commit () { + test_tick && + cat < $GIT_COMMITTER_DATE +data < output && + i=$number_of_commits && + while [ $i -gt 0 ]; do + echo " commit #$i" && + echo " note for commit #$i" && + i=$(($i-1)); + done > expect && + test_cmp expect output +} + +test_expect_success "setup: create $number_of_commits commits" ' + + ( + nr=0 && + while [ $nr -lt $number_of_commits ]; do + nr=$(($nr+1)) && + test_tick && + cat < $GIT_COMMITTER_DATE +data < $GIT_COMMITTER_DATE +data < Date: Fri, 9 Oct 2009 12:22:09 +0200 Subject: [PATCH 13/14] Refactor notes code to concatenate multiple notes annotating the same object Currently, having multiple notes referring to the same commit from various locations in the notes tree is strongly discouraged, since only one of those notes will be parsed and shown. This patch teaches the notes code to _concatenate_ multiple notes that annotate the same commit. Notes are concatenated by creating a new blob object containing the concatenation of the notes in question, and replacing them with the concatenated note in the internal notes tree structure. Getting the concatenation right requires being more proactive in unpacking subtree entries in the internal notes tree structure, so that we don't return a note prematurely (i.e. before having found all other notes that annotate the same object). As such, this patch may incur a small performance penalty. Suggested-by: Sam Vilain Re-suggested-by: Johannes Schindelin Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes.c | 243 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 161 insertions(+), 82 deletions(-) diff --git a/notes.c b/notes.c index 210c4b2263810c..50a4672d7c68d8 100644 --- a/notes.c +++ b/notes.c @@ -59,115 +59,196 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node, unsigned int n); /* - * To find a leaf_node: + * Search the tree until the appropriate location for the given key is found: * 1. Start at the root node, with n = 0 - * 2. Use the nth nibble of the key as an index into a: - * - If a[n] is an int_node, recurse into that node and increment n - * - If a leaf_node with matching key, return leaf_node (assert note entry) + * 2. If a[0] at the current level is a matching subtree entry, unpack that + * subtree entry and remove it; restart search at the current level. + * 3. Use the nth nibble of the key as an index into a: + * - If a[n] is an int_node, recurse from #2 into that node and increment n * - If a matching subtree entry, unpack that subtree entry (and remove it); * restart search at the current level. - * - Otherwise, we end up at a NULL pointer, or a non-matching leaf_node. - * Backtrack out of the recursion, one level at a time and check a[0]: - * - If a[0] at the current level is a matching subtree entry, unpack that - * subtree entry (and remove it); restart search at the current level. + * - Otherwise, we have found one of the following: + * - a subtree entry which does not match the key + * - a note entry which may or may not match the key + * - an unused leaf node (NULL) + * In any case, set *tree and *n, and return pointer to the tree location. */ -static struct leaf_node *note_tree_find(struct int_node *tree, unsigned char n, - const unsigned char *key_sha1) +static void **note_tree_search(struct int_node **tree, + unsigned char *n, const unsigned char *key_sha1) { struct leaf_node *l; - unsigned char i = GET_NIBBLE(n, key_sha1); - void *p = tree->a[i]; + unsigned char i; + void *p = (*tree)->a[0]; + if (GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE) { + l = (struct leaf_node *) CLR_PTR_TYPE(p); + if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { + /* unpack tree and resume search */ + (*tree)->a[0] = NULL; + load_subtree(l, *tree, *n); + free(l); + return note_tree_search(tree, n, key_sha1); + } + } + + i = GET_NIBBLE(*n, key_sha1); + p = (*tree)->a[i]; switch(GET_PTR_TYPE(p)) { case PTR_TYPE_INTERNAL: - l = note_tree_find(CLR_PTR_TYPE(p), n + 1, key_sha1); - if (l) - return l; - break; - case PTR_TYPE_NOTE: - l = (struct leaf_node *) CLR_PTR_TYPE(p); - if (!hashcmp(key_sha1, l->key_sha1)) - return l; /* return note object matching given key */ - break; + *tree = CLR_PTR_TYPE(p); + (*n)++; + return note_tree_search(tree, n, key_sha1); case PTR_TYPE_SUBTREE: l = (struct leaf_node *) CLR_PTR_TYPE(p); if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { /* unpack tree and resume search */ - tree->a[i] = NULL; - load_subtree(l, tree, n); + (*tree)->a[i] = NULL; + load_subtree(l, *tree, *n); free(l); - return note_tree_find(tree, n, key_sha1); + return note_tree_search(tree, n, key_sha1); } - break; - case PTR_TYPE_NULL: + /* fall through */ default: - assert(!p); - break; + return &((*tree)->a[i]); } +} - /* - * Did not find key at this (or any lower) level. - * Check if there's a matching subtree entry in tree->a[0]. - * If so, unpack tree and resume search. - */ - p = tree->a[0]; - if (GET_PTR_TYPE(p) != PTR_TYPE_SUBTREE) - return NULL; - l = (struct leaf_node *) CLR_PTR_TYPE(p); - if (!SUBTREE_SHA1_PREFIXCMP(key_sha1, l->key_sha1)) { - /* unpack tree and resume search */ - tree->a[0] = NULL; - load_subtree(l, tree, n); - free(l); - return note_tree_find(tree, n, key_sha1); +/* + * To find a leaf_node: + * Search to the tree location appropriate for the given key: + * If a note entry with matching key, return the note entry, else return NULL. + */ +static struct leaf_node *note_tree_find(struct int_node *tree, unsigned char n, + const unsigned char *key_sha1) +{ + void **p = note_tree_search(&tree, &n, key_sha1); + if (GET_PTR_TYPE(*p) == PTR_TYPE_NOTE) { + struct leaf_node *l = (struct leaf_node *) CLR_PTR_TYPE(*p); + if (!hashcmp(key_sha1, l->key_sha1)) + return l; } return NULL; } +/* Create a new blob object by concatenating the two given blob objects */ +static int concatenate_notes(unsigned char *cur_sha1, + const unsigned char *new_sha1) +{ + char *cur_msg, *new_msg, *buf; + unsigned long cur_len, new_len, buf_len; + enum object_type cur_type, new_type; + int ret; + + /* read in both note blob objects */ + new_msg = read_sha1_file(new_sha1, &new_type, &new_len); + if (!new_msg || !new_len || new_type != OBJ_BLOB) { + free(new_msg); + return 0; + } + cur_msg = read_sha1_file(cur_sha1, &cur_type, &cur_len); + if (!cur_msg || !cur_len || cur_type != OBJ_BLOB) { + free(cur_msg); + free(new_msg); + hashcpy(cur_sha1, new_sha1); + return 0; + } + + /* we will separate the notes by a newline anyway */ + if (cur_msg[cur_len - 1] == '\n') + cur_len--; + + /* concatenate cur_msg and new_msg into buf */ + buf_len = cur_len + 1 + new_len; + buf = (char *) xmalloc(buf_len); + memcpy(buf, cur_msg, cur_len); + buf[cur_len] = '\n'; + memcpy(buf + cur_len + 1, new_msg, new_len); + + free(cur_msg); + free(new_msg); + + /* create a new blob object from buf */ + ret = write_sha1_file(buf, buf_len, "blob", cur_sha1); + free(buf); + return ret; +} + /* * To insert a leaf_node: - * 1. Start at the root node, with n = 0 - * 2. Use the nth nibble of the key as an index into a: - * - If a[n] is NULL, store the tweaked pointer directly into a[n] - * - If a[n] is an int_node, recurse into that node and increment n - * - If a[n] is a leaf_node: - * 1. Check if they're equal, and handle that (abort? overwrite?) - * 2. Create a new int_node, and store both leaf_nodes there - * 3. Store the new int_node into a[n]. + * Search to the tree location appropriate for the given leaf_node's key: + * - If location is unused (NULL), store the tweaked pointer directly there + * - If location holds a note entry that matches the note-to-be-inserted, then + * concatenate the two notes. + * - If location holds a note entry that matches the subtree-to-be-inserted, + * then unpack the subtree-to-be-inserted into the location. + * - If location holds a matching subtree entry, unpack the subtree at that + * location, and restart the insert operation from that level. + * - Else, create a new int_node, holding both the node-at-location and the + * node-to-be-inserted, and store the new int_node into the location. */ -static int note_tree_insert(struct int_node *tree, unsigned char n, - const struct leaf_node *entry, unsigned char type) +static void note_tree_insert(struct int_node *tree, unsigned char n, + struct leaf_node *entry, unsigned char type) { struct int_node *new_node; - const struct leaf_node *l; - int ret; - unsigned char i = GET_NIBBLE(n, entry->key_sha1); - void *p = tree->a[i]; - assert(GET_PTR_TYPE(entry) == PTR_TYPE_NULL); - switch(GET_PTR_TYPE(p)) { + struct leaf_node *l; + void **p = note_tree_search(&tree, &n, entry->key_sha1); + + assert(GET_PTR_TYPE(entry) == 0); /* no type bits set */ + l = (struct leaf_node *) CLR_PTR_TYPE(*p); + switch(GET_PTR_TYPE(*p)) { case PTR_TYPE_NULL: - assert(!p); - tree->a[i] = SET_PTR_TYPE(entry, type); - return 0; - case PTR_TYPE_INTERNAL: - return note_tree_insert(CLR_PTR_TYPE(p), n + 1, entry, type); - default: - assert(GET_PTR_TYPE(p) == PTR_TYPE_NOTE || - GET_PTR_TYPE(p) == PTR_TYPE_SUBTREE); - l = (const struct leaf_node *) CLR_PTR_TYPE(p); - if (!hashcmp(entry->key_sha1, l->key_sha1)) - return -1; /* abort insert on matching key */ - new_node = (struct int_node *) - xcalloc(sizeof(struct int_node), 1); - ret = note_tree_insert(new_node, n + 1, - CLR_PTR_TYPE(p), GET_PTR_TYPE(p)); - if (ret) { - free(new_node); - return -1; + assert(!*p); + *p = SET_PTR_TYPE(entry, type); + return; + case PTR_TYPE_NOTE: + switch (type) { + case PTR_TYPE_NOTE: + if (!hashcmp(l->key_sha1, entry->key_sha1)) { + /* skip concatenation if l == entry */ + if (!hashcmp(l->val_sha1, entry->val_sha1)) + return; + + if (concatenate_notes(l->val_sha1, + entry->val_sha1)) + die("failed to concatenate note %s " + "into note %s for commit %s", + sha1_to_hex(entry->val_sha1), + sha1_to_hex(l->val_sha1), + sha1_to_hex(l->key_sha1)); + free(entry); + return; + } + break; + case PTR_TYPE_SUBTREE: + if (!SUBTREE_SHA1_PREFIXCMP(l->key_sha1, + entry->key_sha1)) { + /* unpack 'entry' */ + load_subtree(entry, tree, n); + free(entry); + return; + } + break; + } + break; + case PTR_TYPE_SUBTREE: + if (!SUBTREE_SHA1_PREFIXCMP(entry->key_sha1, l->key_sha1)) { + /* unpack 'l' and restart insert */ + *p = NULL; + load_subtree(l, tree, n); + free(l); + note_tree_insert(tree, n, entry, type); + return; } - tree->a[i] = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL); - return note_tree_insert(new_node, n + 1, entry, type); + break; } + + /* non-matching leaf_node */ + assert(GET_PTR_TYPE(*p) == PTR_TYPE_NOTE || + GET_PTR_TYPE(*p) == PTR_TYPE_SUBTREE); + new_node = (struct int_node *) xcalloc(sizeof(struct int_node), 1); + note_tree_insert(new_node, n + 1, l, GET_PTR_TYPE(*p)); + *p = SET_PTR_TYPE(new_node, PTR_TYPE_INTERNAL); + note_tree_insert(new_node, n + 1, entry, type); } /* Free the entire notes data contained in the given tree */ @@ -220,7 +301,6 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node, { unsigned char commit_sha1[20]; unsigned int prefix_len; - int status; void *buf; struct tree_desc desc; struct name_entry entry; @@ -254,8 +334,7 @@ static void load_subtree(struct leaf_node *subtree, struct int_node *node, l->key_sha1[19] = (unsigned char) len; type = PTR_TYPE_SUBTREE; } - status = note_tree_insert(node, n, l, type); - assert(!status); + note_tree_insert(node, n, l, type); } } free(buf); From a099469bbcf273e76d81573229971956b4ef0700 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Fri, 9 Oct 2009 12:22:10 +0200 Subject: [PATCH 14/14] Add selftests verifying concatenation of multiple notes for the same commit Also verify that multiple references to the _same_ note blob are _not_ concatenated. Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- t/t3303-notes-subtrees.sh | 84 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/t/t3303-notes-subtrees.sh b/t/t3303-notes-subtrees.sh index cbb9d35909565d..edc4bc884147f2 100755 --- a/t/t3303-notes-subtrees.sh +++ b/t/t3303-notes-subtrees.sh @@ -101,4 +101,88 @@ test_expect_success 'verify notes in 4/36-fanout' 'verify_notes' test_expect_success 'test notes in 2/2/36-fanout' 'test_sha1_based "s|^\(..\)\(..\)|\1/\2/|"' test_expect_success 'verify notes in 2/2/36-fanout' 'verify_notes' +test_same_notes () { + ( + start_note_commit && + nr=$number_of_commits && + git rev-list refs/heads/master | + while read sha1; do + first_note_path=$(echo "$sha1" | sed "$1") + second_note_path=$(echo "$sha1" | sed "$2") + cat < output && + i=$number_of_commits && + while [ $i -gt 0 ]; do + echo " commit #$i" && + echo " first note for commit #$i" && + echo " second note for commit #$i" && + i=$(($i-1)); + done > expect && + test_cmp expect output +} + +test_expect_success 'test notes in 4/36-fanout concatenated with 2/38-fanout' 'test_concatenated_notes "s|^..|&/|" "s|^....|&/|"' +test_expect_success 'verify notes in 4/36-fanout concatenated with 2/38-fanout' 'verify_concatenated_notes' + +test_expect_success 'test notes in 2/38-fanout concatenated with 2/2/36-fanout' 'test_concatenated_notes "s|^\(..\)\(..\)|\1/\2/|" "s|^..|&/|"' +test_expect_success 'verify notes in 2/38-fanout concatenated with 2/2/36-fanout' 'verify_concatenated_notes' + +test_expect_success 'test notes in 4/36-fanout concatenated with 2/2/36-fanout' 'test_concatenated_notes "s|^\(..\)\(..\)|\1/\2/|" "s|^....|&/|"' +test_expect_success 'verify notes in 4/36-fanout concatenated with 2/2/36-fanout' 'verify_concatenated_notes' + test_done