Skip to content

Commit

Permalink
Merge branch 'jk/prune-mtime'
Browse files Browse the repository at this point in the history
Tighten the logic that decides whether unreachable cruft is
sufficiently old, covering corner cases such as an ancient object
becoming reachable and then going unreachable again, in which case
its retention period should be prolonged.

* jk/prune-mtime: (28 commits)
  drop add_object_array_with_mode
  revision: remove definition of unused 'add_object' function
  pack-objects: double-check options before discarding objects
  repack: pack objects mentioned by the index
  pack-objects: use argv_array
  reachable: use revision machinery's --indexed-objects code
  rev-list: add --indexed-objects option
  rev-list: document --reflog option
  t5516: test pushing a tag of an otherwise unreferenced blob
  traverse_commit_list: support pending blobs/trees with paths
  make add_object_array_with_context interface more sane
  write_sha1_file: freshen existing objects
  pack-objects: match prune logic for discarding objects
  pack-objects: refactor unpack-unreachable expiration check
  prune: keep objects reachable from recent objects
  sha1_file: add for_each iterators for loose and packed objects
  count-objects: use for_each_loose_file_in_objdir
  count-objects: do not use xsize_t when counting object size
  prune-packed: use for_each_loose_file_in_objdir
  reachable: mark index blobs as SEEN
  ...
  • Loading branch information
gitster committed Oct 29, 2014
2 parents 853878d + 189a122 commit d70e331
Show file tree
Hide file tree
Showing 23 changed files with 836 additions and 436 deletions.
9 changes: 9 additions & 0 deletions Documentation/rev-list-options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,15 @@ respectively, and they must begin with `refs/` when applied to `--glob`
or `--all`. If a trailing '/{asterisk}' is intended, it must be given
explicitly.

--reflog::
Pretend as if all objects mentioned by reflogs are listed on the
command line as `<commit>`.

--indexed-objects::
Pretend as if all trees and blobs used by the index are listed
on the command line. Note that you probably want to use
`--objects`, too.

--ignore-missing::
Upon seeing an invalid object name in the input, pretend as if
the bad input was not given.
Expand Down
101 changes: 30 additions & 71 deletions builtin/count-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

static unsigned long garbage;
static off_t size_garbage;
static int verbose;
static unsigned long loose, packed, packed_loose;
static off_t loose_size;

static void real_report_garbage(const char *desc, const char *path)
{
Expand All @@ -21,61 +24,31 @@ static void real_report_garbage(const char *desc, const char *path)
garbage++;
}

static void count_objects(DIR *d, char *path, int len, int verbose,
unsigned long *loose,
off_t *loose_size,
unsigned long *packed_loose)
static void loose_garbage(const char *path)
{
struct dirent *ent;
while ((ent = readdir(d)) != NULL) {
char hex[41];
unsigned char sha1[20];
const char *cp;
int bad = 0;
if (verbose)
report_garbage("garbage found", path);
}

if (is_dot_or_dotdot(ent->d_name))
continue;
for (cp = ent->d_name; *cp; cp++) {
int ch = *cp;
if (('0' <= ch && ch <= '9') ||
('a' <= ch && ch <= 'f'))
continue;
bad = 1;
break;
}
if (cp - ent->d_name != 38)
bad = 1;
else {
struct stat st;
memcpy(path + len + 3, ent->d_name, 38);
path[len + 2] = '/';
path[len + 41] = 0;
if (lstat(path, &st) || !S_ISREG(st.st_mode))
bad = 1;
else
(*loose_size) += xsize_t(on_disk_bytes(st));
}
if (bad) {
if (verbose) {
struct strbuf sb = STRBUF_INIT;
strbuf_addf(&sb, "%.*s/%s",
len + 2, path, ent->d_name);
report_garbage("garbage found", sb.buf);
strbuf_release(&sb);
}
continue;
}
(*loose)++;
if (!verbose)
continue;
memcpy(hex, path+len, 2);
memcpy(hex+2, ent->d_name, 38);
hex[40] = 0;
if (get_sha1_hex(hex, sha1))
die("internal error");
if (has_sha1_pack(sha1))
(*packed_loose)++;
/*
 * Per-object callback given to for_each_loose_file_in_objdir().
 *
 * A path that cannot be lstat()ed, or that is not a regular file, is
 * handed to loose_garbage().  Otherwise the object is tallied in
 * "loose", its on-disk size is added to "loose_size", and, when running
 * verbosely, it is also counted in "packed_loose" if has_sha1_pack()
 * says the same object exists in a pack.
 *
 * "data" is unused.  Always returns 0.
 */
static int count_loose(const unsigned char *sha1, const char *path, void *data)
{
	struct stat st;

	/* Anything that is not a plain file is garbage, not an object. */
	if (lstat(path, &st) || !S_ISREG(st.st_mode)) {
		loose_garbage(path);
		return 0;
	}

	loose++;
	loose_size += on_disk_bytes(st);

	/* The pack lookup is only performed in verbose mode. */
	if (verbose && has_sha1_pack(sha1))
		packed_loose++;

	return 0;
}

/*
 * Callback given to for_each_loose_file_in_objdir() next to
 * count_loose(): forwards "path" to loose_garbage().
 *
 * "basename" and "data" are unused.  Always returns 0.
 */
static int count_cruft(const char *basename, const char *path, void *data)
{
	loose_garbage(path);

	return 0;
}

static char const * const count_objects_usage[] = {
Expand All @@ -85,12 +58,7 @@ static char const * const count_objects_usage[] = {

int cmd_count_objects(int argc, const char **argv, const char *prefix)
{
int i, verbose = 0, human_readable = 0;
const char *objdir = get_object_directory();
int len = strlen(objdir);
char *path = xmalloc(len + 50);
unsigned long loose = 0, packed = 0, packed_loose = 0;
off_t loose_size = 0;
int human_readable = 0;
struct option opts[] = {
OPT__VERBOSE(&verbose, N_("be verbose")),
OPT_BOOL('H', "human-readable", &human_readable,
Expand All @@ -104,19 +72,10 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
usage_with_options(count_objects_usage, opts);
if (verbose)
report_garbage = real_report_garbage;
memcpy(path, objdir, len);
if (len && objdir[len-1] != '/')
path[len++] = '/';
for (i = 0; i < 256; i++) {
DIR *d;
sprintf(path + len, "%02x", i);
d = opendir(path);
if (!d)
continue;
count_objects(d, path, len, verbose,
&loose, &loose_size, &packed_loose);
closedir(d);
}

for_each_loose_file_in_objdir(get_object_directory(),
count_loose, count_cruft, NULL, NULL);

if (verbose) {
struct packed_git *p;
unsigned long num_pack = 0;
Expand Down
8 changes: 4 additions & 4 deletions builtin/grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -456,10 +456,10 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
}

static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
struct object *obj, const char *name, struct object_context *oc)
struct object *obj, const char *name, const char *path)
{
if (obj->type == OBJ_BLOB)
return grep_sha1(opt, obj->sha1, name, 0, oc ? oc->path : NULL);
return grep_sha1(opt, obj->sha1, name, 0, path);
if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
struct tree_desc tree;
void *data;
Expand Down Expand Up @@ -501,7 +501,7 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec,
for (i = 0; i < nr; i++) {
struct object *real_obj;
real_obj = deref_tag(list->objects[i].item, NULL, 0);
if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].context)) {
if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].path)) {
hit = 1;
if (opt->status_only)
break;
Expand Down Expand Up @@ -821,7 +821,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
struct object *object = parse_object_or_die(sha1, arg);
if (!seen_dashdash)
verify_non_filename(prefix, arg);
add_object_array_with_context(object, arg, &list, xmemdupz(&oc, sizeof(struct object_context)));
add_object_array_with_path(object, arg, &list, oc.mode, oc.path);
continue;
}
if (!strcmp(arg, "--")) {
Expand Down
86 changes: 71 additions & 15 deletions builtin/pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#include "streaming.h"
#include "thread-utils.h"
#include "pack-bitmap.h"
#include "reachable.h"
#include "sha1-array.h"
#include "argv-array.h"

static const char *pack_usage[] = {
N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"),
Expand Down Expand Up @@ -2406,6 +2409,27 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1)
return 0;
}

/*
* Store a list of sha1s that should not be discarded
* because they are either written too recently, or are
* reachable from another object that was.
*
* This is filled by get_object_list.
*/
static struct sha1_array recent_objects;

/*
 * Decide whether an object that would otherwise be loosened may be
 * dropped instead.
 *
 * Returns 1 only when all of the following hold:
 *   - unpack_unreachable_expiration is non-zero,
 *   - "mtime" is not newer than that expiration, and
 *   - "sha1" is not present in recent_objects.
 * Returns 0 in every other case.
 */
static int loosened_object_can_be_discarded(const unsigned char *sha1,
					    unsigned long mtime)
{
	/* Non-zero expiration and an mtime that is not past it. */
	int expired = unpack_unreachable_expiration &&
		      mtime <= unpack_unreachable_expiration;

	/* The recent_objects lookup only happens for expired objects. */
	return expired && sha1_array_lookup(&recent_objects, sha1) < 0;
}

static void loosen_unused_packed_objects(struct rev_info *revs)
{
struct packed_git *p;
Expand All @@ -2416,17 +2440,14 @@ static void loosen_unused_packed_objects(struct rev_info *revs)
if (!p->pack_local || p->pack_keep)
continue;

if (unpack_unreachable_expiration &&
p->mtime < unpack_unreachable_expiration)
continue;

if (open_pack_index(p))
die("cannot open pack index");

for (i = 0; i < p->num_objects; i++) {
sha1 = nth_packed_object_sha1(p, i);
if (!packlist_find(&to_pack, sha1, NULL) &&
!has_sha1_pack_kept_or_nonlocal(sha1))
!has_sha1_pack_kept_or_nonlocal(sha1) &&
!loosened_object_can_be_discarded(sha1, p->mtime))
if (force_object_loose(sha1, p->mtime))
die("unable to force loose object");
}
Expand Down Expand Up @@ -2462,6 +2483,19 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
return 0;
}

/*
 * Object callback passed to traverse_commit_list(): appends the sha1
 * of "obj" to recent_objects.
 *
 * "path", "last" and "data" are unused.
 */
static void record_recent_object(struct object *obj,
				 const struct name_path *path,
				 const char *last,
				 void *data)
{
	const unsigned char *sha1 = obj->sha1;

	sha1_array_append(&recent_objects, sha1);
}

/*
 * Commit callback passed to traverse_commit_list(): appends the sha1
 * of "commit" to recent_objects.  "data" is unused.
 */
static void record_recent_commit(struct commit *commit, void *data)
{
	const unsigned char *sha1 = commit->object.sha1;

	sha1_array_append(&recent_objects, sha1);
}

static void get_object_list(int ac, const char **av)
{
struct rev_info revs;
Expand Down Expand Up @@ -2509,10 +2543,23 @@ static void get_object_list(int ac, const char **av)
mark_edges_uninteresting(&revs, show_edge);
traverse_commit_list(&revs, show_commit, show_object, NULL);

if (unpack_unreachable_expiration) {
revs.ignore_missing_links = 1;
if (add_unseen_recent_objects_to_traversal(&revs,
unpack_unreachable_expiration))
die("unable to add recent objects");
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
traverse_commit_list(&revs, record_recent_commit,
record_recent_object, NULL);
}

if (keep_unreachable)
add_objects_in_unpacked_packs(&revs);
if (unpack_unreachable)
loosen_unused_packed_objects(&revs);

sha1_array_clear(&recent_objects);
}

static int option_parse_index_version(const struct option *opt,
Expand Down Expand Up @@ -2567,9 +2614,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
int use_internal_rev_list = 0;
int thin = 0;
int all_progress_implied = 0;
const char *rp_av[6];
int rp_ac = 0;
struct argv_array rp = ARGV_ARRAY_INIT;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
int rev_list_index = 0;
struct option pack_objects_options[] = {
OPT_SET_INT('q', "quiet", &progress,
N_("do not show progress meter"), 0),
Expand Down Expand Up @@ -2616,6 +2663,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{ OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
N_("include objects referred by reflog entries"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
{ OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL,
N_("include objects referred to by the index"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
OPT_BOOL(0, "stdout", &pack_to_stdout,
N_("output pack to stdout")),
OPT_BOOL(0, "include-tag", &include_tag,
Expand Down Expand Up @@ -2658,24 +2708,28 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (pack_to_stdout != !base_name || argc)
usage_with_options(pack_usage, pack_objects_options);

rp_av[rp_ac++] = "pack-objects";
argv_array_push(&rp, "pack-objects");
if (thin) {
use_internal_rev_list = 1;
rp_av[rp_ac++] = "--objects-edge";
argv_array_push(&rp, "--objects-edge");
} else
rp_av[rp_ac++] = "--objects";
argv_array_push(&rp, "--objects");

if (rev_list_all) {
use_internal_rev_list = 1;
rp_av[rp_ac++] = "--all";
argv_array_push(&rp, "--all");
}
if (rev_list_reflog) {
use_internal_rev_list = 1;
rp_av[rp_ac++] = "--reflog";
argv_array_push(&rp, "--reflog");
}
if (rev_list_index) {
use_internal_rev_list = 1;
argv_array_push(&rp, "--indexed-objects");
}
if (rev_list_unpacked) {
use_internal_rev_list = 1;
rp_av[rp_ac++] = "--unpacked";
argv_array_push(&rp, "--unpacked");
}

if (!reuse_object)
Expand Down Expand Up @@ -2706,6 +2760,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)

if (keep_unreachable && unpack_unreachable)
die("--keep-unreachable and --unpack-unreachable are incompatible.");
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
unpack_unreachable_expiration = 0;

if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow())
use_bitmap_index = 0;
Expand All @@ -2723,8 +2779,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (!use_internal_rev_list)
read_object_list_from_stdin();
else {
rp_av[rp_ac] = NULL;
get_object_list(rp_ac, rp_av);
get_object_list(rp.argc, rp.argv);
argv_array_clear(&rp);
}
cleanup_preferred_base();
if (include_tag && nr_result)
Expand Down
Loading

0 comments on commit d70e331

Please sign in to comment.