diff --git a/internal/packindex/content_id_to_bytes.go b/internal/packindex/content_id_to_bytes.go index bc123a2a6..731f3b57d 100644 --- a/internal/packindex/content_id_to_bytes.go +++ b/internal/packindex/content_id_to_bytes.go @@ -21,14 +21,15 @@ func bytesToContentID(b []byte) string { func contentIDToBytes(c string) []byte { var prefix []byte + var skip int if len(c)%2 == 1 { prefix = []byte(c[0:1]) - c = c[1:] + skip = 1 } else { prefix = []byte{0} } - b, err := hex.DecodeString(c) + b, err := hex.DecodeString(c[skip:]) if err != nil { return append([]byte{0xff}, []byte(c)...) } diff --git a/internal/packindex/index.go b/internal/packindex/index.go index ab31e7d19..1ea4c2cb2 100644 --- a/internal/packindex/index.go +++ b/internal/packindex/index.go @@ -7,21 +7,18 @@ "io" "sort" "strings" - "sync" ) // Index is a read-only index of packed blocks. type Index interface { io.Closer - EntryCount() int GetInfo(blockID string) (*Info, error) Iterate(prefix string, cb func(Info) error) error } type index struct { hdr headerInfo - mu sync.Mutex readerAt io.ReaderAt } @@ -42,25 +39,23 @@ func readHeader(readerAt io.ReaderAt) (headerInfo, error) { return headerInfo{}, fmt.Errorf("invalid header format: %v", header[0]) } - return headerInfo{ + hi := headerInfo{ keySize: int(header[1]), valueSize: int(binary.BigEndian.Uint16(header[2:4])), entryCount: int(binary.BigEndian.Uint32(header[4:8])), - }, nil -} + } -// EntryCount returns the number of block entries in an index. -func (b *index) EntryCount() int { - return b.hdr.entryCount + if hi.keySize <= 1 || hi.valueSize < 0 || hi.entryCount < 0 { + return headerInfo{}, fmt.Errorf("invalid header") + } + + return hi, nil } // Iterate invokes the provided callback function for all blocks in the index, sorted alphabetically. // The iteration ends when the callback returns an error, which is propagated to the caller or when // all blocks have been visited. func (b *index) Iterate(prefix string, cb func(Info) error) error { - b.mu.Lock() - defer b.mu.Unlock() - startPos, err := b.findEntryPosition(prefix) if err != nil { return fmt.Errorf("could not find starting position: %v", err) @@ -139,9 +134,6 @@ func (b *index) findEntry(blockID string) ([]byte, error) { // GetInfo returns information about a given block. If a block is not found, nil is returned. func (b *index) GetInfo(blockID string) (*Info, error) { - b.mu.Lock() - defer b.mu.Unlock() - e, err := b.findEntry(blockID) if err != nil { return nil, err diff --git a/internal/packindex/merged.go b/internal/packindex/merged.go index a1aad2104..76e637972 100644 --- a/internal/packindex/merged.go +++ b/internal/packindex/merged.go @@ -19,15 +19,6 @@ func (m Merged) Close() error { return nil } -// EntryCount returns the cumulative number of entries in all underlying indexes (not necessarily the number of unique block IDs). -func (m Merged) EntryCount() int { - cnt := 0 - for _, ndx := range m { - cnt += ndx.EntryCount() - } - return cnt -} - // GetInfo returns information about a single block. If a block is not found, returns (nil,nil) func (m Merged) GetInfo(contentID string) (*Info, error) { var best *Info diff --git a/internal/packindex/merged_test.go b/internal/packindex/merged_test.go index f5a31e252..8ff02abe0 100644 --- a/internal/packindex/merged_test.go +++ b/internal/packindex/merged_test.go @@ -75,6 +75,10 @@ func TestMerged(t *testing.T) { if !reflect.DeepEqual(inOrder, expectedInOrder) { t.Errorf("unexpected items in order: %v, wanted %v", inOrder, expectedInOrder) } + + if err := m.Close(); err != nil { + t.Errorf("unexpected error in Close(): %v", err) + } } func indexWithItems(items ...packindex.Info) (packindex.Index, error) { diff --git a/internal/packindex/packindex_internal_test.go b/internal/packindex/packindex_internal_test.go new file mode 100644 index 000000000..a80661e82 --- /dev/null +++ b/internal/packindex/packindex_internal_test.go @@ -0,0 +1,26 @@ +package packindex + +import "testing" + +func TestRoundTrip(t *testing.T) { + cases := []string{ + "", + "x", + "aa", + "xaa", + "xaaa", + "a1x", + } + + for _, tc := range cases { + b := contentIDToBytes(tc) + got := bytesToContentID(b) + if got != tc { + t.Errorf("%q did not round trip, got %q, wanted %q", tc, got, tc) + } + } + + if got, want := bytesToContentID(nil), ""; got != want { + t.Errorf("unexpected content id %v, want %v", got, want) + } +} diff --git a/internal/packindex/packindex_test.go b/internal/packindex/packindex_test.go index b4e3b7f94..821f5d08f 100644 --- a/internal/packindex/packindex_test.go +++ b/internal/packindex/packindex_test.go @@ -119,10 +119,16 @@ func TestPackIndex(t *testing.T) { t.Errorf("builder output not stable: %x vs %x", hex.Dump(data2), hex.Dump(data3)) } + t.Run("FuzzTest", func(t *testing.T) { + fuzzTestIndexOpen(t, data1) + }) + ndx, err := packindex.Open(bytes.NewReader(data1)) if err != nil { t.Fatalf("can't open index: %v", err) } + defer ndx.Close() + for _, info := range infos { info2, err := ndx.GetInfo(info.BlockID) if err != nil { @@ -172,3 +178,60 @@ func TestPackIndex(t *testing.T) { t.Logf("found %v elements with prefix %q", cnt2, prefix) } } + +func fuzzTestIndexOpen(t *testing.T, originalData []byte) { + // use consistent random + rnd := rand.New(rand.NewSource(12345)) + + fuzzTest(rnd, originalData, 50000, func(d []byte) { + ndx, err := packindex.Open(bytes.NewReader(d)) + if err != nil { + return + } + defer ndx.Close() + cnt := 0 + ndx.Iterate("", func(cb packindex.Info) error { + if cnt < 10 { + ndx.GetInfo(cb.BlockID) + } + cnt++ + return nil + }) + }) +} + +func fuzzTest(rnd *rand.Rand, originalData []byte, rounds int, callback func(d []byte)) { + for round := 0; round < rounds; round++ { + data := append([]byte(nil), originalData...) + + // mutate small number of bytes + bytesToMutate := rnd.Intn(3) + for i := 0; i < bytesToMutate; i++ { + pos := rnd.Intn(len(data)) + data[pos] = byte(rnd.Int()) + } + + sectionsToInsert := rnd.Intn(3) + for i := 0; i < sectionsToInsert; i++ { + pos := rnd.Intn(len(data)) + insertedLength := rnd.Intn(20) + insertedData := make([]byte, insertedLength) + rnd.Read(insertedData) + + data = append(append(append([]byte(nil), data[0:pos]...), insertedData...), data[pos:]...) + } + + sectionsToDelete := rnd.Intn(3) + for i := 0; i < sectionsToDelete; i++ { + pos := rnd.Intn(len(data)) + deletedLength := rnd.Intn(10) + if pos+deletedLength > len(data) { + continue + } + + data = append(append([]byte(nil), data[0:pos]...), data[pos+deletedLength:]...) + } + + callback(data) + } +} diff --git a/internal/packindex/subset.go b/internal/packindex/subset.go index 5946ade6c..5c6e55299 100644 --- a/internal/packindex/subset.go +++ b/internal/packindex/subset.go @@ -2,11 +2,6 @@ // IsSubset returns true if all entries in index 'a' are contained in index 'b'. func IsSubset(a, b Index) bool { - if a.EntryCount() > b.EntryCount() { - // no point iterating. - return false - } - done := make(chan bool) defer close(done) diff --git a/repo/block/committed_block_index.go b/repo/block/committed_block_index.go index 3d67f6d18..103866790 100644 --- a/repo/block/committed_block_index.go +++ b/repo/block/committed_block_index.go @@ -106,7 +106,6 @@ func (b *committedBlockIndex) use(packFiles []string) (bool, error) { return false, fmt.Errorf("unable to open pack index %q: %v", e, err) } - log.Debugf("opened %v with %v entries", e, ndx.EntryCount()) newMerged = append(newMerged, ndx) newInUse[e] = ndx }