aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRunxi Yu <me@runxiyu.org>2025-04-03 10:25:53 +0800
committerRunxi Yu <me@runxiyu.org>2025-04-03 10:25:53 +0800
commit669cca494fd456dbfcf42801c791796c286c18ad (patch)
tree274301c8d2623725d68a027b3ca7312629344022
parentREADME: Remove the note about the Hare implementation (diff)
downloadforge-669cca494fd456dbfcf42801c791796c286c18ad.tar.gz
forge-669cca494fd456dbfcf42801c791796c286c18ad.tar.zst
forge-669cca494fd456dbfcf42801c791796c286c18ad.zip
git2d: Import BARE and UTF-8 utilities
-rw-r--r--.gitignore1
-rw-r--r--Makefile2
-rw-r--r--git2d/bare.c383
-rw-r--r--git2d/bare.h70
-rw-r--r--git2d/utf8.h74
5 files changed, 529 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index 10ad875..a57474d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
/vendor
/source.tar.gz
*.c.BAK
+*.o
diff --git a/Makefile b/Makefile
index 597c1bc..dd43574 100644
--- a/Makefile
+++ b/Makefile
@@ -21,7 +21,7 @@ utils/colb:
hookc/hookc:
-git2d/git2d: git2d/*.c
-	$(CC) $(CFLAGS) `pkg-config --cflags --libs libgit2` -lpthread -o git2d/git2d $<
+git2d/git2d: git2d/main.o git2d/bare.o
+	$(CC) $(CFLAGS) `pkg-config --cflags --libs libgit2` -lpthread -o git2d/git2d $^
version.go:
diff --git a/git2d/bare.c b/git2d/bare.c
new file mode 100644
index 0000000..23c0352
--- /dev/null
+++ b/git2d/bare.c
@@ -0,0 +1,383 @@
+/*-
+ * SPDX-License-Identifier: MIT
+ * SPDX-FileCopyrightText: Copyright (c) 2022 Frank Smit <https://61924.nl/>
+ */
+
+#include <string.h>
+#include <stdbool.h>
+
+#include "bare.h"
+#include "utf8.h"
+
+#define UNUSED(x) (void)(x)
+
+/* Sizes, in octets, of the encoded representations handled in this file. */
+enum {
+ U8SZ = 1,
+ U16SZ = 2,
+ U32SZ = 4,
+ U64SZ = 8,
+ /* A 64-bit value emitted seven bits per octet needs at most ten octets. */
+ MAXVARINTSZ = 10,
+};
+
+/*
+ * Report whether the SZ bytes starting at X are well-formed UTF-8.
+ *
+ * A NULL pointer or a zero length counts as a valid (empty) string.
+ *
+ * utf8_decode() always loads four bytes, so each code point is decoded
+ * from a zero-padded four-byte window rather than from X directly.  The
+ * window keeps the decoder from reading past the end of X, and a
+ * multi-byte sequence cut short by the end of the string is rejected
+ * because the zero padding is not a valid continuation byte.
+ *
+ * The cursor advances by the length of the code point that was actually
+ * decoded, so every byte of the input is examined.
+ */
+static bool
+checkstr(const char *x, uint64_t sz)
+{
+	if (x == NULL || sz == 0) {
+		return true;
+	}
+
+	const char *cur = x;
+	uint64_t left = sz;
+
+	while (left > 0) {
+		uint8_t window[4] = {0, 0, 0, 0};
+		uint64_t avail = left < sizeof(window) ? left : sizeof(window);
+		uint32_t cp = 0;
+		int err = 0;
+
+		memcpy(window, cur, (size_t)avail);
+
+		uint8_t *next = utf8_decode(window, &cp, &err);
+		if (err != 0) {
+			return false;
+		}
+
+		uint64_t used = (uint64_t)(next - window);
+		if (used > left) {
+			/* Defensive: a sequence may never extend past the input. */
+			return false;
+		}
+
+		cur += used;
+		left -= used;
+	}
+
+	return true;
+}
+
+/*
+ * Write X as a variable-length unsigned integer: seven payload bits per
+ * octet, least-significant group first, with the high bit set on every
+ * octet except the last.  The whole encoding is handed to the writer
+ * callback in a single call and its result is returned.
+ */
+bare_error
+bare_put_uint(struct bare_writer *ctx, uint64_t x)
+{
+	uint8_t out[MAXVARINTSZ];
+	uint64_t n = 0;
+
+	for (; x >= 0x80; x >>= 7) {
+		out[n++] = (uint8_t)x | 0x80;
+	}
+	out[n++] = (uint8_t)x;
+
+	return ctx->write(ctx->buffer, out, n);
+}
+
+/*
+ * Read a variable-length unsigned integer into *X.
+ *
+ * Octets are read one at a time through the reader callback; each one
+ * contributes seven payload bits, least-significant group first, and an
+ * octet below 0x80 ends the value.
+ *
+ * Returns the callback's error if a read fails.  Returns
+ * BARE_ERROR_READ_FAILED if the encoding does not fit in 64 bits, i.e.
+ * the tenth octet has its continuation bit set or carries more than the
+ * single remaining payload bit.  *X is only written on success.
+ */
+bare_error
+bare_get_uint(struct bare_reader *ctx, uint64_t *x)
+{
+	uint64_t result = 0;
+
+	for (unsigned i = 0; i < MAXVARINTSZ; i++) {
+		uint8_t b;
+
+		bare_error err = ctx->read(ctx->buffer, &b, U8SZ);
+		if (err != BARE_ERROR_NONE) {
+			return err;
+		}
+
+		/* Nine octets supply 63 bits, so only bit 63 is left for the tenth. */
+		if (i == MAXVARINTSZ - 1 && b > 1) {
+			return BARE_ERROR_READ_FAILED;
+		}
+
+		result |= (uint64_t)(b & 0x7f) << (7 * i);
+
+		if (b < 0x80) {
+			*x = result;
+			return BARE_ERROR_NONE;
+		}
+	}
+
+	/* Not reached: the tenth octet either terminates or is rejected above. */
+	return BARE_ERROR_READ_FAILED;
+}
+
+/*
+ * Write X as a variable-length signed integer.  The value is first
+ * mapped to an unsigned one (shift left by one, then complement when X
+ * is negative) so that the sign ends up in the lowest bit, and the
+ * result is written with bare_put_uint().
+ */
+bare_error
+bare_put_int(struct bare_writer *ctx, int64_t x)
+{
+	const uint64_t shifted = (uint64_t)x << 1;
+
+	return bare_put_uint(ctx, x < 0 ? ~shifted : shifted);
+}
+
+/*
+ * Read a variable-length signed integer into *X.  The unsigned value
+ * from bare_get_uint() is shifted right by one and complemented when
+ * its lowest bit is set.  *X is left untouched when the read fails.
+ */
+bare_error
+bare_get_int(struct bare_reader *ctx, int64_t *x)
+{
+	uint64_t raw;
+	bare_error err = bare_get_uint(ctx, &raw);
+
+	if (err != BARE_ERROR_NONE) {
+		return err;
+	}
+
+	const int64_t magnitude = (int64_t)(raw >> 1);
+
+	*x = (raw & 1) != 0 ? ~magnitude : magnitude;
+
+	return err;
+}
+
+/* Write the single octet X through the writer callback. */
+bare_error
+bare_put_u8(struct bare_writer *ctx, uint8_t x)
+{
+	uint8_t octet = x;
+
+	return ctx->write(ctx->buffer, &octet, U8SZ);
+}
+
+/* Read a single octet into *X through the reader callback. */
+bare_error
+bare_get_u8(struct bare_reader *ctx, uint8_t *x)
+{
+	bare_error status = ctx->read(ctx->buffer, x, U8SZ);
+
+	return status;
+}
+
+/* Write X as two octets, least-significant octet first. */
+bare_error
+bare_put_u16(struct bare_writer *ctx, uint16_t x)
+{
+	uint8_t le[U16SZ];
+
+	le[0] = (uint8_t)x;
+	le[1] = (uint8_t)(x >> 8);
+
+	return ctx->write(ctx->buffer, le, U16SZ);
+}
+
+/*
+ * Read two octets into *X and interpret them least-significant octet
+ * first.  The octets are read directly into the storage of *X and then
+ * recombined in place.
+ */
+bare_error
+bare_get_u16(struct bare_reader *ctx, uint16_t *x)
+{
+	uint8_t *raw = (uint8_t *)x;
+	bare_error err = ctx->read(ctx->buffer, x, U16SZ);
+
+	if (err != BARE_ERROR_NONE) {
+		return err;
+	}
+
+	const uint16_t lo = raw[0];
+	const uint16_t hi = raw[1];
+
+	*x = (uint16_t)(lo | (hi << 8));
+
+	return err;
+}
+
+/* Write X as four octets, least-significant octet first. */
+bare_error
+bare_put_u32(struct bare_writer *ctx, uint32_t x)
+{
+	uint8_t le[U32SZ];
+
+	for (int i = 0; i < U32SZ; i++) {
+		le[i] = (uint8_t)(x >> (8 * i));
+	}
+
+	return ctx->write(ctx->buffer, le, U32SZ);
+}
+
+/*
+ * Read four octets and store them in *X, interpreting them
+ * least-significant octet first.
+ *
+ * The octets are read into a local buffer and each one is widened to
+ * uint32_t before it is shifted: shifting the promoted int left by 24
+ * would overflow a 32-bit int for octets of 0x80 and above.
+ *
+ * Returns the reader callback's result; *X is only written on success.
+ */
+bare_error
+bare_get_u32(struct bare_reader *ctx, uint32_t *x)
+{
+	uint8_t raw[U32SZ];
+	bare_error err = ctx->read(ctx->buffer, raw, U32SZ);
+
+	if (err == BARE_ERROR_NONE) {
+		*x = (uint32_t)raw[0]
+			| (uint32_t)raw[1] << 8
+			| (uint32_t)raw[2] << 16
+			| (uint32_t)raw[3] << 24;
+	}
+
+	return err;
+}
+
+/* Write X as eight octets, least-significant octet first. */
+bare_error
+bare_put_u64(struct bare_writer *ctx, uint64_t x)
+{
+	uint8_t le[U64SZ];
+
+	for (int i = 0; i < U64SZ; i++) {
+		le[i] = (uint8_t)(x >> (8 * i));
+	}
+
+	return ctx->write(ctx->buffer, le, U64SZ);
+}
+
+/*
+ * Read eight octets into *X and interpret them least-significant octet
+ * first.  The octets are read directly into the storage of *X; the
+ * value is assembled in a local before *X is overwritten.
+ */
+bare_error
+bare_get_u64(struct bare_reader *ctx, uint64_t *x)
+{
+	uint8_t *raw = (uint8_t *)x;
+	bare_error err = ctx->read(ctx->buffer, x, U64SZ);
+
+	if (err == BARE_ERROR_NONE) {
+		uint64_t value = 0;
+
+		/* Fold from the most-significant octet down. */
+		for (int i = U64SZ - 1; i >= 0; i--) {
+			value = (value << 8) | raw[i];
+		}
+
+		*x = value;
+	}
+
+	return err;
+}
+
+/* Write X as one octet by converting it to uint8_t. */
+bare_error
+bare_put_i8(struct bare_writer *ctx, int8_t x)
+{
+	const uint8_t octet = (uint8_t)x;
+
+	return bare_put_u8(ctx, octet);
+}
+
+/* Read one octet straight into the storage of *X via bare_get_u8(). */
+bare_error
+bare_get_i8(struct bare_reader *ctx, int8_t *x)
+{
+	uint8_t *slot = (uint8_t *)x;
+
+	return bare_get_u8(ctx, slot);
+}
+
+/* Write X as two octets by converting it to uint16_t. */
+bare_error
+bare_put_i16(struct bare_writer *ctx, int16_t x)
+{
+	const uint16_t bits = (uint16_t)x;
+
+	return bare_put_u16(ctx, bits);
+}
+
+/* Read two octets into the storage of *X via bare_get_u16(). */
+bare_error
+bare_get_i16(struct bare_reader *ctx, int16_t *x)
+{
+	uint16_t *slot = (uint16_t *)x;
+
+	return bare_get_u16(ctx, slot);
+}
+
+/* Write X as four octets by converting it to uint32_t. */
+bare_error
+bare_put_i32(struct bare_writer *ctx, int32_t x)
+{
+	const uint32_t bits = (uint32_t)x;
+
+	return bare_put_u32(ctx, bits);
+}
+
+/* Read four octets into the storage of *X via bare_get_u32(). */
+bare_error
+bare_get_i32(struct bare_reader *ctx, int32_t *x)
+{
+	uint32_t *slot = (uint32_t *)x;
+
+	return bare_get_u32(ctx, slot);
+}
+
+/* Write X as eight octets by converting it to uint64_t. */
+bare_error
+bare_put_i64(struct bare_writer *ctx, int64_t x)
+{
+	const uint64_t bits = (uint64_t)x;
+
+	return bare_put_u64(ctx, bits);
+}
+
+/* Read eight octets into the storage of *X via bare_get_u64(). */
+bare_error
+bare_get_i64(struct bare_reader *ctx, int64_t *x)
+{
+	uint64_t *slot = (uint64_t *)x;
+
+	return bare_get_u64(ctx, slot);
+}
+
+/*
+ * Write the object representation of X as a 32-bit value, using
+ * bare_put_u32() so the octets go out least-significant first.
+ */
+bare_error
+bare_put_f32(struct bare_writer *ctx, float x)
+{
+	uint32_t bits;
+
+	/* Copy the bytes instead of casting pointers between float and integer. */
+	memcpy(&bits, &x, U32SZ);
+
+	return bare_put_u32(ctx, bits);
+}
+
+/*
+ * Read a 32-bit float into *X.
+ *
+ * The value is read with bare_get_u32(), which assembles the octets
+ * least-significant first, and the resulting bit pattern is then copied
+ * into the float.  This mirrors bare_put_f32(), so the pair round-trips
+ * regardless of host byte order.
+ *
+ * Returns the result of bare_get_u32(); *X is only written on success.
+ */
+bare_error
+bare_get_f32(struct bare_reader *ctx, float *x)
+{
+	uint32_t bits = 0;
+	bare_error err = bare_get_u32(ctx, &bits);
+
+	if (err == BARE_ERROR_NONE) {
+		memcpy(x, &bits, U32SZ);
+	}
+
+	return err;
+}
+
+/*
+ * Write the object representation of X as a 64-bit value, using
+ * bare_put_u64() so the octets go out least-significant first.
+ */
+bare_error
+bare_put_f64(struct bare_writer *ctx, double x)
+{
+	uint64_t bits;
+
+	/* Copy the bytes instead of casting pointers between double and integer. */
+	memcpy(&bits, &x, U64SZ);
+
+	return bare_put_u64(ctx, bits);
+}
+
+/*
+ * Read a 64-bit float into *X.
+ *
+ * The value is read with bare_get_u64(), which assembles the octets
+ * least-significant first, and the resulting bit pattern is then copied
+ * into the double.  This mirrors bare_put_f64(), so the pair round-trips
+ * regardless of host byte order.
+ *
+ * Returns the result of bare_get_u64(); *X is only written on success.
+ */
+bare_error
+bare_get_f64(struct bare_reader *ctx, double *x)
+{
+	uint64_t bits = 0;
+	bare_error err = bare_get_u64(ctx, &bits);
+
+	if (err == BARE_ERROR_NONE) {
+		memcpy(x, &bits, U64SZ);
+	}
+
+	return err;
+}
+
+/* Write X as one octet: 1 for true, 0 for false. */
+bare_error
+bare_put_bool(struct bare_writer *ctx, bool x)
+{
+	const uint8_t octet = x ? 1 : 0;
+
+	return bare_put_u8(ctx, octet);
+}
+
+/*
+ * Read one octet and store its truth value in *X.
+ *
+ * The octet is read into a uint8_t first and then converted, so *X only
+ * ever holds true or false; writing an arbitrary octet straight into a
+ * bool object would leave it with an invalid representation.  Any
+ * non-zero octet is stored as true.
+ *
+ * NOTE(review): the BARE specification appears to treat octets other
+ * than 0 and 1 as an invalid message; bare_error has no matching code,
+ * so such input is accepted here -- confirm whether it should fail.
+ *
+ * Returns the result of bare_get_u8(); *X is only written on success.
+ */
+bare_error
+bare_get_bool(struct bare_reader *ctx, bool *x)
+{
+	uint8_t octet = 0;
+	bare_error err = bare_get_u8(ctx, &octet);
+
+	if (err == BARE_ERROR_NONE) {
+		*x = (octet != 0);
+	}
+
+	return err;
+}
+
+/* Write SZ bytes from SRC through the writer callback, with no length prefix. */
+bare_error
+bare_put_fixed_data(struct bare_writer *ctx, uint8_t *src, uint64_t sz)
+{
+	void *bytes = src;
+
+	return ctx->write(ctx->buffer, bytes, sz);
+}
+
+/* Read exactly SZ bytes into DST through the reader callback, with no length prefix. */
+bare_error
+bare_get_fixed_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz)
+{
+	bare_error status = ctx->read(ctx->buffer, dst, sz);
+
+	return status;
+}
+
+/*
+ * Write SZ as a variable-length unsigned integer followed by the SZ
+ * bytes at SRC.  The payload is not written if the length prefix fails.
+ */
+bare_error
+bare_put_data(struct bare_writer *ctx, uint8_t *src, uint64_t sz)
+{
+	bare_error err = bare_put_uint(ctx, sz);
+
+	if (err != BARE_ERROR_NONE) {
+		return err;
+	}
+
+	return bare_put_fixed_data(ctx, src, sz);
+}
+
+/*
+ * Read a length-prefixed byte string into DST, whose capacity is SZ.
+ *
+ * The length prefix is read with bare_get_uint().  If it exceeds SZ,
+ * BARE_ERROR_BUFFER_TOO_SMALL is returned and no payload is read;
+ * otherwise exactly that many bytes are read into DST.  The decoded
+ * length is not reported back to the caller.
+ */
+bare_error
+bare_get_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz)
+{
+	uint64_t encoded_len = 0;
+	bare_error err = bare_get_uint(ctx, &encoded_len);
+
+	if (err != BARE_ERROR_NONE) {
+		return err;
+	}
+
+	if (encoded_len > sz) {
+		return BARE_ERROR_BUFFER_TOO_SMALL;
+	}
+
+	return bare_get_fixed_data(ctx, dst, encoded_len);
+}
+
+/*
+ * Write the SZ bytes at SRC as a length-prefixed string.  Nothing is
+ * written and BARE_ERROR_INVALID_UTF8 is returned when checkstr()
+ * rejects the input.
+ */
+bare_error
+bare_put_str(struct bare_writer *ctx, char *src, uint64_t sz)
+{
+	if (checkstr(src, sz)) {
+		return bare_put_data(ctx, (uint8_t *)src, sz);
+	}
+
+	return BARE_ERROR_INVALID_UTF8;
+}
+
+/*
+ * Read a length-prefixed string into DST, whose capacity is SZ bytes.
+ *
+ * The length prefix is read first.  If it exceeds SZ,
+ * BARE_ERROR_BUFFER_TOO_SMALL is returned and no payload is read.
+ * Otherwise that many bytes are read into DST and only those bytes are
+ * validated with checkstr(); the unused tail of DST is never inspected.
+ * BARE_ERROR_INVALID_UTF8 is returned when validation fails.
+ *
+ * DST is not NUL-terminated and the decoded length is not reported
+ * back to the caller.
+ */
+bare_error
+bare_get_str(struct bare_reader *ctx, char *dst, uint64_t sz)
+{
+	uint64_t len = 0;
+	bare_error err = bare_get_uint(ctx, &len);
+
+	if (err != BARE_ERROR_NONE) {
+		return err;
+	}
+
+	if (len > sz) {
+		return BARE_ERROR_BUFFER_TOO_SMALL;
+	}
+
+	err = bare_get_fixed_data(ctx, (uint8_t *)dst, len);
+	if (err != BARE_ERROR_NONE) {
+		return err;
+	}
+
+	return checkstr(dst, len) ? BARE_ERROR_NONE : BARE_ERROR_INVALID_UTF8;
+}
diff --git a/git2d/bare.h b/git2d/bare.h
new file mode 100644
index 0000000..389017f
--- /dev/null
+++ b/git2d/bare.h
@@ -0,0 +1,70 @@
+/*-
+ * SPDX-License-Identifier: MIT
+ * SPDX-FileCopyrightText: Copyright (c) 2022 Frank Smit <https://61924.nl/>
+ */
+
+#ifndef BARE_H
+#define BARE_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+/* Result code returned by every bare_put_* and bare_get_* function. */
+typedef enum {
+ BARE_ERROR_NONE, /* success */
+ BARE_ERROR_WRITE_FAILED, /* presumably for write callbacks to report failure -- not produced by bare.c itself */
+ BARE_ERROR_READ_FAILED, /* presumably for read callbacks to report failure -- confirm against callers */
+ BARE_ERROR_BUFFER_TOO_SMALL, /* an encoded length prefix exceeded the destination capacity */
+ BARE_ERROR_INVALID_UTF8, /* string contents failed UTF-8 validation */
+} bare_error;
+
+/*
+ * I/O callbacks supplied by the user.  Each is called with the context's
+ * `buffer` pointer, a pointer to the bytes to write from (src) or read
+ * into (dst), and a byte count; the returned bare_error is passed back
+ * to the caller of the bare_put_* and bare_get_* function.
+ */
+typedef bare_error (*bare_write_func)(void *buffer, void *src, uint64_t sz);
+typedef bare_error (*bare_read_func)(void *buffer, void *dst, uint64_t sz);
+
+/* Output context passed to every bare_put_* function. */
+struct bare_writer {
+ void *buffer; /* opaque pointer handed unchanged to `write` as its first argument */
+ bare_write_func write; /* callback that consumes the encoded bytes */
+};
+
+/* Input context passed to every bare_get_* function. */
+struct bare_reader {
+ void *buffer; /* opaque pointer handed unchanged to `read` as its first argument */
+ bare_read_func read; /* callback that supplies the requested number of bytes */
+};
+
+/*
+ * Unsigned integers.  The fixed-width variants are encoded
+ * least-significant octet first; the varuint variants use seven payload
+ * bits per octet with the high bit as a continuation flag.
+ */
+bare_error bare_put_uint(struct bare_writer *ctx, uint64_t x); /* varuint */
+bare_error bare_get_uint(struct bare_reader *ctx, uint64_t *x); /* varuint */
+bare_error bare_put_u8(struct bare_writer *ctx, uint8_t x);
+bare_error bare_get_u8(struct bare_reader *ctx, uint8_t *x);
+bare_error bare_put_u16(struct bare_writer *ctx, uint16_t x);
+bare_error bare_get_u16(struct bare_reader *ctx, uint16_t *x);
+bare_error bare_put_u32(struct bare_writer *ctx, uint32_t x);
+bare_error bare_get_u32(struct bare_reader *ctx, uint32_t *x);
+bare_error bare_put_u64(struct bare_writer *ctx, uint64_t x);
+bare_error bare_get_u64(struct bare_reader *ctx, uint64_t *x);
+
+/*
+ * Signed integers.  The fixed-width variants reuse the unsigned
+ * encoders and decoders; the varint variants move the sign into the
+ * lowest bit before using the varuint encoding.
+ */
+bare_error bare_put_int(struct bare_writer *ctx, int64_t x); /* varint */
+bare_error bare_get_int(struct bare_reader *ctx, int64_t *x); /* varint */
+bare_error bare_put_i8(struct bare_writer *ctx, int8_t x);
+bare_error bare_get_i8(struct bare_reader *ctx, int8_t *x);
+bare_error bare_put_i16(struct bare_writer *ctx, int16_t x);
+bare_error bare_get_i16(struct bare_reader *ctx, int16_t *x);
+bare_error bare_put_i32(struct bare_writer *ctx, int32_t x);
+bare_error bare_get_i32(struct bare_reader *ctx, int32_t *x);
+bare_error bare_put_i64(struct bare_writer *ctx, int64_t x);
+bare_error bare_get_i64(struct bare_reader *ctx, int64_t *x);
+
+/* Floating point, transferred as 4 (f32) or 8 (f64) octets. */
+bare_error bare_put_f32(struct bare_writer *ctx, float x);
+bare_error bare_get_f32(struct bare_reader *ctx, float *x);
+bare_error bare_put_f64(struct bare_writer *ctx, double x);
+bare_error bare_get_f64(struct bare_reader *ctx, double *x);
+
+/* Booleans, transferred as a single octet. */
+bare_error bare_put_bool(struct bare_writer *ctx, bool x);
+bare_error bare_get_bool(struct bare_reader *ctx, bool *x);
+
+/*
+ * Byte strings.  The fixed_data variants transfer exactly sz bytes with
+ * no prefix.  The data and str variants add a varuint length prefix; on
+ * the get side sz is the capacity of dst, and
+ * BARE_ERROR_BUFFER_TOO_SMALL is returned when the encoded length
+ * exceeds it.  The str variants additionally validate UTF-8 and return
+ * BARE_ERROR_INVALID_UTF8 on failure.  The get functions do not report
+ * the decoded length back to the caller.
+ */
+bare_error bare_put_fixed_data(struct bare_writer *ctx, uint8_t *src, uint64_t sz);
+bare_error bare_get_fixed_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz);
+bare_error bare_put_data(struct bare_writer *ctx, uint8_t *src, uint64_t sz);
+bare_error bare_get_data(struct bare_reader *ctx, uint8_t *dst, uint64_t sz);
+bare_error bare_put_str(struct bare_writer *ctx, char *src, uint64_t sz);
+bare_error bare_get_str(struct bare_reader *ctx, char *dst, uint64_t sz);
+
+#endif /* BARE_H */
diff --git a/git2d/utf8.h b/git2d/utf8.h
new file mode 100644
index 0000000..894cbd5
--- /dev/null
+++ b/git2d/utf8.h
@@ -0,0 +1,74 @@
+/*-
+ * SPDX-License-Identifier: Unlicense
+ * SPDX-FileContributor: Chris Wellons <wellons@nullprogram.com>
+ *
+ * From: https://nullprogram.com/blog/2017/10/06/
+ */
+
+#ifndef UTF8_H
+#define UTF8_H
+
+#include <stdint.h>
+
+/*
+ * Decode the next character, C, from BUF, reporting errors in E.
+ *
+ * Since this is a branchless decoder, four bytes will be read from the
+ * buffer regardless of the actual length of the next character. This
+ * means the buffer _must_ have at least three bytes of zero padding
+ * following the end of the data stream.
+ *
+ * Errors are reported in E, which will be non-zero if the parsed
+ * character was somehow invalid: invalid byte sequence, non-canonical
+ * encoding, or a surrogate half.
+ *
+ * The function returns a pointer to the next character. When an error
+ * occurs, this pointer will be a guess that depends on the particular
+ * error, but it will always advance at least one byte.
+ */
+static void *
+utf8_decode(void *buf, uint32_t *c, int *e)
+{
+ /*
+  * Sequence length indexed by the top five bits of the lead byte.
+  * Zero marks a byte that cannot start a sequence (0x80-0xBF and
+  * 0xF8-0xFF).
+  */
+ static const char lengths[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0
+ };
+ /* The tables below are all indexed by the sequence length (0-4). */
+ /* Payload bits of the lead byte. */
+ static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
+ /*
+  * Smallest code point accepted for each length.  Entry 0 is larger
+  * than any value the decoder can produce, so an invalid lead byte
+  * is always flagged.
+  */
+ static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
+ /* Right shift that drops the unused low bits of the four-byte load. */
+ static const int shiftc[] = {0, 18, 12, 6, 0};
+ /* Right shift that drops the error bits of unused tail bytes. */
+ static const int shifte[] = {0, 6, 4, 2, 0};
+
+ uint8_t *s = buf;
+ int len = lengths[s[0] >> 3];
+
+ /*
+ * Compute the pointer to the next character early so that the next
+ * iteration can start working on the next character. Neither Clang
+ * nor GCC figure out this reordering on their own.
+ */
+ /* `!len` makes an invalid lead byte (len == 0) advance by one byte. */
+ uint8_t *next = s + len + !len;
+
+ /*
+ * Assume a four-byte character and load four bytes. Unused bits are
+ * shifted out.
+ */
+ *c = (uint32_t)(s[0] & masks[len]) << 18;
+ *c |= (uint32_t)(s[1] & 0x3f) << 12;
+ *c |= (uint32_t)(s[2] & 0x3f) << 6;
+ *c |= (uint32_t)(s[3] & 0x3f) << 0;
+ *c >>= shiftc[len];
+
+ /* Accumulate the various error conditions. */
+ *e = (*c < mins[len]) << 6; /* non-canonical encoding */
+ *e |= ((*c >> 11) == 0x1b) << 7; /* surrogate half? */
+ *e |= (*c > 0x10FFFF) << 8; /* out of range? */
+ /* Collect the top two bits of each tail byte into bits 5..0 of *e. */
+ *e |= (s[1] & 0xc0) >> 2;
+ *e |= (s[2] & 0xc0) >> 4;
+ *e |= (s[3] ) >> 6;
+ *e ^= 0x2a; /* top two bits of each tail byte correct? */
+ *e >>= shifte[len];
+
+ return next;
+}
+
+#endif