From 2a41a998c96b3679651f574ffb96f539842aa61e Mon Sep 17 00:00:00 2001 From: createsource Date: Mon, 18 May 2026 19:30:58 -0400 Subject: [PATCH 1/4] Code Cleanup #1 --- bq.c | 376 ++++++++++++++++++++++++++++++++++++++--------------------- bq.h | 50 ++++++++ 2 files changed, 290 insertions(+), 136 deletions(-) create mode 100644 bq.h diff --git a/bq.c b/bq.c index b98ccda..309cf53 100644 --- a/bq.c +++ b/bq.c @@ -12,6 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include #include @@ -35,37 +36,7 @@ //#define PRINT_BUT_DONT_EXEC -#define code_trap() code_append("\xcc") - -#define likely(x) (__builtin_expect(!!(x), 1)) -#define unlikely(x) (__builtin_expect(!!(x), 0)) -#define likeliness(x, l) (__builtin_expect_with_probability(!!(x), 0, l)) -#define hot __attribute((hot)) -#define cold __attribute((cold)) -#define noreturn __attribute((noreturn)) - -static char *tape, *tapestart, *tapeguardpages[2]; -static size_t realtapesize; - -typedef enum { OP_MOVE, OP_ADD, OP_OUTPUT, OP_INPUT, OP_JUMP_RIGHT, OP_JUMP_LEFT, OP_CLEAR, OP_ADD_TO, OP_MOVE_UNTIL } Opcode; - -static const char *const nops[] = { - "\x90", // nop - "\x66\x90", // xchg ax,ax - "\x0f\x1f\x00", // nop DWORD PTR [eax] - "\x0f\x1f\x40\x00", // nop DWORD PTR [eax+0x0] - "\x0f\x1f\x44\x00\x00", // nop DWORD PTR [eax+eax*1+0x0] - "\x66\x0f\x1f\x44\x00\x00", // nop WORD PTR [eax+eax*1+0x0] - "\x0f\x1f\x80\x00\x00\x00\x00", // nop DWORD PTR [eax+0x0] - "\x0f\x1f\x84\x00\x00\x00\x00\x00", // nop DWORD PTR [eax+eax*1+0x0] - "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", // nop WORD PTR [eax+eax*1+0x0] - "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00" // nop WORD PTR cs:[eax+eax*1+0x0] -}; - -typedef struct { - Opcode op; - int arg; -} Instr; +#include "bq.h" static inline size_t max(size_t a, size_t b) @@ -77,7 +48,6 @@ static void cold usage(char *argv0) { fprintf(stderr, "usage: %s [file]\n", argv0); - exit(1); } static void noreturn cold @@ -187,133 +157,267 @@ handler(int signum, siginfo_t *si, void *ucontext) die("could not protect tape memory underflow guard page:"); } -int -main(int argc, char *argv[]) +bool +should_use_buffer(char **stdin_text) { - if unlikely (argc != 2) - usage(argv[0]); + struct stat stdin_st; + bool stdin_complete = isstdincomplete(&stdin_st); + if (!stdin_complete) return false; - int fd = open(argv[1], O_RDONLY); - if unlikely (fd < 0) - die("cannot access '%s':", argv[1]); + if (S_ISREG(stdin_st.st_mode)) { + *stdin_text = emalloc(stdin_st.st_size); + read(STDIN_FILENO, *stdin_text, stdin_st.st_size); - struct stat st; - fstat(fd, &st); + return true; + } + if (S_ISFIFO(stdin_st.st_mode)) { + int size; + ioctl(STDIN_FILENO, FIONREAD, &size); + + *stdin_text = emalloc(size + 1); + read(STDIN_FILENO, *stdin_text, size); + (*stdin_text)[size] = '\0'; - char *txt = emalloc(st.st_size); - read(fd, txt, st.st_size); - close(fd); + return true; + } - struct stat stdin_st; - bool stdin_complete = isstdincomplete(&stdin_st); - char *stdin_txt = NULL; + return false; +} - if (stdin_complete) { - if (S_ISREG(stdin_st.st_mode)) { - stdin_txt = emalloc(stdin_st.st_size); - read(STDIN_FILENO, stdin_txt, stdin_st.st_size); - } else if (S_ISFIFO(stdin_st.st_mode)) { - int n; - ioctl(STDIN_FILENO, FIONREAD, &n); - stdin_txt = emalloc(n + 1); - read(STDIN_FILENO, stdin_txt, n); - stdin_txt[n] = '\0'; - } else { - stdin_complete = false; +cvector(Instr) instrs = NULL; + +// funny type stuff we just talked about +void +add_instr(MaybeInstr maybe_instr) +{ + if (maybe_instr.op == OP_NONE) return; + + Instr instr = maybe_instr; + cvector_push_back(instrs, instr); +} + +bool +instr_equals(MaybeInstr *instr, Opcode op, int arg) +{ + if (arg == 0) return instr->op == op; + + return instr->op == op && instr->arg == arg; +} + +bool has_no_instr(MaybeInstr instr) +{ + return instr.op == OP_NONE; +} + +// maybe add 1/-1 optimization because +// we only call this if we know there's +// a move instruction +MaybeInstr +procinstr_move(char **ptr) +{ + int move_cnt = 0; + + while (true) { + if (isop(**ptr)) { + if (**ptr == '>') { + move_cnt++; + } else if (**ptr == '<') { + move_cnt--; + } else break; } + (*ptr)++; } - cvector(Instr) instrs = NULL; - cvector_reserve(instrs, (size_t)st.st_size); + if unlikely (move_cnt == 0) { + return NO_INSTR; + } - for (char *s = txt; likely(*s); s++) { - Instr instr; - switch (*s) { - case '>': - case '<': { - int n = 0; - for (s--; s[1] == '>' || s[1] == '<' || !isop(s[1]); s++) - if (s[1] == '>') - n++; - else if (s[1] == '<') - n--; - - if likely (n != 0) { - instr = (Instr){ OP_MOVE, n }; - cvector_push_back(instrs, instr); - } + MaybeInstr instr; + instr.op = OP_MOVE; + instr.arg = move_cnt; + return instr; +} - break; +// same note as `procinstr_move` but +// for offset obvi +MaybeInstr +procinstr_value(char **ptr) +{ + int offset_cnt = 0; + + while (true) { + if (isop(**ptr)) { + if (**ptr == '+') { + offset_cnt++; + } else if (**ptr == '-') { + offset_cnt--; + } else break; } - case '+': - case '-': { - int n = 0; - for (s--; s[1] == '+' || s[1] == '-' || !isop(s[1]); s++) - if (s[1] == '+') - n++; - else if (s[1] == '-') - n--; - - if likely (n != 0) { - instr = (Instr){ OP_ADD, n }; - cvector_push_back(instrs, instr); - } + (*ptr)++; + } - break; + if unlikely (offset_cnt == 0) { + return NO_INSTR; + } + + MaybeInstr instr; + instr.op = OP_ADD; + instr.arg = offset_cnt; + return instr; +} + +MaybeInstr +procinstr_clear(Instr *start) +{ + if (instr_equals(&start[0], OP_JUMP_RIGHT, 0) && + instr_equals(&start[1], OP_ADD, 0) && + (start[1].arg & 1) == 1 + ) return (MaybeInstr){ .op=OP_CLEAR }; + + return NO_INSTR; +} + +MaybeInstr +procinstr_addto(Instr *start) +{ + if (instr_equals(&start[0], OP_JUMP_RIGHT, 0) && // ] + instr_equals(&start[1], OP_ADD, -1) && // - + instr_equals(&start[2], OP_MOVE, 0) && // >/< + instr_equals(&start[3], OP_ADD, 1) && // + + instr_equals(&start[4], OP_MOVE, 0) && // + start[4].arg == -start[2].arg // make sure not [->+>]/[-<+<] + ) return (MaybeInstr){ + OP_ADD_TO, + start[2].arg + }; + + return NO_INSTR; +} + +MaybeInstr +procinstr_movetil(Instr *start) +{ + if ( + instr_equals(&start[0], OP_JUMP_RIGHT, 0) && + instr_equals(&start[1], OP_MOVE, 0) + ) return (MaybeInstr){ + OP_MOVE_UNTIL, + start[1].arg + }; + + return NO_INSTR; +} + +MaybeInstr +procinstr_loop(void) +{ + MaybeInstr instr; + size_t len = cvector_size(instrs); + + if (len >= 2) { + // [-] or [+] + instr = procinstr_clear(&instrs[len - 2]); + if (!has_no_instr(instr)) { + cvector_set_size(instrs, len - 2); + return instr; } - case '.': - instr.op = OP_OUTPUT; - cvector_push_back(instrs, instr); - break; - case ',': - instr.op = OP_INPUT; - cvector_push_back(instrs, instr); - break; - case '[': - instr.op = OP_JUMP_RIGHT; - cvector_push_back(instrs, instr); - break; - case ']': { - size_t len = cvector_size(instrs); - - // [-] or [+] - if (len >= 2 && instrs[len - 1].op == OP_ADD && instrs[len - 1].arg & 1 && instrs[len - 2].op == OP_JUMP_RIGHT) { - cvector_set_size(instrs, len - 2); - instr.op = OP_CLEAR; - cvector_push_back(instrs, instr); + + // [>] or [<] + instr = procinstr_movetil(&instrs[len - 2]); + if (!has_no_instr(instr)) { + cvector_set_size(instrs, len - 2); + return instr; + } + } + + // [->+<] or [-<+>] + if (len >= 5) { + instr = procinstr_addto(&instrs[len - 5]); + if (!has_no_instr(instr)) { + cvector_set_size(instrs, len - 5); + return instr; + } + } + + instr.op = OP_JUMP_LEFT; + return instr; +} + +void +process_instructions(char *text) +{ + for (char *s = text; likely(*s); s++) { + MaybeInstr instr; + switch (*s) { + case '>': case '<': + instr = procinstr_move(&s); break; - } - // [->+<] or [-<+>] - if (len >= 5 && instrs[len - 1].op == OP_MOVE && instrs[len - 2].op == OP_ADD && instrs[len - 2].arg == 1 && - instrs[len - 3].op == OP_MOVE && instrs[len - 4].op == OP_ADD && instrs[len - 4].arg == -1 && - instrs[len - 1].arg == -instrs[len - 3].arg && instrs[len - 5].op == OP_JUMP_RIGHT) { - cvector_set_size(instrs, len - 5); - instr = (Instr){ OP_ADD_TO, instrs[len - 3].arg }; - cvector_push_back(instrs, instr); + case '+': case '-': + instr = procinstr_value(&s); break; - } - // [>] or [<] - if (len >= 2 && instrs[len - 1].op == OP_MOVE && instrs[len - 2].op == OP_JUMP_RIGHT) { - cvector_set_size(instrs, len - 2); - instr = (Instr){ OP_MOVE_UNTIL, instrs[len - 1].arg }; - cvector_push_back(instrs, instr); + case '.': + instr.op = OP_OUTPUT; break; - } - instr.op = OP_JUMP_LEFT; - cvector_push_back(instrs, instr); - break; - } - default: break; + case ',': + instr.op = OP_INPUT; + break; + + case '[': + instr.op = OP_JUMP_RIGHT; + break; + + case ']': + instr = procinstr_loop(); + break; + + default: + instr.op = OP_NONE; } + add_instr(instr); + } +} + +int +main(int argc, char *argv[]) +{ + // If code input isn't a filename, display help info + if unlikely (argc != 2) { + usage(argv[0]); + return 1; } - free(txt); + const char* filename = argv[1]; + + // Get file descriptor for file + int file_desc = open(filename, O_RDONLY); + if unlikely (file_desc < 0) { + die("cannot access '%s':", filename); + } + + // Get file info from descriptor + struct stat file_stats; + fstat(file_desc, &file_stats); + + // Get text from file + char *text = emalloc(file_stats.st_size); + read(file_desc, text, file_stats.st_size); + close(file_desc); + + // check if we should read all of stdin + char *stdin_text = NULL; + bool stdin_complete = should_use_buffer(&stdin_text); + + cvector_reserve(instrs, (size_t)file_stats.st_size); + + process_instructions(text); + + free(text); - size_t codemapsize = max(cvector_size(instrs) * 64, 1); - unsigned char *fn = mmap(NULL, codemapsize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + size_t codemapsize = max(cvector_size(instrs) * 64, 1); + unsigned char *fn = mmap(NULL, codemapsize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if unlikely (!fn) die("could not allocate executable memory:"); @@ -385,7 +489,7 @@ main(int argc, char *argv[]) if (stdin_complete) { code_append("\x49\xbf\x00\x00\x00\x00\x00\x00\x00\x00"); // movabs r15, imm64 - *(void **)(fn + codesize - 8) = stdin_txt; + *(void **)(fn + codesize - 8) = stdin_text; } code_append("\x48\x81\xec\x00\x20\x00\x00" // sub rsp,0x2000 diff --git a/bq.h b/bq.h new file mode 100644 index 0000000..af86913 --- /dev/null +++ b/bq.h @@ -0,0 +1,50 @@ +#include + +#define code_trap() code_append("\xcc") + +#define likely(x) (__builtin_expect(!!(x), 1)) +#define unlikely(x) (__builtin_expect(!!(x), 0)) +#define likeliness(x, l) (__builtin_expect_with_probability(!!(x), 0, l)) +#define hot __attribute((hot)) +#define cold __attribute((cold)) +#define noreturn __attribute((noreturn)) + +static char *tape, *tapestart, *tapeguardpages[2]; +static size_t realtapesize; + +#define OP_NONE 0 +typedef enum { + OP_MOVE = 1, + OP_ADD, + OP_OUTPUT, + OP_INPUT, + OP_JUMP_RIGHT, + OP_JUMP_LEFT, + OP_CLEAR, + OP_ADD_TO, + OP_MOVE_UNTIL +} Opcode; + +static const char *const nops[] = { + "\x90", // nop + "\x66\x90", // xchg ax,ax + "\x0f\x1f\x00", // nop DWORD PTR [eax] + "\x0f\x1f\x40\x00", // nop DWORD PTR [eax+0x0] + "\x0f\x1f\x44\x00\x00", // nop DWORD PTR [eax+eax*1+0x0] + "\x66\x0f\x1f\x44\x00\x00", // nop WORD PTR [eax+eax*1+0x0] + "\x0f\x1f\x80\x00\x00\x00\x00", // nop DWORD PTR [eax+0x0] + "\x0f\x1f\x84\x00\x00\x00\x00\x00", // nop DWORD PTR [eax+eax*1+0x0] + "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", // nop WORD PTR [eax+eax*1+0x0] + "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00" // nop WORD PTR cs:[eax+eax*1+0x0] +}; + +// should not have OP_NONE as Opcode +typedef struct { + Opcode op; + int arg; +} Instr; + +// allows OP_NONE +typedef Instr MaybeInstr; + +#define NO_INSTR ((MaybeInstr){ .op = 0 }) \ No newline at end of file From 611d0104a64f1d1e4c3b400c9a982c69d9df8887 Mon Sep 17 00:00:00 2001 From: createsource Date: Wed, 20 May 2026 14:45:37 -0400 Subject: [PATCH 2/4] Code Cleanup #2 (compiles) --- Makefile | 6 +- bq.c | 656 ++++++++++++++++++++++++++++++------------------------- bq.h | 1 + 3 files changed, 364 insertions(+), 299 deletions(-) diff --git a/Makefile b/Makefile index 5798a73..cb5adb8 100644 --- a/Makefile +++ b/Makefile @@ -13,9 +13,9 @@ # limitations under the License. CPPFLAGS = -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200809L -D_GNU_SOURCE -#CFLAGS = -std=c99 -pedantic -Wall -Wextra -Og -ggdb3 ${CPPFLAGS} -CFLAGS = -std=c99 -pedantic -Wall -Wextra -O3 ${CPPFLAGS} -CC = cc +CFLAGS = -std=c99 -pedantic -Wall -Wextra -Og -ggdb3 ${CPPFLAGS} +# CFLAGS = -std=c99 -pedantic -Wall -Wextra -O3 ${CPPFLAGS} +CC = clang-22 bq: bq.c ${CC} -o $@ ${CFLAGS} $< diff --git a/bq.c b/bq.c index 309cf53..e3fa6dc 100644 --- a/bq.c +++ b/bq.c @@ -44,6 +44,12 @@ max(size_t a, size_t b) return a > b ? a : b; } +static inline size_t +min(size_t a, size_t b) +{ + return a > b ? b : a; +} + static void cold usage(char *argv0) { @@ -97,26 +103,26 @@ isop(char c) return strchr("><+-.,[]", c); } -static bool -isstdincomplete(struct stat *st) -{ - if (isatty(STDIN_FILENO)) - return false; +// static bool cold +// isstdincomplete(struct stat *st) +// { +// if (isatty(STDIN_FILENO)) +// return false; - if (fstat(STDIN_FILENO, st) < 0) - return false; +// if (fstat(STDIN_FILENO, st) < 0) +// return false; - if (S_ISREG(st->st_mode)) - return true; +// if (S_ISREG(st->st_mode)) +// return true; - if (S_ISFIFO(st->st_mode)) { - struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN }; - poll(&pfd, 1, 0); - return pfd.revents & POLLHUP; - } +// if (S_ISFIFO(st->st_mode)) { +// struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN }; +// poll(&pfd, 1, 0); +// return pfd.revents & POLLHUP; +// } - return false; -} +// return false; +// } static void handler(int signum, siginfo_t *si, void *ucontext) @@ -157,37 +163,37 @@ handler(int signum, siginfo_t *si, void *ucontext) die("could not protect tape memory underflow guard page:"); } -bool -should_use_buffer(char **stdin_text) -{ - struct stat stdin_st; - bool stdin_complete = isstdincomplete(&stdin_st); - if (!stdin_complete) return false; - - if (S_ISREG(stdin_st.st_mode)) { - *stdin_text = emalloc(stdin_st.st_size); - read(STDIN_FILENO, *stdin_text, stdin_st.st_size); - - return true; - } - if (S_ISFIFO(stdin_st.st_mode)) { - int size; - ioctl(STDIN_FILENO, FIONREAD, &size); +// static bool cold +// should_use_buffer(char **stdin_text) +// { +// struct stat stdin_st; +// bool stdin_complete = isstdincomplete(&stdin_st); +// if (!stdin_complete) return false; + +// if (S_ISREG(stdin_st.st_mode)) { +// *stdin_text = emalloc(stdin_st.st_size); +// read(STDIN_FILENO, *stdin_text, stdin_st.st_size); + +// return true; +// } +// if (S_ISFIFO(stdin_st.st_mode)) { +// int size; +// ioctl(STDIN_FILENO, FIONREAD, &size); - *stdin_text = emalloc(size + 1); - read(STDIN_FILENO, *stdin_text, size); - (*stdin_text)[size] = '\0'; +// *stdin_text = emalloc(size + 1); +// read(STDIN_FILENO, *stdin_text, size); +// (*stdin_text)[size] = '\0'; - return true; - } +// return true; +// } - return false; -} +// return false; +// } cvector(Instr) instrs = NULL; // funny type stuff we just talked about -void +static void add_instr(MaybeInstr maybe_instr) { if (maybe_instr.op == OP_NONE) return; @@ -196,7 +202,7 @@ add_instr(MaybeInstr maybe_instr) cvector_push_back(instrs, instr); } -bool +static bool instr_equals(MaybeInstr *instr, Opcode op, int arg) { if (arg == 0) return instr->op == op; @@ -204,7 +210,7 @@ instr_equals(MaybeInstr *instr, Opcode op, int arg) return instr->op == op && instr->arg == arg; } -bool has_no_instr(MaybeInstr instr) +static bool has_no_instr(MaybeInstr instr) { return instr.op == OP_NONE; } @@ -212,8 +218,8 @@ bool has_no_instr(MaybeInstr instr) // maybe add 1/-1 optimization because // we only call this if we know there's // a move instruction -MaybeInstr -procinstr_move(char **ptr) +static MaybeInstr +interp_move(char **ptr) { int move_cnt = 0; @@ -240,8 +246,8 @@ procinstr_move(char **ptr) // same note as `procinstr_move` but // for offset obvi -MaybeInstr -procinstr_value(char **ptr) +static MaybeInstr +interp_value(char **ptr) { int offset_cnt = 0; @@ -266,8 +272,8 @@ procinstr_value(char **ptr) return instr; } -MaybeInstr -procinstr_clear(Instr *start) +static MaybeInstr +interp_clear(Instr *start) { if (instr_equals(&start[0], OP_JUMP_RIGHT, 0) && instr_equals(&start[1], OP_ADD, 0) && @@ -277,8 +283,8 @@ procinstr_clear(Instr *start) return NO_INSTR; } -MaybeInstr -procinstr_addto(Instr *start) +static MaybeInstr +interp_addto(Instr *start) { if (instr_equals(&start[0], OP_JUMP_RIGHT, 0) && // ] instr_equals(&start[1], OP_ADD, -1) && // - @@ -294,8 +300,8 @@ procinstr_addto(Instr *start) return NO_INSTR; } -MaybeInstr -procinstr_movetil(Instr *start) +static MaybeInstr +interp_movetil(Instr *start) { if ( instr_equals(&start[0], OP_JUMP_RIGHT, 0) && @@ -308,22 +314,22 @@ procinstr_movetil(Instr *start) return NO_INSTR; } -MaybeInstr -procinstr_loop(void) +static MaybeInstr +interp_loop(void) { MaybeInstr instr; size_t len = cvector_size(instrs); if (len >= 2) { // [-] or [+] - instr = procinstr_clear(&instrs[len - 2]); + instr = interp_clear(&instrs[len - 2]); if (!has_no_instr(instr)) { cvector_set_size(instrs, len - 2); return instr; } // [>] or [<] - instr = procinstr_movetil(&instrs[len - 2]); + instr = interp_movetil(&instrs[len - 2]); if (!has_no_instr(instr)) { cvector_set_size(instrs, len - 2); return instr; @@ -332,7 +338,7 @@ procinstr_loop(void) // [->+<] or [-<+>] if (len >= 5) { - instr = procinstr_addto(&instrs[len - 5]); + instr = interp_addto(&instrs[len - 5]); if (!has_no_instr(instr)) { cvector_set_size(instrs, len - 5); return instr; @@ -343,18 +349,18 @@ procinstr_loop(void) return instr; } -void -process_instructions(char *text) +static void +interpret_text(char *text) { for (char *s = text; likely(*s); s++) { MaybeInstr instr; switch (*s) { case '>': case '<': - instr = procinstr_move(&s); + instr = interp_move(&s); break; case '+': case '-': - instr = procinstr_value(&s); + instr = interp_value(&s); break; case '.': @@ -370,7 +376,7 @@ process_instructions(char *text) break; case ']': - instr = procinstr_loop(); + instr = interp_loop(); break; default: @@ -380,270 +386,258 @@ process_instructions(char *text) } } -int -main(int argc, char *argv[]) +#define put_char_asm_offset 0 +#define put_char_asm \ + "\x0f\xb6\x03" /* movzx eax,BYTE PTR [rbx] */ \ + "\x42\x88\x44\x24\x08" /* mov BYTE PTR [rsp+r12*1+8],al */ \ + "\x49\xff\xc4" /* inc r12 */ \ + "\x3c\x0a" /* cmp al,0xa */ \ + "\x74\x09" /* je flush */ \ + "\x49\x81\xfc\x00\x10\x00\x00" /* cmp r12,0x1000 */ \ + "\x75\x16" /* jne done */ \ + "\x48\x31\xc0" /* xor rax,rax */ \ + "\x48\xff\xc0" /* inc rax */ \ + "\x48\x89\xc7" /* mov rdi,rax */ \ + "\x48\x8d\x74\x24\x08" /* lea rsi,[rsp+8] */ \ + "\x4c\x89\xe2" /* mov rdx,r12 */ \ + "\x0f\x05" /* syscall */ \ + "\x45\x31\xe4" /* xor r12d,r12d */ \ + "\xc3" /* ret */ + +// compiles to a number even tho this would seem big!! just use optimization :) +#define get_char_asm_offset sizeof(put_char_asm) +#define get_char_asm \ + "\x49\x81\xfd\x00\x10\x00\x00" /* cmp r13,0x1000 */ \ + "\x75\x16" /* jne have_data */ \ + "\x31\xc0" /* xor eax,eax */ \ + "\x31\xff" /* xor edi,edi */ \ + "\x48\x8d\xb4\x24\x08\x10\x00\x00" /* lea rsi,[rsp+0x1008] */ \ + "\xba\x00\x10\x00\x00" /* mov edx,0x1000 */ \ + "\x0f\x05" /* syscall */ \ + "\x45\x31\xed" /* xor r13d,r13d */ \ + "\x42\x0f\xb6\x84\x2c\x08\x10\x00\x00" /* movzx eax,BYTE PTR [rsp+r13*1+0x1008] */ \ + "\x88\x03" /* mov BYTE PTR [rbx],al */ \ + "\x49\xff\xc5" /* inc r13 */ \ + "\xc3" /* ret */ + +#define code_start_asm_offset get_char_asm_offset + sizeof(get_char_asm) +#define code_start \ + "\x48\x81\xec\x00\x20\x00\x00" /* sub rsp,0x2000 */ \ + "\x4d\x31\xe4" /* xor r12,r12 */ \ + "\x49\xc7\xc5\x00\x10\x00\x00" /* mov r13,0x1000 */ \ + "\x48\x89\xfb" /* mov rbx, rdi */ + +static inline size_t +code_append(unsigned char *restrict location, const char *restrict code) { - // If code input isn't a filename, display help info - if unlikely (argc != 2) { - usage(argv[0]); - return 1; + size_t code_size = strlen(code) / sizeof(*code) - 1; + memcpy(location, code, code_size); + return code_size; +} + +static inline size_t +code_align(unsigned char *restrict location, size_t align) +{ + if (align <= 1) return 0; + + uintptr_t ptr = (uintptr_t)(location); + size_t from_alignment = ptr & (align - 1); + size_t padding = (align - from_alignment) & (align - 1); + size_t nopsize = MAX_NOP_SIZE; + + while (padding > 0) { + nopsize = min(padding, MAX_NOP_SIZE); + mempcpy(location, nops[nopsize - 1], nopsize); + padding -= nopsize; } - const char* filename = argv[1]; + return nopsize; +} - // Get file descriptor for file - int file_desc = open(filename, O_RDONLY); - if unlikely (file_desc < 0) { - die("cannot access '%s':", filename); +static size_t +procinstr_move(unsigned char *restrict location, int move_cnt) +{ + bool is_dec = move_cnt > 0; + + if (abs(move_cnt) == 1) { + // inc rbx dec rbx + const char *code = is_dec ? "\x48\xff\xc3" : "\x48\xff\xcb"; + return code_append(location, code); } - - // Get file info from descriptor - struct stat file_stats; - fstat(file_desc, &file_stats); - // Get text from file - char *text = emalloc(file_stats.st_size); - read(file_desc, text, file_stats.st_size); - close(file_desc); + move_cnt = abs(move_cnt); - // check if we should read all of stdin - char *stdin_text = NULL; - bool stdin_complete = should_use_buffer(&stdin_text); + const char value_instr = is_dec ? '\xc3' : '\xeb'; + if likely (move_cnt <= UCHAR_MAX) { // add/sub rbx, imm8 + const char code[4] = { '\x48', '\x83', value_instr, move_cnt }; + return code_append(location, code); + } else { // add/sub rbx, imm32 + const char code[7] = { '\x48', '\x81', value_instr }; + *(unsigned int *)(&code[3]) = move_cnt; + return code_append(location, code); + } +} - cvector_reserve(instrs, (size_t)file_stats.st_size); +static size_t +procinstr_add(unsigned char *restrict location, unsigned char move_cnt) +{ + bool is_dec = move_cnt > 0; + if (abs((signed)move_cnt) == 1) { + // inc BYTE PTR [rbx] dec BYTE PTR [rbx] + const char *code = is_dec ? "\xfe\x03" : "\xfe\x0b"; + return code_append(location, code); + } - process_instructions(text); + // add BYTE PTR [rbx], imm8 sub BYTE PTR [rbx], imm8 + const char value_instr = is_dec ? '\x03' : '\x2b'; + const char code[3] = { '\x80', value_instr, (char)abs((signed)move_cnt) }; + return code_append(location, code); +} - free(text); +static size_t +procinstr_out(unsigned char *restrict location, size_t codesize) +{ + const char code[5] = { '\xe8' }; + *(unsigned int *)&code[1] = put_char_asm_offset - codesize; + return code_append(location, code); +} - size_t codemapsize = max(cvector_size(instrs) * 64, 1); - unsigned char *fn = mmap(NULL, codemapsize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if unlikely (!fn) - die("could not allocate executable memory:"); +static size_t +procinstr_in(unsigned char *restrict location) +{ + return code_append(location, + "\x41\x8a\x07" // mov al, BYTE PTR [r15] + "\x88\x03" // mov BYTE PTR [rbx], al + "\x49\xff\xc7"); // inc r15 +} - size_t codesize = 0; +static size_t +procinstr_jright(unsigned char *restrict location, cvector(uintptr_t) jmps, size_t align, size_t index) +{ + size_t offset = 0; + + size_t cost = 0; + size_t jmpstraverse = cvector_size(jmps) + 1; + for (size_t j = index; jmpstraverse && likely (j < cvector_size(instrs)); j++) { + switch (instrs[j].op) { + case OP_MOVE: cost += 3; break; + case OP_ADD: cost += 1; break; + case OP_OUTPUT: cost += 40; break; + case OP_INPUT: cost += 35; break; + case OP_JUMP_RIGHT: cost += 10; break; + case OP_JUMP_LEFT: + cost += 8; + jmpstraverse--; + break; + case OP_CLEAR: cost += 2; break; + case OP_ADD_TO: cost += 10; break; + case OP_MOVE_UNTIL: cost += 10; break; + } + } - cvector(uintptr_t) jmps = NULL; - cvector_reserve(jmps, max(cvector_size(instrs) / 20, 16)); + + offset += code_align(location, align >> ((cvector_size(jmps) + 1) * cost / 50)); + offset += code_append(location, + "\x80\x3b\x00" // cmp BYTE PTR [rbx], 0 + "\x0f\x84" // jz rel32 + "\x0f\x1f\x40\x00"); // nop DWORD PTR [eax+0x0] + + return offset; +} -#define code_append(snip_) \ - do { \ - size_t snip_size_ = sizeof snip_ / sizeof *snip_ - 1; \ - memcpy(fn + codesize, snip_, snip_size_); \ - codesize += snip_size_; \ - } while (0) - -#define code_align(align) \ - do { \ - if ((align) <= 1) \ - break; \ - uintptr_t cur = (uintptr_t)(fn + codesize); \ - size_t pad = ((align) - (cur & ((align) - 1))) & ((align) - 1); \ - size_t off = codesize; \ - size_t i = 0; \ - while (pad > 0) { \ - size_t nopsize = pad > 10 ? 10 : pad; \ - memcpy(fn + off + i, nops[nopsize - 1], nopsize); \ - i += nopsize; \ - pad -= nopsize; \ - } \ - codesize = off + i; \ - } while (0) - - size_t icacheline = sysconf(_SC_LEVEL1_ICACHE_LINESIZE); - - size_t myputcharoffset = 0; - code_append("\x0f\xb6\x03" // movzx eax,BYTE PTR [rbx] - "\x42\x88\x44\x24\x08" // mov BYTE PTR [rsp+r12*1+8],al - "\x49\xff\xc4" // inc r12 - "\x3c\x0a" // cmp al,0xa - "\x74\x09" // je flush - "\x49\x81\xfc\x00\x10\x00\x00" // cmp r12,0x1000 - "\x75\x16" // jne done - "\x48\x31\xc0" // xor rax,rax - "\x48\xff\xc0" // inc rax - "\x48\x89\xc7" // mov rdi,rax - "\x48\x8d\x74\x24\x08" // lea rsi,[rsp+8] - "\x4c\x89\xe2" // mov rdx,r12 - "\x0f\x05" // syscall - "\x45\x31\xe4" // xor r12d,r12d - "\xc3"); // ret +static size_t +procinstr_jleft(unsigned char *restrict file, size_t codesize, cvector(uintptr_t) jmps) +{ + codesize += code_append(&file[codesize], "\x80\x3b\x00"); // cmp BYTE PTR [rbx], 0 + + if unlikely (cvector_size(jmps) == 0) + die("mismatched loop end"); + + size_t jmp = jmps[cvector_size(jmps) - 1]; + + { + int rel = jmp - (codesize + 2); - code_align(icacheline); - size_t mygetcharoffset = codesize; - code_append("\x49\x81\xfd\x00\x10\x00\x00" // cmp r13,0x1000 - "\x75\x16" // jne have_data - "\x31\xc0" // xor eax,eax - "\x31\xff" // xor edi,edi - "\x48\x8d\xb4\x24\x08\x10\x00\x00" // lea rsi,[rsp+0x1008] - "\xba\x00\x10\x00\x00" // mov edx,0x1000 - "\x0f\x05" // syscall - "\x45\x31\xed" // xor r13d,r13d - "\x42\x0f\xb6\x84\x2c\x08\x10\x00\x00" // movzx eax,BYTE PTR [rsp+r13*1+0x1008] - "\x88\x03" // mov BYTE PTR [rbx],al - "\x49\xff\xc5" // inc r13 - "\xc3"); // ret - - code_align(icacheline); - size_t codestartoffset = codesize; - - if (stdin_complete) { - code_append("\x49\xbf\x00\x00\x00\x00\x00\x00\x00\x00"); // movabs r15, imm64 - *(void **)(fn + codesize - 8) = stdin_text; + if likely (rel >= CHAR_MIN && rel <= CHAR_MAX) { + codesize += code_append(&file[codesize], "\x75\x00"); // jnz rel8 + file[codesize - 1] = rel; + } else { + codesize += code_append(&file[codesize], "\x0f\x85\x00\x00\x00\x00"); // jnz rel32 + *(int *)(file + codesize - 4) = rel - 4; + } + } + + { + int rel = codesize - jmp; + + if likely (rel >= CHAR_MIN && rel <= CHAR_MAX) { + file[jmp - 6] = 0x74; // jz rel8 + file[jmp - 5] = rel + 4; + } else { + *(int *)(file + jmp - 4) = rel; // rel32 + } } - code_append("\x48\x81\xec\x00\x20\x00\x00" // sub rsp,0x2000 - "\x4d\x31\xe4" // xor r12,r12 - "\x49\xc7\xc5\x00\x10\x00\x00" // mov r13,0x1000 - "\x48\x89\xfb"); // mov rbx, rdi + return codesize; +} - code_align(icacheline); +static void +process_instructions(unsigned char *restrict file, size_t codesize, size_t align) +{ + cvector(uintptr_t) jmps = NULL; + cvector_reserve(jmps, max(cvector_size(instrs) / 20, 16)); for (size_t i = 0; likely(i < cvector_size(instrs)); i++) { Instr instr = instrs[i]; switch (instr.op) { case OP_MOVE: - if (instr.arg == 1) - code_append("\x48\xff\xc3"); // inc rbx - else if (instr.arg == -1) - code_append("\x48\xff\xcb"); // dec rbx - else if (instr.arg > 0) { - unsigned int n = instr.arg; - - if likely (n <= UCHAR_MAX) { - code_append("\x48\x83\xc3\x00"); // add rbx, imm8 - fn[codesize - 1] = n; - } else { - code_append("\x48\x81\xc3\x00\x00\x00\x00"); // add rbx, imm32 - *(unsigned int *)(fn + codesize - 4) = n; - } - } else if (instr.arg < 0) { - unsigned int n = -instr.arg; - - if (n <= UCHAR_MAX) { - code_append("\x48\x83\xeb\x00"); // sub rbx, imm8 - fn[codesize - 1] = n; - } else { - code_append("\x48\x81\xeb\x00\x00\x00\x00"); // sub rbx, imm32 - *(unsigned int *)(fn + codesize - 4) = n; - } - } + codesize += procinstr_move(&file[codesize], instr.arg); break; - case OP_ADD: { - short n = instr.arg % 256; - - if (n == 1) - code_append("\xfe\x03"); // inc BYTE PTR [rbx] - else if (n == -1) - code_append("\xfe\x0b"); // dec BYTE PTR [rbx] - else if (n > 0) { - code_append("\x80\x03\x00"); // add BYTE PTR [rbx], imm8 - fn[codesize - 1] = n; - } else if (n < 0) { - code_append("\x80\x2b\x00"); // sub BYTE PTR [rbx], imm8 - fn[codesize - 1] = -n; - } + case OP_ADD: + codesize += procinstr_add(&file[codesize], instr.arg); break; - } - case OP_OUTPUT: { - code_append("\xe8\x00\x00\x00\x00"); // call rel32 - *(int *)(fn + codesize - 4) = myputcharoffset - codesize; + + case OP_OUTPUT: + codesize += procinstr_out(&file[codesize], codesize); break; - } - case OP_INPUT: { - if (!stdin_complete) { - code_append("\xe8\x00\x00\x00\x00"); // call rel32 - *(int *)(fn + codesize - 4) = mygetcharoffset - codesize; - } else { - const char snip[] = "\x41\x8a\x07" // mov al, BYTE PTR [r15] - "\x88\x03" // mov BYTE PTR [rbx], al - "\x49\xff\xc7"; // inc r15 - code_append(snip); - } + + case OP_INPUT: + codesize += procinstr_in(&file[codesize]); break; - } - case OP_JUMP_RIGHT: { - const char snip[] = "\x80\x3b\x00" // cmp BYTE PTR [rbx], 0 - "\x0f\x84" // jz rel32 - "\x0f\x1f\x40\x00"; // nop DWORD PTR [eax+0x0] - - size_t cost = 0; - size_t jmpstraverse = cvector_size(jmps) + 1; - for (size_t j = i; jmpstraverse && likely(j < cvector_size(instrs)); j++) - switch (instrs[j].op) { - case OP_MOVE: cost += 3; break; - case OP_ADD: cost += 1; break; - case OP_OUTPUT: cost += 40; break; - case OP_INPUT: cost += 35; break; - case OP_JUMP_RIGHT: cost += 10; break; - case OP_JUMP_LEFT: - cost += 8; - jmpstraverse--; - break; - case OP_CLEAR: cost += 2; break; - case OP_ADD_TO: cost += 10; break; - case OP_MOVE_UNTIL: cost += 10; break; - } - code_align(icacheline >> ((cvector_size(jmps) + 1) * cost / 50)); - code_append(snip); + case OP_JUMP_RIGHT: + codesize += procinstr_jright(&file[codesize], jmps, align, i); cvector_push_back(jmps, codesize); + printf("+1\n"); // debug break; - } - case OP_JUMP_LEFT: { - code_append("\x80\x3b\x00"); // cmp BYTE PTR [rbx], 0 - if unlikely (cvector_size(jmps) == 0) - die("mismatched ]"); - - size_t jmp = jmps[cvector_size(jmps) - 1]; + case OP_JUMP_LEFT: + codesize += procinstr_jleft(file, codesize, jmps); cvector_pop_back(jmps); - - { - int rel = jmp - (codesize + 2); - - if likely (rel >= CHAR_MIN && rel <= CHAR_MAX) { - code_append("\x75\x00"); // jnz rel8 - fn[codesize - 1] = rel; - } else { - code_append("\x0f\x85\x00\x00\x00\x00"); // jnz rel32 - *(int *)(fn + codesize - 4) = rel - 4; - } - } - - { - int rel = codesize - jmp; - - if likely (rel >= CHAR_MIN && rel <= CHAR_MAX) { - fn[jmp - 6] = 0x74; // jz rel8 - fn[jmp - 5] = rel + 4; - } else { - *(int *)(fn + jmp - 4) = rel; // rel32 - } - } - + printf("-1\n"); // debug break; - } + case OP_CLEAR: - code_append("\xc6\x03\x00"); // mov BYTE PTR [rbx], 0 + codesize += code_append(&file[codesize], "\xc6\x03\x00"); // mov BYTE PTR [rbx], 0 break; + case OP_ADD_TO: { if likely (instr.arg >= CHAR_MIN && instr.arg <= CHAR_MAX) { const char snip[] = "\x8a\x03" // mov al, BYTE PTR [rbx] "\x00\x43\x00" // add BYTE PTR [rbx + disp8], al "\xc6\x03\x00"; // mov BYTE PTR [rbx], 0 - code_append(snip); - fn[codesize - 4] = instr.arg; + codesize += code_append(&file[codesize], snip); + file[codesize - 4] = instr.arg; } else { const char snip[] = "\x8a\x03" // mov al, BYTE PTR [rbx] "\x00\x83\x00\x00\x00\x00" // add BYTE PTR [rbx + disp32], al "\xc6\x03\x00"; // mov BYTE PTR [rbx], 0 - code_append(snip); - *(int *)(fn + codesize - 7) = instr.arg; + codesize += code_append(&file[codesize], snip); + *(int *)(file + codesize - 7) = instr.arg; } break; } @@ -654,14 +648,14 @@ main(int argc, char *argv[]) "\x48\xff\xc3" // inc rbx "\xeb\xf6"; // jmp -10 - code_append(snip); + codesize += code_append(&file[codesize], snip); } else if (instr.arg == -1) { const char snip[] = "\x80\x3b\x00" // cmp BYTE PTR [rbx], 0 "\x74\x05" // je +5 "\x48\xff\xcb" // dec rbx "\xeb\xf6"; // jmp -10 - code_append(snip); + codesize += code_append(&file[codesize], snip); } else if (instr.arg > 1) { unsigned int n = instr.arg; @@ -671,16 +665,16 @@ main(int argc, char *argv[]) "\x48\x83\xc3\x00" // add rbx, imm8 "\xeb\xf5"; // jmp -11 - code_append(snip); - fn[codesize - 3] = n; + codesize += code_append(&file[codesize], snip); + file[codesize - 3] = n; } else { const char snip[] = "\x80\x3b\x00" // cmp BYTE PTR [rbx], 0 "\x74\x09" // je +9 "\x48\x81\xc3\x00\x00\x00\x00" // add rbx, imm32 "\xeb\xf2"; // jmp -14 - code_append(snip); - *(unsigned int *)(fn + codesize - 6) = n; + codesize += code_append(&file[codesize], snip); + *(unsigned int *)(file + codesize - 6) = n; } } else if (instr.arg < -1) { unsigned int n = -instr.arg; @@ -691,16 +685,16 @@ main(int argc, char *argv[]) "\x48\x83\xeb\x00" // sub rbx, imm8 "\xeb\xf5"; // jmp -11 - code_append(snip); - fn[codesize - 3] = n; + codesize += code_append(&file[codesize], snip); + file[codesize - 3] = n; } else { const char snip[] = "\x80\x3b\x00" // cmp BYTE PTR [rbx], 0 "\x74\x09" // je +9 "\x48\x81\xeb\x00\x00\x00\x00" // sub rbx, imm32 "\xeb\xf2"; // jmp -14 - code_append(snip); - *(unsigned int *)(fn + codesize - 6) = n; + codesize += code_append(&file[codesize], snip); + *(unsigned int *)(file + codesize - 6) = n; } } break; @@ -708,12 +702,41 @@ main(int argc, char *argv[]) } if unlikely (cvector_size(jmps) != 0) - die("unterminated ["); + die("unterminated loop"); + + cvector_free(jmps); +} + +void output_executable(void) +{ + // create file to write data to + size_t codemapsize = max(cvector_size(instrs) * 64, 1); + unsigned char *output_file = mmap(NULL, codemapsize, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if unlikely (!output_file) { + die("could not allocate executable memory:"); + } + + size_t codesize = 0; + + size_t icacheline = sysconf(_SC_LEVEL1_ICACHE_LINESIZE); + + codesize += code_append(&output_file[codesize], put_char_asm); + codesize += code_align(&output_file[codesize], icacheline); + + codesize += code_append(&output_file[codesize], get_char_asm); + codesize += code_align(&output_file[codesize], icacheline); + + codesize += code_append(&output_file[codesize], code_start); + codesize += code_align(&output_file[codesize], icacheline); + + process_instructions(output_file, codesize, icacheline); cvector_free(instrs); - cvector_free(jmps); - code_append("\x4d\x85\xe4" // test r12,r12 + /////// + + codesize += code_append(&output_file[codesize], + "\x4d\x85\xe4" // test r12,r12 "\x74\x11" // je 16 "\x48\x31\xc0" // xor rax,rax "\x48\xff\xc0" // inc rax @@ -725,21 +748,21 @@ main(int argc, char *argv[]) "\xc3"); // ret #ifdef PRINT_BUT_DONT_EXEC - fwrite(fn, codesize, 1, stdout); + fwrite(output_file, codesize, 1, stdout); return 0; #endif - mprotect(fn, codemapsize, PROT_EXEC); + mprotect(output_file, codemapsize, PROT_EXEC); size_t tapesize = 30000; size_t pagesize = getpagesize(); - realtapesize = (tapesize + pagesize - 1) & ~(pagesize - 1); + realtapesize = (tapesize + pagesize - 1) & ~(pagesize - 1); tapestart = mmap(NULL, realtapesize * 2 + pagesize * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if unlikely (!tapestart) die("could not allocate tape memory:"); - tape = tapestart + pagesize + realtapesize / 2; + tape = tapestart + pagesize + realtapesize / 2; tapeguardpages[1] = tapestart + realtapesize + pagesize; if unlikely (mprotect(tapeguardpages[1], pagesize, PROT_NONE) < 0) @@ -756,8 +779,49 @@ main(int argc, char *argv[]) if unlikely (sigaction(SIGSEGV, &sa, NULL) < 0) die("could not prepare tape memory guard page:"); - ((void (*)(void *))(uintptr_t)(fn + codestartoffset))(tape); + ((void (*)(void *))(uintptr_t)(output_file + code_start_asm_offset))(tape); // leak tape on purpose +} + +int +main(int argc, char *argv[]) +{ + // If code input isn't a filename, display help info + if unlikely (argc != 2) { + usage(argv[0]); + return 1; + } + + const char* filename = argv[1]; + + // Get file descriptor for file + int file_desc = open(filename, O_RDONLY); + if unlikely (file_desc < 0) { + die("cannot access '%s':", filename); + } + + // Get file info from descriptor + struct stat file_stats; + fstat(file_desc, &file_stats); + + // Get text from file + char *text = emalloc(file_stats.st_size); + read(file_desc, text, file_stats.st_size); + close(file_desc); + + // check if we should read all of stdin + // char *stdin_text = NULL; + // bool stdin_complete = should_use_buffer(&stdin_text); + + // set instruction size to be the size of the file + cvector_reserve(instrs, (size_t)file_stats.st_size); + + // convert text into instruction data + interpret_text(text); + + free(text); + + output_executable(); return 0; } diff --git a/bq.h b/bq.h index af86913..f37109d 100644 --- a/bq.h +++ b/bq.h @@ -37,6 +37,7 @@ static const char *const nops[] = { "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", // nop WORD PTR [eax+eax*1+0x0] "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00" // nop WORD PTR cs:[eax+eax*1+0x0] }; +#define MAX_NOP_SIZE 10 // should not have OP_NONE as Opcode typedef struct { From 84c706d7228dacda7c61293bc59e6f9a2ba90949 Mon Sep 17 00:00:00 2001 From: createsource Date: Wed, 20 May 2026 19:29:46 -0400 Subject: [PATCH 3/4] almost works --- .vscode/launch.json | 32 +++++++++++++++++++++++ Makefile | 4 +-- bq.c | 62 +++++---------------------------------------- bq.h | 2 +- 4 files changed, 41 insertions(+), 59 deletions(-) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..c357787 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,32 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "(gdb) Launch", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/bq", + "args": ["/home/createsource/git/brainquack/tests/392quine.bf"], + "stopAtEntry": false, + "cwd": "${fileDirname}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + }, + { + "description": "Set Disassembly Flavor to Intel", + "text": "-gdb-set disassembly-flavor intel", + "ignoreFailures": true + } + ] + } + ] +} \ No newline at end of file diff --git a/Makefile b/Makefile index cb5adb8..1dc44b3 100644 --- a/Makefile +++ b/Makefile @@ -13,8 +13,8 @@ # limitations under the License. CPPFLAGS = -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200809L -D_GNU_SOURCE -CFLAGS = -std=c99 -pedantic -Wall -Wextra -Og -ggdb3 ${CPPFLAGS} -# CFLAGS = -std=c99 -pedantic -Wall -Wextra -O3 ${CPPFLAGS} +# CFLAGS = -std=c99 -pedantic -Wall -Wextra -Og -ggdb3 ${CPPFLAGS} +CFLAGS = -std=c99 -pedantic -Wall -Wextra -O3 ${CPPFLAGS} CC = clang-22 bq: bq.c diff --git a/bq.c b/bq.c index e3fa6dc..cf1fa79 100644 --- a/bq.c +++ b/bq.c @@ -103,27 +103,6 @@ isop(char c) return strchr("><+-.,[]", c); } -// static bool cold -// isstdincomplete(struct stat *st) -// { -// if (isatty(STDIN_FILENO)) -// return false; - -// if (fstat(STDIN_FILENO, st) < 0) -// return false; - -// if (S_ISREG(st->st_mode)) -// return true; - -// if (S_ISFIFO(st->st_mode)) { -// struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN }; -// poll(&pfd, 1, 0); -// return pfd.revents & POLLHUP; -// } - -// return false; -// } - static void handler(int signum, siginfo_t *si, void *ucontext) { @@ -163,33 +142,6 @@ handler(int signum, siginfo_t *si, void *ucontext) die("could not protect tape memory underflow guard page:"); } -// static bool cold -// should_use_buffer(char **stdin_text) -// { -// struct stat stdin_st; -// bool stdin_complete = isstdincomplete(&stdin_st); -// if (!stdin_complete) return false; - -// if (S_ISREG(stdin_st.st_mode)) { -// *stdin_text = emalloc(stdin_st.st_size); -// read(STDIN_FILENO, *stdin_text, stdin_st.st_size); - -// return true; -// } -// if (S_ISFIFO(stdin_st.st_mode)) { -// int size; -// ioctl(STDIN_FILENO, FIONREAD, &size); - -// *stdin_text = emalloc(size + 1); -// read(STDIN_FILENO, *stdin_text, size); -// (*stdin_text)[size] = '\0'; - -// return true; -// } - -// return false; -// } - cvector(Instr) instrs = NULL; // funny type stuff we just talked about @@ -233,6 +185,7 @@ interp_move(char **ptr) } (*ptr)++; } + (*ptr)--; if unlikely (move_cnt == 0) { return NO_INSTR; @@ -261,6 +214,7 @@ interp_value(char **ptr) } (*ptr)++; } + (*ptr)--; if unlikely (offset_cnt == 0) { return NO_INSTR; @@ -428,13 +382,15 @@ interpret_text(char *text) "\x48\x89\xfb" /* mov rbx, rdi */ static inline size_t -code_append(unsigned char *restrict location, const char *restrict code) +_code_append(unsigned char *restrict location, const char *restrict code, size_t code_len) { - size_t code_size = strlen(code) / sizeof(*code) - 1; + size_t code_size = code_len / sizeof(*code) - 1; memcpy(location, code, code_size); return code_size; } +#define code_append(location, code) _code_append(location, code, sizeof(code)) + static inline size_t code_align(unsigned char *restrict location, size_t align) { @@ -610,13 +566,11 @@ process_instructions(unsigned char *restrict file, size_t codesize, size_t align case OP_JUMP_RIGHT: codesize += procinstr_jright(&file[codesize], jmps, align, i); cvector_push_back(jmps, codesize); - printf("+1\n"); // debug break; case OP_JUMP_LEFT: codesize += procinstr_jleft(file, codesize, jmps); cvector_pop_back(jmps); - printf("-1\n"); // debug break; case OP_CLEAR: @@ -809,10 +763,6 @@ main(int argc, char *argv[]) read(file_desc, text, file_stats.st_size); close(file_desc); - // check if we should read all of stdin - // char *stdin_text = NULL; - // bool stdin_complete = should_use_buffer(&stdin_text); - // set instruction size to be the size of the file cvector_reserve(instrs, (size_t)file_stats.st_size); diff --git a/bq.h b/bq.h index f37109d..d1e468b 100644 --- a/bq.h +++ b/bq.h @@ -48,4 +48,4 @@ typedef struct { // allows OP_NONE typedef Instr MaybeInstr; -#define NO_INSTR ((MaybeInstr){ .op = 0 }) \ No newline at end of file +#define NO_INSTR ((MaybeInstr){ .op = OP_NONE, .arg = 0 }) \ No newline at end of file From c2923a3c18ab82e51f6ad343e2888a73a416902f Mon Sep 17 00:00:00 2001 From: createsource Date: Wed, 20 May 2026 20:01:54 -0400 Subject: [PATCH 4/4] runs? --- Makefile | 4 ++-- bq.c | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 1dc44b3..cb5adb8 100644 --- a/Makefile +++ b/Makefile @@ -13,8 +13,8 @@ # limitations under the License. CPPFLAGS = -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200809L -D_GNU_SOURCE -# CFLAGS = -std=c99 -pedantic -Wall -Wextra -Og -ggdb3 ${CPPFLAGS} -CFLAGS = -std=c99 -pedantic -Wall -Wextra -O3 ${CPPFLAGS} +CFLAGS = -std=c99 -pedantic -Wall -Wextra -Og -ggdb3 ${CPPFLAGS} +# CFLAGS = -std=c99 -pedantic -Wall -Wextra -O3 ${CPPFLAGS} CC = clang-22 bq: bq.c diff --git a/bq.c b/bq.c index cf1fa79..184c6bb 100644 --- a/bq.c +++ b/bq.c @@ -358,8 +358,7 @@ interpret_text(char *text) "\x45\x31\xe4" /* xor r12d,r12d */ \ "\xc3" /* ret */ -// compiles to a number even tho this would seem big!! just use optimization :) -#define get_char_asm_offset sizeof(put_char_asm) +size_t get_char_asm_offset; #define get_char_asm \ "\x49\x81\xfd\x00\x10\x00\x00" /* cmp r13,0x1000 */ \ "\x75\x16" /* jne have_data */ \ @@ -374,7 +373,7 @@ interpret_text(char *text) "\x49\xff\xc5" /* inc r13 */ \ "\xc3" /* ret */ -#define code_start_asm_offset get_char_asm_offset + sizeof(get_char_asm) +size_t code_start_asm_offset; #define code_start \ "\x48\x81\xec\x00\x20\x00\x00" /* sub rsp,0x2000 */ \ "\x4d\x31\xe4" /* xor r12,r12 */ \ @@ -677,9 +676,11 @@ void output_executable(void) codesize += code_append(&output_file[codesize], put_char_asm); codesize += code_align(&output_file[codesize], icacheline); + get_char_asm_offset = codesize; codesize += code_append(&output_file[codesize], get_char_asm); codesize += code_align(&output_file[codesize], icacheline); + code_start_asm_offset = codesize; codesize += code_append(&output_file[codesize], code_start); codesize += code_align(&output_file[codesize], icacheline); @@ -732,8 +733,10 @@ void output_executable(void) sa.sa_flags = SA_SIGINFO; if unlikely (sigaction(SIGSEGV, &sa, NULL) < 0) die("could not prepare tape memory guard page:"); + + printf("%X", output_file[code_start_asm_offset]); - ((void (*)(void *))(uintptr_t)(output_file + code_start_asm_offset))(tape); + ((void (*)(void *))(uintptr_t)(output_file + code_start_asm_offset + 2))(tape); // leak tape on purpose }