Thursday, September 20, 2007

Brainfuck Optimizer

So the improvement to make was turning consecutive p++;p++;p++; type things into p+=3;
The first step is separating the code generating parts from the bit which chooses which code to generate, like so:


void bf_emit_gt(FILE * output_file) { fputs("++p;", output_file); }
void bf_emit_lt(FILE * output_file) { fputs("--p;", output_file); }
void bf_emit_add(FILE * output_file) { fputs("++*p;", output_file); }
void bf_emit_sub(FILE * output_file) { fputs("--*p;", output_file); }
void bf_emit_dot(FILE * output_file) { fputs("putchar(*p);", output_file); }
void bf_emit_comma(FILE * output_file) { fputs("*p = getchar();", output_file); }
void bf_emit_left(FILE * output_file) { fputs("while (*p) {", output_file); }
void bf_emit_right(FILE * output_file) { fputs("}", output_file); }


and then updating the code switch/case stuff to use them:


while((ch = fgetc(input_file)) != EOF) {
switch(ch) {
case '>': bf_emit_gt(output_file); break;
case '<': bf_emit_lt(output_file); break;
case '+': bf_emit_add(output_file); break;
case '-': bf_emit_sub(output_file); break;
case '.': bf_emit_dot(output_file); break;
case ',': bf_emit_comma(output_file); break;
case '[': bf_emit_left(output_file); break;
case ']': bf_emit_right(output_file); break;
}
}


At this moment, the program should do the exact same thing as before and it would be a good plan to test it at this point.
Next up modify the functions so they can print out p+=4; and such.


void bf_emit_gt(FILE * output_file, int run_length) { fprintf(output_file, "p += %d;", run_length); }
void bf_emit_lt(FILE * output_file, int run_length) { fprintf(output_file, "p -= %d;", run_length); }
void bf_emit_add(FILE * output_file, int run_length) { fprintf(output_file, "*p+=%d;", run_length); }
void bf_emit_sub(FILE * output_file, int run_length) { fprintf(output_file, "*p-=%d;", run_length); }
void bf_emit_dot(FILE * output_file) { fputs("putchar(*p);", output_file); }
void bf_emit_comma(FILE * output_file) { fputs("*p = getchar();", output_file); }
void bf_emit_left(FILE * output_file) { fputs("while (*p) {", output_file); }
void bf_emit_right(FILE * output_file) { fputs("}", output_file); }


Now main can use them, counting up run length until the next character isn't of the same kind.


int main(int argc, char ** argv) {
int ch;
int run_length = 0;
int last_ch = 0;

if(!process_args(argc, argv)) {
display_usage(argv[0]); // ammendment, argv[0] is the program name
// we shall display this in usage
return EXIT_FAILURE;
}

filter_string = ".,+-<>[]";
fputs("#include <stdio.h>\n", output_file);
fputs("#include <stdlib.h>\n", output_file);
fprintf(output_file, "static int heap[%d];\n", heapsize);
fprintf(output_file, "int main(void) {int * p = heap+%d;", heapsize/2);
while((ch = fgetc(input_file)) != EOF) {
switch(ch) {
case '>':
if('>' == peekchar(input_file)) ++run_length;
else {
bf_emit_gt(output_file, ++run_length);
run_length = 0;
}
break;
case '<':
if('<' == peekchar(input_file)) ++run_length;
else {
bf_emit_lt(output_file, ++run_length);
run_length = 0;
}
break;
case '+':
if('+' == peekchar(input_file)) ++run_length;
else {
bf_emit_add(output_file, ++run_length);
run_length = 0;
}
break;
case '-':
if('-' == peekchar(input_file)) ++run_length;
else {
bf_emit_sub(output_file, ++run_length);
run_length = 0;
}
break;
case '.': bf_emit_dot(output_file); break;
case ',': bf_emit_comma(output_file); break;
case '[': bf_emit_left(output_file); break;
case ']': bf_emit_right(output_file); break;
}
}
fputs("return EXIT_SUCCESS;}\n", output_file);
fflush(output_file);
fclose(output_file);

return EXIT_SUCCESS;
}


The bottles.bf examples compiles into 12009 bytes with these optimizations where as it was 13551 bytes before!

No comments: