[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[tyndur-devel] [RFC] libc: sscanf



Im Folgenden also wie gewuenscht, kein Patch. Bisher nur unter Linux getestet,
scheint in den paar Testfaellen in main() was vernuenftiges zu tun. Ein paar
Sachen fehlen noch (float ueberlasse ich uebrigens gern einem Freiwilligen),
aber die ueblichsten Sachen (Strings und Integer) sollten gehen.

---

#include <ctype.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/**
 * Callback that returns the next input character, or EOF once the input is
 * exhausted. state is the opaque reader state handed to jscanf().
 */
typedef int (*jscanf_getc)(void* state);
/**
 * Callback that pushes the character c, which was read last, back onto the
 * input. state is the opaque reader state handed to jscanf().
 */
typedef void (*jscanf_ungetc)(void* state, char c);

/**
 * Reads the longest run of input characters that can belong to an integer
 * literal in the given base and stores it, NUL-terminated, in buf.
 *
 * An optional sign is accepted as the very first character and a "0x"/"0X"
 * prefix is accepted for base 16. The first character that does not fit is
 * handed back through jungetc; EOF is never pushed back.
 *
 * @param buf Destination buffer; needs room for size + 1 bytes
 * @param size Maximum number of characters to consume
 * @param jgetc Callback returning the next input character or EOF
 * @param jungetc Callback pushing back the character read last
 * @param state Opaque reader state handed to both callbacks
 * @param base Base of the number to read (8, 10 or 16). 0 selects the base
 * from the prefix: "0x"/"0X" means 16, a leading "0" means 8, otherwise 10
 *
 * @return Number of characters stored in buf, not counting the NUL
 */
static int jscanf_read_number(char* buf, int size, jscanf_getc jgetc,
    jscanf_ungetc jungetc, void* state, int base)
{
    // Index of the first digit in buf (1 if a sign was read, 0 otherwise)
    int first_digit = 0;
    int i = 0;

    while (i < size) {
        int valid = 0;
        int c = jgetc(state);

        if (c == EOF) {
            break;
        }

        switch (c) {
            case '+':
            case '-':
                // A sign is only allowed as the very first character
                if (i == 0) {
                    valid = 1;
                    first_digit = 1;
                }
                break;

            case '0':
                valid = 1;
                break;

            case '1': case '2': case '3': case '4':
            case '5': case '6': case '7':
            case '8': case '9':
                if (base == 0) {
                    // Only look at the first digit once it has actually been
                    // stored; a number starting with 1-9 is always decimal.
                    if ((i > first_digit) && (buf[first_digit] == '0')) {
                        base = 8;
                    } else {
                        base = 10;
                    }
                }
                valid = (c <= '7') || (base != 8);
                break;

            case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                valid = (base == 16);
                break;

            case 'x':
            case 'X':
                if (base == 0) {
                    base = 16;
                }
                // The prefix is only valid directly after a single leading 0
                if ((base == 16) && (i == first_digit + 1) &&
                    (buf[first_digit] == '0'))
                {
                    valid = 1;
                }
                break;

            default:
                break;
        }

        if (!valid) {
            jungetc(state, (char) c);
            break;
        }

        buf[i] = (char) c;
        i++;
    }

    buf[i] = '\0';

    return i;
}

/**
 * Stores value, truncated to size bytes, in the integer object ptr points to.
 *
 * @param ptr Address of the destination integer
 * @param value Value to store; only the low size bytes are kept
 * @param size Width of the destination in bytes (1, 2, 4 or 8); any other
 * width aborts the program
 *
 * @return Always 0
 */
static int assign_number(void* ptr, uint64_t value, int size)
{
    switch (size) {
        case 1:
            *(uint8_t*) ptr = (uint8_t) value;
            break;
        case 2:
            *(uint16_t*) ptr = (uint16_t) value;
            break;
        case 4:
            *(uint32_t*) ptr = (uint32_t) value;
            break;
        case 8:
            *(uint64_t*) ptr = value;
            break;
        default:
            abort();
    }

    return 0;
}

/**
 * Whitespace test that is safe for EOF and for getc callbacks that return
 * negative values for bytes above 0x7f.
 */
static int jscanf_isspace(int c)
{
    return (c != EOF) && isspace((unsigned char) c);
}

/**
 * Core of the scanf family: parses input delivered by jgetc according to fmt
 * and stores the converted values through the pointers taken from ap.
 *
 * Supported conversions are %d %u %i %o %x %X %p %n %c %s and %%, with an
 * optional '*' (suppress assignment), an optional field width and the length
 * modifiers hh, h, l, ll, j, t and z. Any other conversion aborts the
 * program.
 *
 * @param fmt Format string
 * @param ap Pointers receiving the converted values
 * @param jgetc Callback returning the next input character or EOF
 * @param jungetc Callback pushing back the character read last
 * @param state Opaque reader state handed to both callbacks
 *
 * @return Number of values assigned, or EOF if the input ended before the
 * first value could be assigned
 */
static int jscanf(const char* fmt, va_list ap,
    jscanf_getc jgetc, jscanf_ungetc jungetc, void* state)
{
    int ret = 0;
    // Number of input characters consumed so far; this is what %n reports
    int consumed = 0;
    int assign;
    int len;
    int size;
    int c;
    char* endptr;
    uint64_t value;

    // 64 bit octal = 22 characters, plus sign and \0
    char buf[24];

    while (*fmt) {
        switch (*fmt) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '\f':
            case '\v':
                // Whitespace in the format matches any amount of whitespace
                // in the input; the character ending the run stays unread.
                do {
                    c = jgetc(state);
                    consumed++;
                } while (jscanf_isspace(c));
                consumed--;
                if (c != EOF) {
                    jungetc(state, (char) c);
                }
                break;

            case '%':
                fmt++;

                // A * means that the value is read, but not assigned to any
                // variable
                if (*fmt == '*') {
                    assign = 0;
                    fmt++;
                } else {
                    assign = 1;
                }

                // The optional field width may follow now
                if (isdigit((unsigned char) *fmt)) {
                    len = (int) strtol(fmt, (char**) &fmt, 10);
                    if (len == 0) {
                        goto matching_error;
                    }
                } else {
                    len = 0;
                }

                // And the size of the variable may be given as well
                switch (*fmt) {
                    case 'h':
                        if (*++fmt == 'h') {
                            fmt++;
                            size = sizeof(char);
                        } else {
                            size = sizeof(short);
                        }
                        break;
                    case 'l':
                        if (*++fmt == 'l') {
                            fmt++;
                            size = sizeof(long long);
                        } else {
                            size = sizeof(long);
                        }
                        break;
                    case 'j':
                        fmt++;
                        size = sizeof(intmax_t);
                        break;
                    case 't':
                        fmt++;
                        size = sizeof(ptrdiff_t);
                        break;
                    case 'z':
                        fmt++;
                        size = sizeof(size_t);
                        break;
                    default:
                        size = sizeof(int);
                        break;
                }

                // Leading whitespace has to be skipped (except %[ %c %n)
                if ((*fmt != '[') && (*fmt != 'c') && (*fmt != 'n')) {
                    do {
                        c = jgetc(state);
                        consumed++;
                    } while (jscanf_isspace(c));
                    consumed--;
                    if (c != EOF) {
                        jungetc(state, (char) c);
                    }
                }

                // Parse the input
                switch (*fmt) {
                    int base;

                    case 'i':
                        base = 0;
                        goto convert_number;
                    case 'o':
                        base = 8;
                        goto convert_number;
                    case 'x':
                    case 'X':
                        base = 16;
                        goto convert_number;
                    case 'p':
                        // Pointers are read as hex numbers of pointer size;
                        // a field width is ignored.
                        base = 16;
                        size = sizeof(void*);
                        len = 0;
                        goto convert_number;
                    case 'd':
                    case 'u':
                        base = 10;
                    convert_number:
                        if (len == 0 || (size_t) len >= sizeof(buf)) {
                            len = (int) sizeof(buf) - 1;
                        }
                        len = jscanf_read_number(buf, len, jgetc, jungetc,
                            state, base);
                        consumed += len;

                        if (len == 0) {
                            // Nothing usable: end of input is an input
                            // failure, anything else a matching failure.
                            c = jgetc(state);
                            if (c == EOF) {
                                goto input_error;
                            }
                            jungetc(state, (char) c);
                            goto matching_error;
                        }

                        // Catches incomplete numbers such as "-" or "0x"
                        value = strtoull(buf, &endptr, base);
                        if (endptr != buf + len) {
                            goto matching_error;
                        }

                        if (assign) {
                            void* ptr = va_arg(ap, void*);
                            assign_number(ptr, value, size);
                            ret++;
                        }
                        break;

                    case 'n':
                        // Reports the characters consumed so far; does not
                        // count as an assignment.
                        if (assign) {
                            void* ptr = va_arg(ap, void*);
                            assign_number(ptr, (uint64_t) consumed, size);
                        }
                        break;

                    case 'c':
                    {
                        int i;
                        char* ptr = assign ? va_arg(ap, char*) : NULL;

                        // TODO multibyte characters
                        if (len == 0) {
                            len = 1;
                        }

                        for (i = 0; i < len; i++) {
                            c = jgetc(state);
                            if (c == EOF) {
                                if (i == 0) {
                                    goto input_error;
                                }
                                goto matching_error;
                            }
                            consumed++;
                            if (ptr != NULL) {
                                ptr[i] = (char) c;
                            }
                        }

                        if (assign) {
                            ret++;
                        }
                        break;
                    }

                    case 's':
                    {
                        char* ptr = assign ? va_arg(ap, char*) : NULL;
                        int stored = 0;

                        // -1 stands for "no limit"
                        if (len == 0) {
                            len = -1;
                        }
                        while ((len == -1) || len--) {
                            c = jgetc(state);
                            if (c == EOF) {
                                break;
                            }
                            if (jscanf_isspace(c)) {
                                jungetc(state, (char) c);
                                break;
                            }
                            consumed++;
                            stored++;
                            if (ptr != NULL) {
                                *ptr++ = (char) c;
                            }
                        }

                        // Whitespace was skipped above, so an empty string
                        // can only mean that the input has ended.
                        if (stored == 0) {
                            goto input_error;
                        }

                        if (ptr != NULL) {
                            *ptr = '\0';
                        }
                        if (assign) {
                            ret++;
                        }
                        break;
                    }

                    case '%':
                        goto parse_percent;
                    default:
                        // Unsupported conversion (e.g. %[ or floating point)
                        abort();
                }
                break;

            parse_percent:
            default:
                // Any other character has to appear literally in the input
                c = jgetc(state);
                if (c == EOF) {
                    goto input_error;
                }
                if ((unsigned char) c != (unsigned char) *fmt) {
                    // The mismatching character stays unread
                    jungetc(state, (char) c);
                    goto matching_error;
                }
                consumed++;
                break;
        }

        fmt++;
    }

matching_error:
    return ret;

input_error:
    if (ret == 0) {
        return EOF;
    }
    return ret;
}

/** Reader state for scanning a NUL-terminated string. */
struct sscanf_state {
    // String that is being scanned
    const char* input;
    // Index of the next character to be read from input
    int pos;
};

/**
 * jscanf_getc implementation for strings.
 *
 * @param state Pointer to a struct sscanf_state
 *
 * @return The next character of the string as an unsigned char value
 * (0..255), or EOF when the terminating NUL has been reached. The position
 * is not advanced past the NUL.
 */
int sscanf_getc(void* state)
{
    struct sscanf_state* s = state;
    int ret = EOF;

    if (s->input[s->pos]) {
        // Go through unsigned char like fgetc() does, so that bytes above
        // 0x7f never come out negative and 0xff is not mistaken for EOF.
        ret = (unsigned char) s->input[s->pos];
        s->pos++;
    }

    return ret;
}

/**
 * jscanf_ungetc implementation for strings: steps back by one character.
 *
 * Aborts the program if nothing has been read yet or if c is not the
 * character found at the position stepped back to.
 *
 * @param state Pointer to a struct sscanf_state
 * @param c Character to push back
 */
void sscanf_ungetc(void* state, char c)
{
    struct sscanf_state* reader = state;

    // Nothing has been read yet, so there is nothing to push back
    if (reader->pos <= 0) {
        abort();
    }

    reader->pos -= 1;

    // The string itself is never modified, so only the character that is
    // already there can be "pushed back"
    if (reader->input[reader->pos] != c) {
        abort();
    }
}


int vsscanf(const char* input, const char* fmt, va_list ap)
{
    struct sscanf_state state = {
        .input = input,
        .pos = 0,
    };

    return jscanf(fmt, ap, sscanf_getc, sscanf_ungetc, &state);
}

/**
 * Scans the string input according to fmt; the variadic arguments are the
 * pointers receiving the converted values.
 *
 * @return The result of vsscanf() for the same arguments
 */
int sscanf(const char* input, const char* fmt, ...)
{
    va_list args;
    int converted;

    va_start(args, fmt);
    converted = vsscanf(input, fmt, args);
    va_end(args);

    return converted;
}

/**
 * Small manual test driver: runs sscanf() on a handful of inputs and prints
 * the return value together with the values that were read.
 */
int main(void)
{
    int value;
    int matched;
    int count;
    char chars[10] = { 0 };
    char text[] = "Vollgeschriebener Puffer";

    // Decimal numbers, with and without field width
    matched = sscanf("42", "%d", &value);
    printf("sscanf(%%d -- '42') = %d; read %d\n", matched, value);

    matched = sscanf("42", "%1d", &value);
    printf("sscanf(%%1d -- '42') = %d; read %d\n", matched, value);

    matched = sscanf("42", "%2d", &value);
    printf("sscanf(%%2d -- '42') = %d; read %d\n", matched, value);

    matched = sscanf("42abc", "%d%n%2c", &value, &count, chars);
    printf("sscanf(%%d -- '42abc') = %d; read %d\n", matched, value);
    printf("    cnt = %d, buf = '%s'\n", count, chars);

    matched = sscanf("-42", "%d", &value);
    printf("sscanf(%%d -- '-42') = %d; read %d\n", matched, value);

    // Hexadecimal numbers, with and without prefix
    matched = sscanf("2a", "%x", &value);
    printf("sscanf(%%x -- '2a') = %d; read %d\n", matched, value);

    matched = sscanf("0x2a", "%x", &value);
    printf("sscanf(%%x -- '0x2a') = %d; read %d\n", matched, value);

    matched = sscanf("0xz", "%x", &value);
    printf("sscanf(%%x -- '0xz') = %d; read %d\n", matched, value);

    puts("");

    // %i picks the base from the prefix
    matched = sscanf("020", "%i", &value);
    printf("sscanf(%%i -- '020') = %d; read %d\n", matched, value);

    matched = sscanf("0x20", "%i", &value);
    printf("sscanf(%%i -- '0x20') = %d; read %d\n", matched, value);

    matched = sscanf("20", "%i", &value);
    printf("sscanf(%%i -- '20') = %d; read %d\n", matched, value);

    puts("");

    // Strings written into a buffer that already has content
    matched = sscanf("42abc", "%*1d%d%n%1s", &value, &count, text);
    printf("sscanf(%%d -- '42abc') = %d; read %d\n", matched, value);
    printf("    cnt = %d, buf = '%s'\n", count, text);

    matched = sscanf("42abc", "%d%n%s", &value, &count, text);
    printf("sscanf(%%d -- '42abc') = %d; read %d\n", matched, value);
    printf("    cnt = %d, buf = '%s'\n", count, text);

    matched = sscanf("42abc", "%d%n%42s", &value, &count, text);
    printf("sscanf(%%d -- '42abc') = %d; read %d\n", matched, value);
    printf("    cnt = %d, buf = '%s'\n", count, text);

    return 0;
}