summaryrefslogtreecommitdiff
path: root/icu4c/source/samples/csdet/csdet.c
blob: 5b20dbfe3e936bd0be05ed2a89cc964a9af10a95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/*
********************************************************************************
*   © 2016 and later: Unicode, Inc. and others.
*   License & terms of use: http://www.unicode.org/copyright.html
********************************************************************************
********************************************************************************
 *   Copyright (C) 2005-2006, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 */

#include "unicode/utypes.h"
#include "unicode/ucsdet.h"

#include <string.h>
#include <stdio.h>

#define BUFFER_SIZE 8192

/**
 * Charset detection sample: for every file named on the command line, read up
 * to BUFFER_SIZE bytes from its start and print each charset match reported by
 * the ICU charset detector as "name (language) confidence".
 *
 * Files that cannot be opened or read, and ICU calls that report a failure
 * through UErrorCode, are reported on stdout (where the existing diagnostics
 * already go) and the program moves on to the next file.
 *
 * @param argc number of command line arguments, including the program name
 * @param argv argument vector; argv[1..argc-1] are the file names to examine
 * @return -1 when no file name was given, 0 otherwise
 */
int main(int argc, char *argv[])
{
    static char buffer[BUFFER_SIZE];
    int32_t arg;

    if( argc <= 1 ) {
        /* argv[0] may be NULL when argc is 0; never hand NULL to %s. */
        const char *progName = (argc > 0 && argv[0] != NULL) ? argv[0] : "csdet";

        printf("Usage: %s [filename]...\n", progName);
        return -1;
    }

    for(arg = 1; arg < argc; arg += 1) {
        FILE *file;
        char *filename = argv[arg];
        int32_t inputLength, match, matchCount = 0;
        int readFailed;
        UCharsetDetector* csd;
        const UCharsetMatch **csm;
        UErrorCode status = U_ZERO_ERROR;

        /* Blank line between the reports of consecutive files. */
        if (arg > 1) {
            printf("\n");
        }

        file = fopen(filename, "rb");

        if (file == NULL) {
            printf("Cannot open file \"%s\"\n\n", filename);
            continue;
        }

        printf("%s:\n", filename);

        /* Only the first BUFFER_SIZE bytes are sampled for detection. */
        inputLength = (int32_t) fread(buffer, 1, BUFFER_SIZE, file);

        /* A short count alone cannot tell EOF from an I/O error; ask the stream. */
        readFailed = ferror(file);

        fclose(file);

        if (readFailed) {
            printf("Cannot read file \"%s\"\n", filename);
            continue;
        }

        csd = ucsdet_open(&status);

        if (U_FAILURE(status)) {
            printf("Cannot open charset detector: %s\n", u_errorName(status));
            continue;
        }

        ucsdet_setText(csd, buffer, inputLength, &status);

        csm = ucsdet_detectAll(csd, &matchCount, &status);

        if (U_FAILURE(status) || csm == NULL) {
            printf("Charset detection failed: %s\n", u_errorName(status));
            ucsdet_close(csd);
            continue;
        }

        for(match = 0; match < matchCount; match += 1) {
            const char *name = ucsdet_getName(csm[match], &status);
            const char *lang = ucsdet_getLanguage(csm[match], &status);
            int32_t confidence = ucsdet_getConfidence(csm[match], &status);

            /* Do not print results obtained from a failed query. */
            if (U_FAILURE(status) || name == NULL) {
                printf("Cannot read match %d: %s\n", (int) match, u_errorName(status));
                break;
            }

            /* "**" marks a match for which no language was reported. */
            if (lang == NULL || lang[0] == '\0') {
                lang = "**";
            }

            printf("%s (%s) %d\n", name, lang, (int) confidence);
        }

        /* The match array is indexed only before the detector is closed. */
        ucsdet_close(csd);
    }
    
    return 0;
}