diff options
Diffstat (limited to 'icu4c/source/samples/ucnv/convsamp.cpp')
-rw-r--r-- | icu4c/source/samples/ucnv/convsamp.cpp | 1144 |
1 files changed, 0 insertions, 1144 deletions
diff --git a/icu4c/source/samples/ucnv/convsamp.cpp b/icu4c/source/samples/ucnv/convsamp.cpp deleted file mode 100644 index 45a687618..000000000 --- a/icu4c/source/samples/ucnv/convsamp.cpp +++ /dev/null @@ -1,1144 +0,0 @@ -/************************************************************************* -* -* © 2016 and later: Unicode, Inc. and others. -* License & terms of use: http://www.unicode.org/copyright.html -* -************************************************************************** -************************************************************************** -* -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -*************************************************************************** -* file name: convsamp.c -* encoding: ASCII (7-bit) -* -* created on: 2000may30 -* created by: Steven R. Loomis -* -* Sample code for the ICU conversion routines. -* -* Note: Nothing special is needed to build this sample. Link with -* the icu UC and icu I18N libraries. -* -* I use 'assert' for error checking, you probably will want -* something more flexible. '***BEGIN SAMPLE***' and -* '***END SAMPLE***' mark pieces suitable for stand alone -* code snippets. -* -* -* Each test can define it's own BUFFERSIZE -* -*/ - -#define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */ - -#include <stdio.h> -#include <ctype.h> /* for isspace, etc. */ -#include <assert.h> -#include <string.h> -#include <stdlib.h> /* malloc */ - -#include "unicode/utypes.h" /* Basic ICU data types */ -#include "unicode/ucnv.h" /* C Converter API */ -#include "unicode/ustring.h" /* some more string fcns*/ -#include "unicode/uchar.h" /* char names */ -#include "unicode/uloc.h" -#include "unicode/unistr.h" - -#include "flagcb.h" - -/* Some utility functions */ -#ifndef UPRV_LENGTHOF -#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) -#endif - -static const char16_t kNone[] = { 0x0000 }; - -#define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }} - -/* Print a char16_t if possible, in seven characters. */ -void prettyPrintUChar(char16_t c) -{ - if( (c <= 0x007F) && - (isgraph(c)) ) { - printf(" '%c' ", (char)(0x00FF&c)); - } else if ( c > 0x007F ) { - char buf[1000]; - UErrorCode status = U_ZERO_ERROR; - int32_t o; - - o = u_charName(c, U_EXTENDED_CHAR_NAME, buf, 1000, &status); - if(U_SUCCESS(status) && (o>0) ) { - buf[6] = 0; - printf("%7s", buf); - } else { - printf(" ??????"); - } - } else { - switch((char)(c & 0x007F)) { - case ' ': - printf(" ' ' "); - break; - case '\t': - printf(" \\t "); - break; - case '\n': - printf(" \\n "); - break; - default: - printf(" _ "); - break; - } - } -} - - -void printUChars(const char *name = "?", - const char16_t *uch = kNone, - int32_t len = -1 ) -{ - int32_t i; - - if( (len == -1) && (uch) ) { - len = u_strlen(uch); - } - - printf("%5s: ", name); - for( i = 0; i <len; i++) { - printf("%-6d ", i); - } - printf("\n"); - - printf("%5s: ", "uni"); - for( i = 0; i <len; i++) { - printf("\\u%04X ", (int)uch[i]); - } - printf("\n"); - - printf("%5s:", "ch"); - for( i = 0; i <len; i++) { - prettyPrintUChar(uch[i]); - } - printf("\n"); -} - -void printBytes(const char *name = "?", - const char *uch = "", - int32_t len = -1 ) -{ - int32_t i; - - if( (len == -1) && (uch) ) { - len = static_cast<int32_t>(strlen(uch)); - } - - printf("%5s: ", name); - for( i = 0; i <len; i++) { - printf("%-4d ", i); - } - printf("\n"); - - printf("%5s: ", "uni"); - for( i = 0; i <len; i++) { - printf("\\x%02X ", 0x00FF & (int)uch[i]); - } - printf("\n"); - - printf("%5s:", "ch"); - for( i = 0; i <len; i++) { - if(isgraph(0x00FF & (int)uch[i])) { - printf(" '%c' ", (char)uch[i]); - } else { - printf(" "); - } - } - printf("\n"); -} - -void printUChar(UChar32 ch32) -{ - if(ch32 > 0xFFFF) { - printf("ch: U+%06X\n", ch32); - } - else { - char16_t ch = (char16_t)ch32; - printUChars("C", &ch, 1); - } -} - -/******************************************************************* - Very simple C sample to convert the word 'Moscow' in Russian in Unicode, - followed by an exclamation mark (!) into the KOI8-R Russian code page. - - This example first creates a char16_t String out of the Unicode chars. - - targetSize must be set to the amount of space available in the target - buffer. After fromUChars is called, - len will contain the number of bytes in target[] which were - used in the resulting codepage. In this case, there is a 1:1 mapping - between the input and output characters. The exclamation mark has the - same value in both KOI8-R and Unicode. - - src: 0 1 2 3 4 5 6 - uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021 - ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!' - - targ: 0 1 2 3 4 5 6 - uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21 - ch: '!' - - -Converting FROM unicode - to koi8-r. - You must call ucnv_close to clean up the memory used by the - converter. - - 'len' returns the number of OUTPUT bytes resulting from the - conversion. - */ - -UErrorCode convsample_02() -{ - printf("\n\n==============================================\n" - "Sample 02: C: simple Unicode -> koi8-r conversion\n"); - - - // **************************** START SAMPLE ******************* - // "cat<cat>OK" - char16_t source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432, - 0x0430, 0x0021, 0x0000 }; - char target[100]; - UErrorCode status = U_ZERO_ERROR; - UConverter *conv; - int32_t len; - - // set up the converter - //! [ucnv_open] - conv = ucnv_open("koi8-r", &status); - //! [ucnv_open] - assert(U_SUCCESS(status)); - - // convert to koi8-r - len = ucnv_fromUChars(conv, target, 100, source, -1, &status); - assert(U_SUCCESS(status)); - - // close the converter - ucnv_close(conv); - - // ***************************** END SAMPLE ******************** - - // Print it out - printUChars("src", source); - printf("\n"); - printBytes("targ", target, len); - - return U_ZERO_ERROR; -} - - -UErrorCode convsample_03() -{ - printf("\n\n==============================================\n" - "Sample 03: C: print out all converters\n"); - - int32_t count; - int32_t i; - - // **************************** START SAMPLE ******************* - count = ucnv_countAvailable(); - printf("Available converters: %d\n", count); - - for(i=0;i<count;i++) - { - printf("%s ", ucnv_getAvailableName(i)); - } - - // ***************************** END SAMPLE ******************** - - printf("\n"); - - return U_ZERO_ERROR; -} - - - -#define BUFFERSIZE 17 /* make it interesting :) */ - -/* - Converting from a codepage to Unicode in bulk.. - What is the best way to determine the buffer size? - - The 'buffersize' is in bytes of input. - For a given converter, dividing this by the minimum char size - give you the maximum number of Unicode characters that could be - expected for a given number of input bytes. - see: ucnv_getMinCharSize() - - For example, a single byte codepage like 'Latin-3' has a - minimum char size of 1. (It takes at least 1 byte to represent - each Unicode char.) So the unicode buffer has the same number of - UChars as the input buffer has bytes. - - In a strictly double byte codepage such as cp1362 (Windows - Korean), the minimum char size is 2. So, only half as many Unicode - chars as bytes are needed. - - This work to calculate the buffer size is an optimization. Any - size of input and output buffer can be used, as long as the - program handles the following cases: If the input buffer is empty, - the source pointer will be equal to sourceLimit. If the output - buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned. - */ - -UErrorCode convsample_05() -{ - printf("\n\n==============================================\n" - "Sample 05: C: count the number of letters in a UTF-8 document\n"); - - FILE *f; - int32_t count; - char inBuf[BUFFERSIZE]; - const char *source; - const char *sourceLimit; - char16_t *uBuf; - char16_t *target; - char16_t *targetLimit; - char16_t *p; - int32_t uBufSize = 0; - UConverter *conv; - UErrorCode status = U_ZERO_ERROR; - uint32_t letters=0, total=0; - - f = fopen("data01.txt", "r"); - if(!f) - { - fprintf(stderr, "Couldn't open file 'data01.txt' (UTF-8 data file).\n"); - return U_FILE_ACCESS_ERROR; - } - - // **************************** START SAMPLE ******************* - conv = ucnv_open("utf-8", &status); - assert(U_SUCCESS(status)); - - uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv)); - printf("input bytes %d / min chars %d = %d UChars\n", - BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize); - uBuf = (char16_t*)malloc(uBufSize * sizeof(char16_t)); - assert(uBuf!=nullptr); - - // grab another buffer's worth - while((!feof(f)) && - ((count=static_cast<int32_t>(fread(inBuf, 1, BUFFERSIZE , f))) > 0) ) - { - // Convert bytes to unicode - source = inBuf; - sourceLimit = inBuf + count; - - do - { - target = uBuf; - targetLimit = uBuf + uBufSize; - - ucnv_toUnicode(conv, &target, targetLimit, - &source, sourceLimit, nullptr, - feof(f)?true:false, /* pass 'flush' when eof */ - /* is true (when no more data will come) */ - &status); - - if(status == U_BUFFER_OVERFLOW_ERROR) - { - // simply ran out of space - we'll reset the target ptr the next - // time through the loop. - status = U_ZERO_ERROR; - } - else - { - // Check other errors here. - assert(U_SUCCESS(status)); - // Break out of the loop (by force) - } - - // Process the Unicode - // Todo: handle UTF-16/surrogates - - for(p = uBuf; p<target; p++) - { - if(u_isalpha(*p)) - letters++; - total++; - } - } while (source < sourceLimit); // while simply out of space - } - - printf("%d letters out of %d total UChars.\n", letters, total); - - // ***************************** END SAMPLE ******************** - ucnv_close(conv); - - printf("\n"); - - fclose(f); - - return U_ZERO_ERROR; -} -#undef BUFFERSIZE - -#define BUFFERSIZE 1024 -typedef struct -{ - UChar32 codepoint; - uint32_t frequency; -} CharFreqInfo; - -UErrorCode convsample_06() -{ - printf("\n\n==============================================\n" - "Sample 06: C: frequency distribution of letters in a UTF-8 document\n"); - - FILE *f; - int32_t count; - char inBuf[BUFFERSIZE]; - const char *source; - const char *sourceLimit; - int32_t uBufSize = 0; - UConverter *conv; - UErrorCode status = U_ZERO_ERROR; - uint32_t letters=0, total=0; - - CharFreqInfo *info; - UChar32 charCount = 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */ - UChar32 p; - - uint32_t ie = 0; - uint32_t gh = 0; - UChar32 l = 0; - - f = fopen("data06.txt", "r"); - if(!f) - { - fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n"); - return U_FILE_ACCESS_ERROR; - } - - info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount); - if(!info) - { - fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", static_cast<int>(sizeof(CharFreqInfo)*charCount)); - } - - /* reset frequencies */ - for(p=0;p<charCount;p++) - { - info[p].codepoint = p; - info[p].frequency = 0; - } - - // **************************** START SAMPLE ******************* - conv = ucnv_open("utf-8", &status); - assert(U_SUCCESS(status)); - - uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv)); - printf("input bytes %d / min chars %d = %d UChars\n", - BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize); - - // grab another buffer's worth - while((!feof(f)) && - ((count=static_cast<int32_t>(fread(inBuf, 1, BUFFERSIZE , f))) > 0) ) - { - // Convert bytes to unicode - source = inBuf; - sourceLimit = inBuf + count; - - while(source < sourceLimit) - { - p = ucnv_getNextUChar(conv, &source, sourceLimit, &status); - if(U_FAILURE(status)) - { - fprintf(stderr, "%s @ %d\n", u_errorName(status), total); - status = U_ZERO_ERROR; - continue; - } - U_ASSERT(status); - total++; - - if(u_isalpha(p)) - letters++; - - if((u_tolower(l) == 'i') && (u_tolower(p) == 'e')) - ie++; - - if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127)) - gh++; - - if(p>charCount) - { - fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p); - free(info); - fclose(f); - ucnv_close(conv); - return U_UNSUPPORTED_ERROR; - } - info[p].frequency++; - l = p; - } - } - - fclose(f); - ucnv_close(conv); - - printf("%d letters out of %d total UChars.\n", letters, total); - printf("%d ie digraphs, %d gh digraphs.\n", ie, gh); - - // now, we could sort it.. - - // qsort(info, charCount, sizeof(info[0]), charfreq_compare); - - for(p=0;p<charCount;p++) - { - if(info[p].frequency) - { - printf("% 5d U+%06X ", info[p].frequency, p); - if(p <= 0xFFFF) - { - prettyPrintUChar((char16_t)p); - } - printf("\n"); - } - } - free(info); - // ***************************** END SAMPLE ******************** - - printf("\n"); - - return U_ZERO_ERROR; -} -#undef BUFFERSIZE - - -/****************************************************** - You must call ucnv_close to clean up the memory used by the - converter. - - 'len' returns the number of OUTPUT bytes resulting from the - conversion. - */ - -UErrorCode convsample_12() -{ - printf("\n\n==============================================\n" - "Sample 12: C: simple sjis -> unicode conversion\n"); - - - // **************************** START SAMPLE ******************* - - char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 }; - char16_t target[100]; - UErrorCode status = U_ZERO_ERROR; - UConverter *conv; - int32_t len; - - // set up the converter - conv = ucnv_open("shift_jis", &status); - assert(U_SUCCESS(status)); - - // convert to Unicode - // Note: we can use strlen, we know it's an 8 bit null terminated codepage - target[6] = 0xFDCA; - len = ucnv_toUChars(conv, target, 100, source, static_cast<int32_t>(strlen(source)), &status); - U_ASSERT(status); - // close the converter - ucnv_close(conv); - - // ***************************** END SAMPLE ******************** - - // Print it out - printBytes("src", source, static_cast<int32_t>(strlen(source)) ); - printf("\n"); - printUChars("targ", target, len); - - return U_ZERO_ERROR; -} - -/****************************************************************** - C: Convert from codepage to Unicode one at a time. -*/ - -UErrorCode convsample_13() -{ - printf("\n\n==============================================\n" - "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n"); - - - const char sourceChars[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e }; - // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e }; - const char *source, *sourceLimit; - UChar32 target; - UErrorCode status = U_ZERO_ERROR; - UConverter *conv = nullptr; - int32_t srcCount=0; - int32_t dstCount=0; - - srcCount = sizeof(sourceChars); - - conv = ucnv_open("Big5", &status); - U_ASSERT(status); - - source = sourceChars; - sourceLimit = sourceChars + sizeof(sourceChars); - - // **************************** START SAMPLE ******************* - - - printBytes("src", source, static_cast<int32_t>(sourceLimit - source)); - - while(source < sourceLimit) - { - puts(""); - target = ucnv_getNextUChar (conv, - &source, - sourceLimit, - &status); - - // printBytes("src",source,sourceLimit-source); - U_ASSERT(status); - printUChar(target); - dstCount++; - } - - - // ************************** END SAMPLE ************************* - - printf("src=%d bytes, dst=%d uchars\n", srcCount, dstCount); - ucnv_close(conv); - - return U_ZERO_ERROR; -} - - - - -UBool convsample_20_didSubstitute(const char *source) -{ - char16_t uchars[100]; - char bytes[100]; - UConverter *conv = nullptr; - UErrorCode status = U_ZERO_ERROR; - uint32_t len, len2; - UBool flagVal; - - FromUFLAGContext * context = nullptr; - - printf("\n\n==============================================\n" - "Sample 20: C: Test for substitution using callbacks\n"); - - /* print out the original source */ - printBytes("src", source); - printf("\n"); - - /* First, convert from UTF8 to unicode */ - conv = ucnv_open("utf-8", &status); - U_ASSERT(status); - - len = ucnv_toUChars(conv, uchars, 100, source, static_cast<int32_t>(strlen(source)), &status); - U_ASSERT(status); - - printUChars("uch", uchars, len); - printf("\n"); - - /* Now, close the converter */ - ucnv_close(conv); - - /* Now, convert to windows-1252 */ - conv = ucnv_open("windows-1252", &status); - U_ASSERT(status); - - /* Converter starts out with the SUBSTITUTE callback set. */ - - /* initialize our callback */ - context = flagCB_fromU_openContext(); - - /* Set our special callback */ - ucnv_setFromUCallBack(conv, - flagCB_fromU, - context, - &(context->subCallback), - &(context->subContext), - &status); - - U_ASSERT(status); - - len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status); - U_ASSERT(status); - - flagVal = context->flag; /* it's about to go away when we close the cnv */ - - ucnv_close(conv); - - /* print out the original source */ - printBytes("bytes", bytes, len2); - - return flagVal; /* true if callback was called */ -} - -UErrorCode convsample_20() -{ - const char *sample1 = "abc\xdf\xbf"; - const char *sample2 = "abc_def"; - - - if(convsample_20_didSubstitute(sample1)) - { - printf("DID substitute.\n******\n"); - } - else - { - printf("Did NOT substitute.\n*****\n"); - } - - if(convsample_20_didSubstitute(sample2)) - { - printf("DID substitute.\n******\n"); - } - else - { - printf("Did NOT substitute.\n*****\n"); - } - - return U_ZERO_ERROR; -} - -// 21 - C, callback, with clone and debug - - - -UBool convsample_21_didSubstitute(const char *source) -{ - char16_t uchars[100]; - char bytes[100]; - UConverter *conv = nullptr, *cloneCnv = nullptr; - UErrorCode status = U_ZERO_ERROR; - uint32_t len, len2; - UBool flagVal = false; - UConverterFromUCallback junkCB; - - FromUFLAGContext *flagCtx = nullptr, - *cloneFlagCtx = nullptr; - - debugCBContext *debugCtx1 = nullptr, - *debugCtx2 = nullptr, - *cloneDebugCtx = nullptr; - - printf("\n\n==============================================\n" - "Sample 21: C: Test for substitution w/ callbacks & clones \n"); - - /* print out the original source */ - printBytes("src", source); - printf("\n"); - - /* First, convert from UTF8 to unicode */ - conv = ucnv_open("utf-8", &status); - U_ASSERT(status); - - len = ucnv_toUChars(conv, uchars, 100, source, static_cast<int32_t>(strlen(source)), &status); - U_ASSERT(status); - - printUChars("uch", uchars, len); - printf("\n"); - - /* Now, close the converter */ - ucnv_close(conv); - - /* Now, convert to windows-1252 */ - conv = ucnv_open("windows-1252", &status); - U_ASSERT(status); - - /* Converter starts out with the SUBSTITUTE callback set. */ - - /* initialize our callback */ - /* from the 'bottom' innermost, out - * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */ - -#if DEBUG_TMI - printf("flagCB_fromU = %p\n", &flagCB_fromU); - printf("debugCB_fromU = %p\n", &debugCB_fromU); -#endif - - debugCtx1 = debugCB_openContext(); - flagCtx = flagCB_fromU_openContext(); - debugCtx2 = debugCB_openContext(); - - debugCtx1->subCallback = flagCB_fromU; /* debug1 -> flag */ - debugCtx1->subContext = flagCtx; - - flagCtx->subCallback = debugCB_fromU; /* flag -> debug2 */ - flagCtx->subContext = debugCtx2; - - debugCtx2->subCallback = UCNV_FROM_U_CALLBACK_SUBSTITUTE; - debugCtx2->subContext = nullptr; - - /* Set our special callback */ - - ucnv_setFromUCallBack(conv, - debugCB_fromU, - debugCtx1, - &(debugCtx2->subCallback), - &(debugCtx2->subContext), - &status); - - U_ASSERT(status); - -#if DEBUG_TMI - printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n", - conv, debugCtx1, debugCtx1->subCallback, - debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback); -#endif - - cloneCnv = ucnv_safeClone(conv, nullptr, nullptr, &status); - - U_ASSERT(status); - -#if DEBUG_TMI - printf("Cloned converter from %p -> %p. Closing %p.\n", conv, cloneCnv, conv); -#endif - - ucnv_close(conv); - -#if DEBUG_TMI - printf("%p closed.\n", conv); -#endif - - U_ASSERT(status); - /* Now, we have to extract the context */ - cloneDebugCtx = nullptr; - cloneFlagCtx = nullptr; - - ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx); - if(cloneDebugCtx != nullptr) { - cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext; - } - - printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n", - cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:nullptr ); - - len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status); - U_ASSERT(status); - - if(cloneFlagCtx != nullptr) { - flagVal = cloneFlagCtx->flag; /* it's about to go away when we close the cnv */ - } else { - printf("** Warning, couldn't get the subcallback \n"); - } - - ucnv_close(cloneCnv); - - /* print out the original source */ - printBytes("bytes", bytes, len2); - - return flagVal; /* true if callback was called */ -} - -UErrorCode convsample_21() -{ - const char *sample1 = "abc\xdf\xbf"; - const char *sample2 = "abc_def"; - - if(convsample_21_didSubstitute(sample1)) - { - printf("DID substitute.\n******\n"); - } - else - { - printf("Did NOT substitute.\n*****\n"); - } - - if(convsample_21_didSubstitute(sample2)) - { - printf("DID substitute.\n******\n"); - } - else - { - printf("Did NOT substitute.\n*****\n"); - } - - return U_ZERO_ERROR; -} - - -// 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16] - -#define BUFFERSIZE 17 /* make it interesting :) */ - -UErrorCode convsample_40() -{ - printf("\n\n==============================================\n" - "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n"); - - FILE *f; - FILE *out; - int32_t count; - char inBuf[BUFFERSIZE]; - const char *source; - const char *sourceLimit; - char16_t *uBuf; - char16_t *target; - char16_t *targetLimit; - int32_t uBufSize = 0; - UConverter *conv = nullptr; - UErrorCode status = U_ZERO_ERROR; - uint32_t inbytes=0, total=0; - - f = fopen("data02.bin", "rb"); - if(!f) - { - fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n"); - return U_FILE_ACCESS_ERROR; - } - - out = fopen("data40.utf16", "wb"); - if(!out) - { - fprintf(stderr, "Couldn't create file 'data40.utf16'.\n"); - fclose(f); - return U_FILE_ACCESS_ERROR; - } - - // **************************** START SAMPLE ******************* - conv = ucnv_openCCSID(37, UCNV_IBM, &status); - assert(U_SUCCESS(status)); - - uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv)); - printf("input bytes %d / min chars %d = %d UChars\n", - BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize); - uBuf = (char16_t*)malloc(uBufSize * sizeof(char16_t)); - assert(uBuf!=nullptr); - - // grab another buffer's worth - while((!feof(f)) && - ((count=static_cast<int32_t>(fread(inBuf, 1, BUFFERSIZE , f))) > 0) ) - { - inbytes += count; - - // Convert bytes to unicode - source = inBuf; - sourceLimit = inBuf + count; - - do - { - target = uBuf; - targetLimit = uBuf + uBufSize; - - ucnv_toUnicode( conv, &target, targetLimit, - &source, sourceLimit, nullptr, - feof(f)?true:false, /* pass 'flush' when eof */ - /* is true (when no more data will come) */ - &status); - - if(status == U_BUFFER_OVERFLOW_ERROR) - { - // simply ran out of space - we'll reset the target ptr the next - // time through the loop. - status = U_ZERO_ERROR; - } - else - { - // Check other errors here. - assert(U_SUCCESS(status)); - // Break out of the loop (by force) - } - - // Process the Unicode - // Todo: handle UTF-16/surrogates - assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) == (size_t)(target-uBuf)); - total += static_cast<uint32_t>((target-uBuf)); - } while (source < sourceLimit); // while simply out of space - } - - printf("%d bytes in, %d UChars out.\n", inbytes, total); - - // ***************************** END SAMPLE ******************** - ucnv_close(conv); - - fclose(f); - fclose(out); - printf("\n"); - - return U_ZERO_ERROR; -} -#undef BUFFERSIZE - - - -// 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out] - -#define BUFFERSIZE 24 /* make it interesting :) */ - -UErrorCode convsample_46() -{ - printf("\n\n==============================================\n" - "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n"); - - FILE *f; - FILE *out; - int32_t count; - char16_t inBuf[BUFFERSIZE]; - const char16_t *source; - const char16_t *sourceLimit; - char *buf; - char *target; - char *targetLimit; - - int32_t bufSize = 0; - UConverter *conv = nullptr; - UErrorCode status = U_ZERO_ERROR; - uint32_t inchars=0, total=0; - - f = fopen("data40.utf16", "rb"); - if(!f) - { - fprintf(stderr, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n"); - return U_FILE_ACCESS_ERROR; - } - - out = fopen("data46.out", "wb"); - if(!out) - { - fprintf(stderr, "Couldn't create file 'data46.out'.\n"); - fclose(f); - return U_FILE_ACCESS_ERROR; - } - - // **************************** START SAMPLE ******************* - conv = ucnv_open( "iso-8859-2", &status); - assert(U_SUCCESS(status)); - - bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv)); - printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n", - BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize); - buf = (char*)malloc(bufSize * sizeof(char)); - assert(buf!=nullptr); - - // grab another buffer's worth - while((!feof(f)) && - ((count=static_cast<int32_t>(fread(inBuf, sizeof(char16_t), BUFFERSIZE , f))) > 0) ) - { - inchars += count; - - // Convert bytes to unicode - source = inBuf; - sourceLimit = inBuf + count; - - do - { - target = buf; - targetLimit = buf + bufSize; - - ucnv_fromUnicode( conv, &target, targetLimit, - &source, sourceLimit, nullptr, - feof(f)?true:false, /* pass 'flush' when eof */ - /* is true (when no more data will come) */ - &status); - - if(status == U_BUFFER_OVERFLOW_ERROR) - { - // simply ran out of space - we'll reset the target ptr the next - // time through the loop. - status = U_ZERO_ERROR; - } - else - { - // Check other errors here. - assert(U_SUCCESS(status)); - // Break out of the loop (by force) - } - - // Process the Unicode - assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) == (size_t)(target-buf)); - total += static_cast<uint32_t>((target-buf)); - } while (source < sourceLimit); // while simply out of space - } - - printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, static_cast<int>(inchars * sizeof(char16_t)), total); - - // ***************************** END SAMPLE ******************** - ucnv_close(conv); - - fclose(f); - fclose(out); - printf("\n"); - - return U_ZERO_ERROR; -} -#undef BUFFERSIZE - -#define BUFFERSIZE 219 - -void convsample_50() { - printf("\n\n==============================================\n" - "Sample 50: C: ucnv_detectUnicodeSignature\n"); - - //! [ucnv_detectUnicodeSignature] - UErrorCode err = U_ZERO_ERROR; - UBool discardSignature = true; /* set to true to throw away the initial U+FEFF */ - char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' }; - int32_t signatureLength = 0; - const char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err); - UConverter *conv = nullptr; - char16_t output[100]; - char16_t *target = output, *out; - const char *source = input; - if(encoding!=nullptr && U_SUCCESS(err)){ - // should signature be discarded ? - conv = ucnv_open(encoding, &err); - // do the conversion - ucnv_toUnicode(conv, - &target, output + UPRV_LENGTHOF(output), - &source, input + sizeof(input), - nullptr, true, &err); - out = output; - if (discardSignature){ - ++out; // ignore initial U+FEFF - } - while(out != target) { - printf("%04x ", *out++); - } - puts(""); - } - //! [ucnv_detectUnicodeSignature] - puts(""); -} - - - -/* main */ - -int main() -{ - - printf("Default Converter=%s\n", ucnv_getDefaultName() ); - - convsample_02(); // C , u->koi8r, conv - convsample_03(); // C, iterate - - convsample_05(); // C, utf8->u, getNextUChar - convsample_06(); // C freq counter thingy - - convsample_12(); // C, sjis->u, conv - convsample_13(); // C, big5->u, getNextU - - convsample_20(); // C, callback - convsample_21(); // C, callback debug - - convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16] - - convsample_46(); // C, UTF16 -> latin3 [data41.utf16 -> data46.out] - - convsample_50(); // C, detect unicode signature - - printf("End of converter samples.\n"); - - fflush(stdout); - fflush(stderr); - - return 0; -} |