summaryrefslogtreecommitdiff
path: root/icu4c/source/samples/ucnv/convsamp.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'icu4c/source/samples/ucnv/convsamp.cpp')
-rw-r--r--icu4c/source/samples/ucnv/convsamp.cpp1144
1 files changed, 0 insertions, 1144 deletions
diff --git a/icu4c/source/samples/ucnv/convsamp.cpp b/icu4c/source/samples/ucnv/convsamp.cpp
deleted file mode 100644
index 45a687618..000000000
--- a/icu4c/source/samples/ucnv/convsamp.cpp
+++ /dev/null
@@ -1,1144 +0,0 @@
-/*************************************************************************
-*
-* © 2016 and later: Unicode, Inc. and others.
-* License & terms of use: http://www.unicode.org/copyright.html
-*
-**************************************************************************
-**************************************************************************
-*
-* Copyright (C) 2000-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-***************************************************************************
-* file name: convsamp.c
-* encoding: ASCII (7-bit)
-*
-* created on: 2000may30
-* created by: Steven R. Loomis
-*
-* Sample code for the ICU conversion routines.
-*
-* Note: Nothing special is needed to build this sample. Link with
-* the icu UC and icu I18N libraries.
-*
-* I use 'assert' for error checking, you probably will want
-* something more flexible. '***BEGIN SAMPLE***' and
-* '***END SAMPLE***' mark pieces suitable for stand alone
-* code snippets.
-*
-*
-* Each test can define it's own BUFFERSIZE
-*
-*/
-
-#define DEBUG_TMI 0 /* define to 1 to enable Too Much Information */
-
-#include <stdio.h>
-#include <ctype.h> /* for isspace, etc. */
-#include <assert.h>
-#include <string.h>
-#include <stdlib.h> /* malloc */
-
-#include "unicode/utypes.h" /* Basic ICU data types */
-#include "unicode/ucnv.h" /* C Converter API */
-#include "unicode/ustring.h" /* some more string fcns*/
-#include "unicode/uchar.h" /* char names */
-#include "unicode/uloc.h"
-#include "unicode/unistr.h"
-
-#include "flagcb.h"
-
-/* Some utility functions */
-#ifndef UPRV_LENGTHOF
-#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-#endif
-
-static const char16_t kNone[] = { 0x0000 };
-
-#define U_ASSERT(x) { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
-
-/* Print a char16_t if possible, in seven characters. */
-void prettyPrintUChar(char16_t c)
-{
- if( (c <= 0x007F) &&
- (isgraph(c)) ) {
- printf(" '%c' ", (char)(0x00FF&c));
- } else if ( c > 0x007F ) {
- char buf[1000];
- UErrorCode status = U_ZERO_ERROR;
- int32_t o;
-
- o = u_charName(c, U_EXTENDED_CHAR_NAME, buf, 1000, &status);
- if(U_SUCCESS(status) && (o>0) ) {
- buf[6] = 0;
- printf("%7s", buf);
- } else {
- printf(" ??????");
- }
- } else {
- switch((char)(c & 0x007F)) {
- case ' ':
- printf(" ' ' ");
- break;
- case '\t':
- printf(" \\t ");
- break;
- case '\n':
- printf(" \\n ");
- break;
- default:
- printf(" _ ");
- break;
- }
- }
-}
-
-
-void printUChars(const char *name = "?",
- const char16_t *uch = kNone,
- int32_t len = -1 )
-{
- int32_t i;
-
- if( (len == -1) && (uch) ) {
- len = u_strlen(uch);
- }
-
- printf("%5s: ", name);
- for( i = 0; i <len; i++) {
- printf("%-6d ", i);
- }
- printf("\n");
-
- printf("%5s: ", "uni");
- for( i = 0; i <len; i++) {
- printf("\\u%04X ", (int)uch[i]);
- }
- printf("\n");
-
- printf("%5s:", "ch");
- for( i = 0; i <len; i++) {
- prettyPrintUChar(uch[i]);
- }
- printf("\n");
-}
-
-void printBytes(const char *name = "?",
- const char *uch = "",
- int32_t len = -1 )
-{
- int32_t i;
-
- if( (len == -1) && (uch) ) {
- len = static_cast<int32_t>(strlen(uch));
- }
-
- printf("%5s: ", name);
- for( i = 0; i <len; i++) {
- printf("%-4d ", i);
- }
- printf("\n");
-
- printf("%5s: ", "uni");
- for( i = 0; i <len; i++) {
- printf("\\x%02X ", 0x00FF & (int)uch[i]);
- }
- printf("\n");
-
- printf("%5s:", "ch");
- for( i = 0; i <len; i++) {
- if(isgraph(0x00FF & (int)uch[i])) {
- printf(" '%c' ", (char)uch[i]);
- } else {
- printf(" ");
- }
- }
- printf("\n");
-}
-
-void printUChar(UChar32 ch32)
-{
- if(ch32 > 0xFFFF) {
- printf("ch: U+%06X\n", ch32);
- }
- else {
- char16_t ch = (char16_t)ch32;
- printUChars("C", &ch, 1);
- }
-}
-
-/*******************************************************************
- Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
- followed by an exclamation mark (!) into the KOI8-R Russian code page.
-
- This example first creates a char16_t String out of the Unicode chars.
-
- targetSize must be set to the amount of space available in the target
- buffer. After fromUChars is called,
- len will contain the number of bytes in target[] which were
- used in the resulting codepage. In this case, there is a 1:1 mapping
- between the input and output characters. The exclamation mark has the
- same value in both KOI8-R and Unicode.
-
- src: 0 1 2 3 4 5 6
- uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
- ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL '!'
-
- targ: 0 1 2 3 4 5 6
- uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
- ch: '!'
-
-
-Converting FROM unicode
- to koi8-r.
- You must call ucnv_close to clean up the memory used by the
- converter.
-
- 'len' returns the number of OUTPUT bytes resulting from the
- conversion.
- */
-
-UErrorCode convsample_02()
-{
- printf("\n\n==============================================\n"
- "Sample 02: C: simple Unicode -> koi8-r conversion\n");
-
-
- // **************************** START SAMPLE *******************
- // "cat<cat>OK"
- char16_t source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
- 0x0430, 0x0021, 0x0000 };
- char target[100];
- UErrorCode status = U_ZERO_ERROR;
- UConverter *conv;
- int32_t len;
-
- // set up the converter
- //! [ucnv_open]
- conv = ucnv_open("koi8-r", &status);
- //! [ucnv_open]
- assert(U_SUCCESS(status));
-
- // convert to koi8-r
- len = ucnv_fromUChars(conv, target, 100, source, -1, &status);
- assert(U_SUCCESS(status));
-
- // close the converter
- ucnv_close(conv);
-
- // ***************************** END SAMPLE ********************
-
- // Print it out
- printUChars("src", source);
- printf("\n");
- printBytes("targ", target, len);
-
- return U_ZERO_ERROR;
-}
-
-
-UErrorCode convsample_03()
-{
- printf("\n\n==============================================\n"
- "Sample 03: C: print out all converters\n");
-
- int32_t count;
- int32_t i;
-
- // **************************** START SAMPLE *******************
- count = ucnv_countAvailable();
- printf("Available converters: %d\n", count);
-
- for(i=0;i<count;i++)
- {
- printf("%s ", ucnv_getAvailableName(i));
- }
-
- // ***************************** END SAMPLE ********************
-
- printf("\n");
-
- return U_ZERO_ERROR;
-}
-
-
-
-#define BUFFERSIZE 17 /* make it interesting :) */
-
-/*
- Converting from a codepage to Unicode in bulk..
- What is the best way to determine the buffer size?
-
- The 'buffersize' is in bytes of input.
- For a given converter, dividing this by the minimum char size
- give you the maximum number of Unicode characters that could be
- expected for a given number of input bytes.
- see: ucnv_getMinCharSize()
-
- For example, a single byte codepage like 'Latin-3' has a
- minimum char size of 1. (It takes at least 1 byte to represent
- each Unicode char.) So the unicode buffer has the same number of
- UChars as the input buffer has bytes.
-
- In a strictly double byte codepage such as cp1362 (Windows
- Korean), the minimum char size is 2. So, only half as many Unicode
- chars as bytes are needed.
-
- This work to calculate the buffer size is an optimization. Any
- size of input and output buffer can be used, as long as the
- program handles the following cases: If the input buffer is empty,
- the source pointer will be equal to sourceLimit. If the output
- buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
- */
-
-UErrorCode convsample_05()
-{
- printf("\n\n==============================================\n"
- "Sample 05: C: count the number of letters in a UTF-8 document\n");
-
- FILE *f;
- int32_t count;
- char inBuf[BUFFERSIZE];
- const char *source;
- const char *sourceLimit;
- char16_t *uBuf;
- char16_t *target;
- char16_t *targetLimit;
- char16_t *p;
- int32_t uBufSize = 0;
- UConverter *conv;
- UErrorCode status = U_ZERO_ERROR;
- uint32_t letters=0, total=0;
-
- f = fopen("data01.txt", "r");
- if(!f)
- {
- fprintf(stderr, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
- return U_FILE_ACCESS_ERROR;
- }
-
- // **************************** START SAMPLE *******************
- conv = ucnv_open("utf-8", &status);
- assert(U_SUCCESS(status));
-
- uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
- printf("input bytes %d / min chars %d = %d UChars\n",
- BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
- uBuf = (char16_t*)malloc(uBufSize * sizeof(char16_t));
- assert(uBuf!=nullptr);
-
- // grab another buffer's worth
- while((!feof(f)) &&
- ((count=static_cast<int32_t>(fread(inBuf, 1, BUFFERSIZE , f))) > 0) )
- {
- // Convert bytes to unicode
- source = inBuf;
- sourceLimit = inBuf + count;
-
- do
- {
- target = uBuf;
- targetLimit = uBuf + uBufSize;
-
- ucnv_toUnicode(conv, &target, targetLimit,
- &source, sourceLimit, nullptr,
- feof(f)?true:false, /* pass 'flush' when eof */
- /* is true (when no more data will come) */
- &status);
-
- if(status == U_BUFFER_OVERFLOW_ERROR)
- {
- // simply ran out of space - we'll reset the target ptr the next
- // time through the loop.
- status = U_ZERO_ERROR;
- }
- else
- {
- // Check other errors here.
- assert(U_SUCCESS(status));
- // Break out of the loop (by force)
- }
-
- // Process the Unicode
- // Todo: handle UTF-16/surrogates
-
- for(p = uBuf; p<target; p++)
- {
- if(u_isalpha(*p))
- letters++;
- total++;
- }
- } while (source < sourceLimit); // while simply out of space
- }
-
- printf("%d letters out of %d total UChars.\n", letters, total);
-
- // ***************************** END SAMPLE ********************
- ucnv_close(conv);
-
- printf("\n");
-
- fclose(f);
-
- return U_ZERO_ERROR;
-}
-#undef BUFFERSIZE
-
-#define BUFFERSIZE 1024
-typedef struct
-{
- UChar32 codepoint;
- uint32_t frequency;
-} CharFreqInfo;
-
-UErrorCode convsample_06()
-{
- printf("\n\n==============================================\n"
- "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
-
- FILE *f;
- int32_t count;
- char inBuf[BUFFERSIZE];
- const char *source;
- const char *sourceLimit;
- int32_t uBufSize = 0;
- UConverter *conv;
- UErrorCode status = U_ZERO_ERROR;
- uint32_t letters=0, total=0;
-
- CharFreqInfo *info;
- UChar32 charCount = 0x10000; /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
- UChar32 p;
-
- uint32_t ie = 0;
- uint32_t gh = 0;
- UChar32 l = 0;
-
- f = fopen("data06.txt", "r");
- if(!f)
- {
- fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
- return U_FILE_ACCESS_ERROR;
- }
-
- info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
- if(!info)
- {
- fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", static_cast<int>(sizeof(CharFreqInfo)*charCount));
- }
-
- /* reset frequencies */
- for(p=0;p<charCount;p++)
- {
- info[p].codepoint = p;
- info[p].frequency = 0;
- }
-
- // **************************** START SAMPLE *******************
- conv = ucnv_open("utf-8", &status);
- assert(U_SUCCESS(status));
-
- uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
- printf("input bytes %d / min chars %d = %d UChars\n",
- BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
-
- // grab another buffer's worth
- while((!feof(f)) &&
- ((count=static_cast<int32_t>(fread(inBuf, 1, BUFFERSIZE , f))) > 0) )
- {
- // Convert bytes to unicode
- source = inBuf;
- sourceLimit = inBuf + count;
-
- while(source < sourceLimit)
- {
- p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
- if(U_FAILURE(status))
- {
- fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
- status = U_ZERO_ERROR;
- continue;
- }
- U_ASSERT(status);
- total++;
-
- if(u_isalpha(p))
- letters++;
-
- if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
- ie++;
-
- if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
- gh++;
-
- if(p>charCount)
- {
- fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
- free(info);
- fclose(f);
- ucnv_close(conv);
- return U_UNSUPPORTED_ERROR;
- }
- info[p].frequency++;
- l = p;
- }
- }
-
- fclose(f);
- ucnv_close(conv);
-
- printf("%d letters out of %d total UChars.\n", letters, total);
- printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
-
- // now, we could sort it..
-
- // qsort(info, charCount, sizeof(info[0]), charfreq_compare);
-
- for(p=0;p<charCount;p++)
- {
- if(info[p].frequency)
- {
- printf("% 5d U+%06X ", info[p].frequency, p);
- if(p <= 0xFFFF)
- {
- prettyPrintUChar((char16_t)p);
- }
- printf("\n");
- }
- }
- free(info);
- // ***************************** END SAMPLE ********************
-
- printf("\n");
-
- return U_ZERO_ERROR;
-}
-#undef BUFFERSIZE
-
-
-/******************************************************
- You must call ucnv_close to clean up the memory used by the
- converter.
-
- 'len' returns the number of OUTPUT bytes resulting from the
- conversion.
- */
-
-UErrorCode convsample_12()
-{
- printf("\n\n==============================================\n"
- "Sample 12: C: simple sjis -> unicode conversion\n");
-
-
- // **************************** START SAMPLE *******************
-
- char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
- char16_t target[100];
- UErrorCode status = U_ZERO_ERROR;
- UConverter *conv;
- int32_t len;
-
- // set up the converter
- conv = ucnv_open("shift_jis", &status);
- assert(U_SUCCESS(status));
-
- // convert to Unicode
- // Note: we can use strlen, we know it's an 8 bit null terminated codepage
- target[6] = 0xFDCA;
- len = ucnv_toUChars(conv, target, 100, source, static_cast<int32_t>(strlen(source)), &status);
- U_ASSERT(status);
- // close the converter
- ucnv_close(conv);
-
- // ***************************** END SAMPLE ********************
-
- // Print it out
- printBytes("src", source, static_cast<int32_t>(strlen(source)) );
- printf("\n");
- printUChars("targ", target, len);
-
- return U_ZERO_ERROR;
-}
-
-/******************************************************************
- C: Convert from codepage to Unicode one at a time.
-*/
-
-UErrorCode convsample_13()
-{
- printf("\n\n==============================================\n"
- "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
-
-
- const char sourceChars[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
- // const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
- const char *source, *sourceLimit;
- UChar32 target;
- UErrorCode status = U_ZERO_ERROR;
- UConverter *conv = nullptr;
- int32_t srcCount=0;
- int32_t dstCount=0;
-
- srcCount = sizeof(sourceChars);
-
- conv = ucnv_open("Big5", &status);
- U_ASSERT(status);
-
- source = sourceChars;
- sourceLimit = sourceChars + sizeof(sourceChars);
-
- // **************************** START SAMPLE *******************
-
-
- printBytes("src", source, static_cast<int32_t>(sourceLimit - source));
-
- while(source < sourceLimit)
- {
- puts("");
- target = ucnv_getNextUChar (conv,
- &source,
- sourceLimit,
- &status);
-
- // printBytes("src",source,sourceLimit-source);
- U_ASSERT(status);
- printUChar(target);
- dstCount++;
- }
-
-
- // ************************** END SAMPLE *************************
-
- printf("src=%d bytes, dst=%d uchars\n", srcCount, dstCount);
- ucnv_close(conv);
-
- return U_ZERO_ERROR;
-}
-
-
-
-
-UBool convsample_20_didSubstitute(const char *source)
-{
- char16_t uchars[100];
- char bytes[100];
- UConverter *conv = nullptr;
- UErrorCode status = U_ZERO_ERROR;
- uint32_t len, len2;
- UBool flagVal;
-
- FromUFLAGContext * context = nullptr;
-
- printf("\n\n==============================================\n"
- "Sample 20: C: Test for substitution using callbacks\n");
-
- /* print out the original source */
- printBytes("src", source);
- printf("\n");
-
- /* First, convert from UTF8 to unicode */
- conv = ucnv_open("utf-8", &status);
- U_ASSERT(status);
-
- len = ucnv_toUChars(conv, uchars, 100, source, static_cast<int32_t>(strlen(source)), &status);
- U_ASSERT(status);
-
- printUChars("uch", uchars, len);
- printf("\n");
-
- /* Now, close the converter */
- ucnv_close(conv);
-
- /* Now, convert to windows-1252 */
- conv = ucnv_open("windows-1252", &status);
- U_ASSERT(status);
-
- /* Converter starts out with the SUBSTITUTE callback set. */
-
- /* initialize our callback */
- context = flagCB_fromU_openContext();
-
- /* Set our special callback */
- ucnv_setFromUCallBack(conv,
- flagCB_fromU,
- context,
- &(context->subCallback),
- &(context->subContext),
- &status);
-
- U_ASSERT(status);
-
- len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
- U_ASSERT(status);
-
- flagVal = context->flag; /* it's about to go away when we close the cnv */
-
- ucnv_close(conv);
-
- /* print out the original source */
- printBytes("bytes", bytes, len2);
-
- return flagVal; /* true if callback was called */
-}
-
-UErrorCode convsample_20()
-{
- const char *sample1 = "abc\xdf\xbf";
- const char *sample2 = "abc_def";
-
-
- if(convsample_20_didSubstitute(sample1))
- {
- printf("DID substitute.\n******\n");
- }
- else
- {
- printf("Did NOT substitute.\n*****\n");
- }
-
- if(convsample_20_didSubstitute(sample2))
- {
- printf("DID substitute.\n******\n");
- }
- else
- {
- printf("Did NOT substitute.\n*****\n");
- }
-
- return U_ZERO_ERROR;
-}
-
-// 21 - C, callback, with clone and debug
-
-
-
-UBool convsample_21_didSubstitute(const char *source)
-{
- char16_t uchars[100];
- char bytes[100];
- UConverter *conv = nullptr, *cloneCnv = nullptr;
- UErrorCode status = U_ZERO_ERROR;
- uint32_t len, len2;
- UBool flagVal = false;
- UConverterFromUCallback junkCB;
-
- FromUFLAGContext *flagCtx = nullptr,
- *cloneFlagCtx = nullptr;
-
- debugCBContext *debugCtx1 = nullptr,
- *debugCtx2 = nullptr,
- *cloneDebugCtx = nullptr;
-
- printf("\n\n==============================================\n"
- "Sample 21: C: Test for substitution w/ callbacks & clones \n");
-
- /* print out the original source */
- printBytes("src", source);
- printf("\n");
-
- /* First, convert from UTF8 to unicode */
- conv = ucnv_open("utf-8", &status);
- U_ASSERT(status);
-
- len = ucnv_toUChars(conv, uchars, 100, source, static_cast<int32_t>(strlen(source)), &status);
- U_ASSERT(status);
-
- printUChars("uch", uchars, len);
- printf("\n");
-
- /* Now, close the converter */
- ucnv_close(conv);
-
- /* Now, convert to windows-1252 */
- conv = ucnv_open("windows-1252", &status);
- U_ASSERT(status);
-
- /* Converter starts out with the SUBSTITUTE callback set. */
-
- /* initialize our callback */
- /* from the 'bottom' innermost, out
- * CNV -> debugCtx1[debug] -> flagCtx[flag] -> debugCtx2[debug] */
-
-#if DEBUG_TMI
- printf("flagCB_fromU = %p\n", &flagCB_fromU);
- printf("debugCB_fromU = %p\n", &debugCB_fromU);
-#endif
-
- debugCtx1 = debugCB_openContext();
- flagCtx = flagCB_fromU_openContext();
- debugCtx2 = debugCB_openContext();
-
- debugCtx1->subCallback = flagCB_fromU; /* debug1 -> flag */
- debugCtx1->subContext = flagCtx;
-
- flagCtx->subCallback = debugCB_fromU; /* flag -> debug2 */
- flagCtx->subContext = debugCtx2;
-
- debugCtx2->subCallback = UCNV_FROM_U_CALLBACK_SUBSTITUTE;
- debugCtx2->subContext = nullptr;
-
- /* Set our special callback */
-
- ucnv_setFromUCallBack(conv,
- debugCB_fromU,
- debugCtx1,
- &(debugCtx2->subCallback),
- &(debugCtx2->subContext),
- &status);
-
- U_ASSERT(status);
-
-#if DEBUG_TMI
- printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
- conv, debugCtx1, debugCtx1->subCallback,
- debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback);
-#endif
-
- cloneCnv = ucnv_safeClone(conv, nullptr, nullptr, &status);
-
- U_ASSERT(status);
-
-#if DEBUG_TMI
- printf("Cloned converter from %p -> %p. Closing %p.\n", conv, cloneCnv, conv);
-#endif
-
- ucnv_close(conv);
-
-#if DEBUG_TMI
- printf("%p closed.\n", conv);
-#endif
-
- U_ASSERT(status);
- /* Now, we have to extract the context */
- cloneDebugCtx = nullptr;
- cloneFlagCtx = nullptr;
-
- ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx);
- if(cloneDebugCtx != nullptr) {
- cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext;
- }
-
- printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
- cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:nullptr );
-
- len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status);
- U_ASSERT(status);
-
- if(cloneFlagCtx != nullptr) {
- flagVal = cloneFlagCtx->flag; /* it's about to go away when we close the cnv */
- } else {
- printf("** Warning, couldn't get the subcallback \n");
- }
-
- ucnv_close(cloneCnv);
-
- /* print out the original source */
- printBytes("bytes", bytes, len2);
-
- return flagVal; /* true if callback was called */
-}
-
-UErrorCode convsample_21()
-{
- const char *sample1 = "abc\xdf\xbf";
- const char *sample2 = "abc_def";
-
- if(convsample_21_didSubstitute(sample1))
- {
- printf("DID substitute.\n******\n");
- }
- else
- {
- printf("Did NOT substitute.\n*****\n");
- }
-
- if(convsample_21_didSubstitute(sample2))
- {
- printf("DID substitute.\n******\n");
- }
- else
- {
- printf("Did NOT substitute.\n*****\n");
- }
-
- return U_ZERO_ERROR;
-}
-
-
-// 40- C, cp37 -> UTF16 [data02.bin -> data40.utf16]
-
-#define BUFFERSIZE 17 /* make it interesting :) */
-
-UErrorCode convsample_40()
-{
- printf("\n\n==============================================\n"
- "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
-
- FILE *f;
- FILE *out;
- int32_t count;
- char inBuf[BUFFERSIZE];
- const char *source;
- const char *sourceLimit;
- char16_t *uBuf;
- char16_t *target;
- char16_t *targetLimit;
- int32_t uBufSize = 0;
- UConverter *conv = nullptr;
- UErrorCode status = U_ZERO_ERROR;
- uint32_t inbytes=0, total=0;
-
- f = fopen("data02.bin", "rb");
- if(!f)
- {
- fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n");
- return U_FILE_ACCESS_ERROR;
- }
-
- out = fopen("data40.utf16", "wb");
- if(!out)
- {
- fprintf(stderr, "Couldn't create file 'data40.utf16'.\n");
- fclose(f);
- return U_FILE_ACCESS_ERROR;
- }
-
- // **************************** START SAMPLE *******************
- conv = ucnv_openCCSID(37, UCNV_IBM, &status);
- assert(U_SUCCESS(status));
-
- uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
- printf("input bytes %d / min chars %d = %d UChars\n",
- BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
- uBuf = (char16_t*)malloc(uBufSize * sizeof(char16_t));
- assert(uBuf!=nullptr);
-
- // grab another buffer's worth
- while((!feof(f)) &&
- ((count=static_cast<int32_t>(fread(inBuf, 1, BUFFERSIZE , f))) > 0) )
- {
- inbytes += count;
-
- // Convert bytes to unicode
- source = inBuf;
- sourceLimit = inBuf + count;
-
- do
- {
- target = uBuf;
- targetLimit = uBuf + uBufSize;
-
- ucnv_toUnicode( conv, &target, targetLimit,
- &source, sourceLimit, nullptr,
- feof(f)?true:false, /* pass 'flush' when eof */
- /* is true (when no more data will come) */
- &status);
-
- if(status == U_BUFFER_OVERFLOW_ERROR)
- {
- // simply ran out of space - we'll reset the target ptr the next
- // time through the loop.
- status = U_ZERO_ERROR;
- }
- else
- {
- // Check other errors here.
- assert(U_SUCCESS(status));
- // Break out of the loop (by force)
- }
-
- // Process the Unicode
- // Todo: handle UTF-16/surrogates
- assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) == (size_t)(target-uBuf));
- total += static_cast<uint32_t>((target-uBuf));
- } while (source < sourceLimit); // while simply out of space
- }
-
- printf("%d bytes in, %d UChars out.\n", inbytes, total);
-
- // ***************************** END SAMPLE ********************
- ucnv_close(conv);
-
- fclose(f);
- fclose(out);
- printf("\n");
-
- return U_ZERO_ERROR;
-}
-#undef BUFFERSIZE
-
-
-
-// 46- C, UTF16 -> latin2 [data40.utf16 -> data46.out]
-
-#define BUFFERSIZE 24 /* make it interesting :) */
-
-UErrorCode convsample_46()
-{
- printf("\n\n==============================================\n"
- "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
-
- FILE *f;
- FILE *out;
- int32_t count;
- char16_t inBuf[BUFFERSIZE];
- const char16_t *source;
- const char16_t *sourceLimit;
- char *buf;
- char *target;
- char *targetLimit;
-
- int32_t bufSize = 0;
- UConverter *conv = nullptr;
- UErrorCode status = U_ZERO_ERROR;
- uint32_t inchars=0, total=0;
-
- f = fopen("data40.utf16", "rb");
- if(!f)
- {
- fprintf(stderr, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
- return U_FILE_ACCESS_ERROR;
- }
-
- out = fopen("data46.out", "wb");
- if(!out)
- {
- fprintf(stderr, "Couldn't create file 'data46.out'.\n");
- fclose(f);
- return U_FILE_ACCESS_ERROR;
- }
-
- // **************************** START SAMPLE *******************
- conv = ucnv_open( "iso-8859-2", &status);
- assert(U_SUCCESS(status));
-
- bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv));
- printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
- BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize);
- buf = (char*)malloc(bufSize * sizeof(char));
- assert(buf!=nullptr);
-
- // grab another buffer's worth
- while((!feof(f)) &&
- ((count=static_cast<int32_t>(fread(inBuf, sizeof(char16_t), BUFFERSIZE , f))) > 0) )
- {
- inchars += count;
-
- // Convert bytes to unicode
- source = inBuf;
- sourceLimit = inBuf + count;
-
- do
- {
- target = buf;
- targetLimit = buf + bufSize;
-
- ucnv_fromUnicode( conv, &target, targetLimit,
- &source, sourceLimit, nullptr,
- feof(f)?true:false, /* pass 'flush' when eof */
- /* is true (when no more data will come) */
- &status);
-
- if(status == U_BUFFER_OVERFLOW_ERROR)
- {
- // simply ran out of space - we'll reset the target ptr the next
- // time through the loop.
- status = U_ZERO_ERROR;
- }
- else
- {
- // Check other errors here.
- assert(U_SUCCESS(status));
- // Break out of the loop (by force)
- }
-
- // Process the Unicode
- assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) == (size_t)(target-buf));
- total += static_cast<uint32_t>((target-buf));
- } while (source < sourceLimit); // while simply out of space
- }
-
- printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, static_cast<int>(inchars * sizeof(char16_t)), total);
-
- // ***************************** END SAMPLE ********************
- ucnv_close(conv);
-
- fclose(f);
- fclose(out);
- printf("\n");
-
- return U_ZERO_ERROR;
-}
-#undef BUFFERSIZE
-
-#define BUFFERSIZE 219
-
-void convsample_50() {
- printf("\n\n==============================================\n"
- "Sample 50: C: ucnv_detectUnicodeSignature\n");
-
- //! [ucnv_detectUnicodeSignature]
- UErrorCode err = U_ZERO_ERROR;
- UBool discardSignature = true; /* set to true to throw away the initial U+FEFF */
- char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
- int32_t signatureLength = 0;
- const char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err);
- UConverter *conv = nullptr;
- char16_t output[100];
- char16_t *target = output, *out;
- const char *source = input;
- if(encoding!=nullptr && U_SUCCESS(err)){
- // should signature be discarded ?
- conv = ucnv_open(encoding, &err);
- // do the conversion
- ucnv_toUnicode(conv,
- &target, output + UPRV_LENGTHOF(output),
- &source, input + sizeof(input),
- nullptr, true, &err);
- out = output;
- if (discardSignature){
- ++out; // ignore initial U+FEFF
- }
- while(out != target) {
- printf("%04x ", *out++);
- }
- puts("");
- }
- //! [ucnv_detectUnicodeSignature]
- puts("");
-}
-
-
-
-/* main */
-
-int main()
-{
-
- printf("Default Converter=%s\n", ucnv_getDefaultName() );
-
- convsample_02(); // C , u->koi8r, conv
- convsample_03(); // C, iterate
-
- convsample_05(); // C, utf8->u, getNextUChar
- convsample_06(); // C freq counter thingy
-
- convsample_12(); // C, sjis->u, conv
- convsample_13(); // C, big5->u, getNextU
-
- convsample_20(); // C, callback
- convsample_21(); // C, callback debug
-
- convsample_40(); // C, cp37 -> UTF16 [data02.bin -> data40.utf16]
-
- convsample_46(); // C, UTF16 -> latin3 [data41.utf16 -> data46.out]
-
- convsample_50(); // C, detect unicode signature
-
- printf("End of converter samples.\n");
-
- fflush(stdout);
- fflush(stderr);
-
- return 0;
-}