No. 214/622 Index Prev Next
Path: titcca!sragwa!srava!kameyama
From: kameyama@srava.sra.JUNET (Toyohisa Kameyama)
Newsgroups: fj.kanji,fj.sources
Subject: KANJI to ASCII convert (Re: hankaku-kana to zenkaku-kana convertor)
Message-ID: <1213@srava.sra.JUNET>
Date: 30 Nov 87 09:13:35 GMT
References: <2102@flab.flab.fujitsu.JUNET> <755@cskvax.csk.JUNET> <2138@flab.flab.fujitsu.JUNET>
Reply-To: kameyama@srava.UUCP (Toyohisa Kameyama)
Distribution: fj
Organization: Software Research Associates, Inc., Japan
Lines: 342
Xref: titcca fj.kanji:273 fj.sources:509

In article <2138@flab.flab.fujitsu.JUNET> ichikawa@flab.flab.fujitsu.JUNET (I.Ichikawa) writes:
>  また、半角英字と全角英字ですが、1区3区ですみそうですね。
実は, 昔 (JTROFF を作ろうとしているころ)  2 byte コードのうち ASCII で
表せるものを ascii に変換するプログラムを書きました. (EUC version)
(全角, 半角という言葉はこの意味で使用したくないのでこのような表現にしました.)

そのプログラムを post します.

JIS では 3 区にある英数字は下位バイトをそのまま出力すればいいだけです.
(EUC の場合は 8 bit 目を 0 にする.)
これで, 1 区の記号だけを table にすればできあがりです.

\ や ~ は ASCII と決めてしまいました.
なお, -e オプションで鉤括弧をクォートに, 点, 丸をコンマ, ピリオドにするなど
JTROFF のための変換をおこないます.
(ついでに ¥ を \ に変換している.)
変換するコードは勝手に変更して使用して下さい.

実は long char 版もできているのですが, library を post するのに少し
手間がかかりそうなのでとりあえず EUC 版を post します.
---- cut here --- cut here ----- cut here ---
#! /bin/sh
# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by srava!kameyama on Mon Nov 30 12:23:32 JST 1987
# Contents:  Makefile atojlib.c jascii.c
 
# Extract Makefile from the here-document that follows; sed removes one
# leading '@' from every line of the here-document before writing it out.
echo x - Makefile
sed 's/^@//' > "Makefile" << '@//E*O*F Makefile//'
# Makefile for the jascii program (built from atojlib.c and jascii.c).

# Directory the `install' target installs the program into.
DEST          = /usr/itroff/lib

# System headers listed as prerequisites of the `update' rule.
EXTHDRS       = /usr/include/ctype.h \
		/usr/include/stdio.h

# Project headers (none).
HDRS          =

# Extra flags passed to the linker command.
LDFLAGS       =

# Extra libraries to link with (none).
LIBS          =

# Command used to link the program.
LINKER        = cc

# Name of this file; passed to mkmf and to the recursive make.
MAKEFILE      = Makefile

# Object files that make up the program.
OBJS          = atojlib.o \
		jascii.o

# Command used by the `print' target.
PRINT         = pr

# Name of the executable to build.
PROGRAM       = jascii

# C source files.
SRCS          = atojlib.c \
		jascii.c

# Default target: build the program.
all:            $(PROGRAM)

# Link the object files into the executable.
$(PROGRAM):     $(OBJS) $(LIBS)
		@echo -n " Loading $(PROGRAM) ... " 
		@$(LINKER) $(LDFLAGS) $(OBJS) $(LIBS) -o $(PROGRAM)
		@echo " done" 

# Remove the object files (the executable is kept).
clean:;         @rm -f $(OBJS)

# Run mkmf on this Makefile.
depend:;        @mkmf -f $(MAKEFILE) PROGRAM=$(PROGRAM) DEST=$(DEST)

# Print a ctags cross-reference of the headers and sources.
index:;         @ctags -wx $(HDRS) $(SRCS)

# Install the program into $(DEST) (install -s).
install:        $(PROGRAM)
		@echo Installing $(PROGRAM) in $(DEST)
		@install -s $(PROGRAM) $(DEST)

# Send the headers and sources through $(PRINT).
print:;         @$(PRINT) $(HDRS) $(SRCS)

# Alias for building the program.
program:        $(PROGRAM)

# Build a ctags tags file.
tags:           $(HDRS) $(SRCS); @ctags $(HDRS) $(SRCS)

# Reinstall when the installed copy is older than any source or header.
update:         $(DEST)/$(PROGRAM)

$(DEST)/$(PROGRAM): $(SRCS) $(LIBS) $(HDRS) $(EXTHDRS)
		@make -f $(MAKEFILE) DEST=$(DEST) install

# Per-object header dependencies follow the ### line.
###
atojlib.o: /usr/include/ctype.h
jascii.o: /usr/include/stdio.h
@//E*O*F Makefile//
chmod u=rw,g=r,o=r Makefile
 
# Extract atojlib.c from the here-document that follows; sed removes one
# leading '@' from every line of the here-document before writing it out.
echo x - atojlib.c
sed 's/^@//' > "atojlib.c" << '@//E*O*F atojlib.c//'
#include <ctype.h>
#include <stdio.h>
/********************************************************************
 *			code convert program
 *
 *	This file converts between ASCII and two-byte JIS codes.  It
 * provides three external functions, one internal helper (mkjtoa),
 * and two conversion tables (atoj and jtoa).  The external functions
 * are:
 *
 *		a_to_j(ascii,jis):
 *			convert ascii to jis.
 *		j_to_a(jis,ascii):
 *			convert jis to ascii.
 *		ex_j_to_a(mode,jis,ascii);
 *			convert jis to ascii, using extended mode if mode != 0.
 *
 *	These functions return SUC when the conversion succeeds and FAIL
 * when it fails.
 *
 ***********************************************************************/


/* ascii to jis 2 byte (non alphabet or non number) */
/* Status codes returned by the conversion functions in this file. */
#define SUC 0
#define FAIL 1
/*
 * atoj[c - 32] is the second byte of the two-byte JIS code whose first
 * byte is 0x21 ('!'), for the printable ASCII character c (see a_to_j()).
 * A zero entry means "no entry in this table": letters and digits are
 * converted separately by a_to_j(), and DEL has no conversion.
 *
 * jtoa is the reverse table, indexed by (second byte - 32).  It is
 * zero-initialized as a file-scope object and is filled from atoj by
 * mkjtoa().
 */
char atoj[128-32]={
	0x21, /* space (+0x2100) */
	0x2a, /* ! */
	0x6d, /* " */
	0x74, /* # */
	0x70, /* $ */
	0x73, /* % */
	0x75, /* & */
	0x6c, /* ' */
	0x4a, /* ( */
	0x4b, /* ) */
	0x76, /* * */
	0x5c, /* + */
	0x24, /* , */
	0x5d, /* - */
	0x25, /* . */
	0x3f, /* / */
	0,0,0,0,0,0,0,0,0,0, /* 0-9 */
	0x27, /* : */
	0x28, /* ; */
	0x63, /* < */
	0x61, /* = */
	0x64, /* > */
	0x29, /* ? */
	0x77, /* @ */
	0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,		 /* A-Z */
	0x4e, /* [ */
	0x40, /* \ */
	0x4f, /* ] */
	0x30, /* ^ */
	0x32, /* _ */
	0x2e, /* ` */
	0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,		 /* a-z */
	0x50, /* { */
	0x43, /* | */
	0x51, /* } */
	0x41, /* ~ */
	0, /* del */
},jtoa[128-32];
static char init=0; /* set to 1 by mkjtoa() once jtoa has been filled */

#include <stdio.h>	/* putchar(), used by ex_j_to_a() */
#define DEL	0x7f
/*
 * mkjtoa - build jtoa, the inverse of atoj.
 *
 *	For every printable ASCII code with a non-zero atoj entry, store
 * that ASCII code in jtoa at index (atoj entry - 32).  Sets init to 1
 * so that j_to_a() builds the table only once.
 */
static void mkjtoa(void)
{
	int ascii;

	for (ascii = 32; ascii < 128; ascii++) {
		unsigned char low = (unsigned char)atoj[ascii - 32];

		/* 0 marks "no mapping"; the range test keeps the index
		 * inside jtoa[] even if the table is edited carelessly. */
		if (low >= 32 && low < 128)
			jtoa[low - 32] = (char)ascii;
	}
	init = 1;
}

/*
 * a_to_j - convert one printable ASCII character to a two-byte JIS code.
 *
 *	c: ASCII character.  Declared int, which is the type an
 *	   unprototyped caller already passes for a char argument.
 *	j: receives the JIS code: j[0] is the first byte ('#' for letters
 *	   and digits, '!' for everything else), j[1] the second byte.
 *	   Not modified when the conversion fails.
 *
 *	return code:
 *		SUC  (0): conversion successful
 *		FAIL (1): c is a control character, DEL, outside 7-bit
 *			  ASCII, or has no entry in atoj
 */
int a_to_j(int c, char j[2])
{
	char low;

	/*
	 * Accept only 0x20..0x7e.  Testing the range on an int keeps
	 * negative values and bytes >= 0x80 away from <ctype.h> and from
	 * atoj[] whether plain char is signed or unsigned.
	 */
	if (c < ' ' || c >= DEL)
		return FAIL;

	if (isalpha(c) || isdigit(c)) {
		/* Letters and digits keep their ASCII value as second byte. */
		j[0] = '#';
		j[1] = (char)c;
		return SUC;
	}

	low = atoj[c - 32];
	if (low == 0)
		return FAIL;
	j[0] = '!';
	j[1] = low;
	return SUC;
}

/*
 * j_to_a - convert a two-byte JIS code to ASCII.
 *
 *	j: JIS code, high bit already cleared in both bytes.
 *	c: receives the ASCII character on success; left unchanged on
 *	   failure.
 *
 *	Codes with first byte '#' convert when the second byte is an ASCII
 * letter or digit; codes with first byte '!' convert through the jtoa
 * table, which is built on the first call.
 *
 *	return code
 *		SUC  (0): conversion successful
 *		FAIL (1): the code has no ASCII equivalent
 */
int j_to_a(char j[2], int *c)
{
	/* Work on unsigned bytes so <ctype.h> and the table index never
	 * see a negative value. */
	unsigned char hi = (unsigned char)j[0];
	unsigned char lo = (unsigned char)j[1];
	char ascii;

	if (init == 0)
		mkjtoa();

	if (hi == '#') {
		if (lo >= 0x80 || (!isalpha(lo) && !isdigit(lo)))
			return FAIL;
		*c = lo;
		return SUC;
	}

	if (hi != '!')
		return FAIL;

	/* jtoa has 128-32 entries indexed by (second byte - 32). */
	if (lo < 32 || lo >= 128)
		return FAIL;
	ascii = jtoa[lo - 32];
	if (ascii == 0)
		return FAIL;
	*c = ascii;
	return SUC;
}

/*
 * ex_j_to_a - convert a two-byte JIS code to ASCII, optionally extended.
 *
 *	mode: 0 behaves exactly like j_to_a(); non-zero additionally maps
 *	      the punctuation listed in the switch below when j_to_a()
 *	      cannot convert the code.
 *	j:    JIS code, high bit already cleared in both bytes.
 *	a:    receives the ASCII character on success.
 *
 *	Side effect: for the double corner brackets (0x58, 0x59) one quote
 * character is written to standard output here and the same character
 * is returned through *a, so a caller that prints *a emits two quotes.
 *
 *	return code
 *		SUC  (0): conversion successful
 *		FAIL (1): the code has no ASCII equivalent
 */
int ex_j_to_a(int mode, char j[2], int *a)
{
	int r = j_to_a(j, a);

	/* Done if extended mode is off or the plain conversion worked. */
	if (!mode || r == SUC)
		return r;
	/* Every extended mapping has '!' as its first byte. */
	if (j[0] != '!')
		return r;

	switch (j[1]) {
	case 0x22: /* Japanese `ten' */
		*a = ',';
		break;
	case 0x23: /* Japanese `maru' */
		*a = '.';
		break;
	case 0x56: /* Japanese `kagikakko' */
		*a = '`';
		break;
	case 0x57: /* Japanese `kagikakko toziru' */
		*a = '\'';
		break;
	case 0x58: /* Japanese `double kagikakko' */
		*a = '`';
		putchar(*a);
		break;
	case 0x59: /* Japanese `double kagikakko toziru' */
		*a = '\'';
		putchar(*a);
		break;
	case 0x49: /* Japanese double quote */
		*a = '"';
		break;
	case 0x6f: /* Japanese Yen */
		*a = '\\';
		break;
	default:
		return FAIL;
	}
	return SUC;
}
@//E*O*F atojlib.c//
chmod u=rw,g=r,o=r atojlib.c
 
# Extract jascii.c from the here-document that follows; sed removes one
# leading '@' from every line of the here-document before writing it out.
echo x - jascii.c
sed 's/^@//' > "jascii.c" << '@//E*O*F jascii.c//'
/******************************************************************
 *				text file convert program
 *
 *	This program reads a file containing kanji (standard input by
 * default), converts the characters that have ASCII equivalents from
 * their kanji code to ASCII, and writes the result to standard output.
 *	Option:
 *		-e	Extended mode.  Also converts Japanese `maru' and `ten'.
 *
 *		Usage:
 *			jascii [-e] [input_file_name] [> output_file_name]
 *
 ******************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern int j_to_a(char j[2], int *c);
extern int ex_j_to_a(int mode, char j[2], int *a);

/*
 * main - filter that rewrites two-byte codes (both bytes with the high
 * bit set) as single ASCII characters where ex_j_to_a() can convert them.
 *
 *	Usage: jascii [-e] [input_file_name]
 *
 *	-e selects the extended conversions of ex_j_to_a().  Input comes
 * from the named file, or from standard input when no file is given.
 * Bytes without the high bit, and two-byte codes that cannot be
 * converted, are copied to standard output unchanged.
 *
 *	Returns EXIT_SUCCESS, or EXIT_FAILURE if the input file cannot be
 * opened.
 */
int main(int ac, char **av)
{
	FILE *inf = stdin;
	int exmode = 0;
	int c;

	/* av[1] exists only when ac > 1. */
	if (ac > 1 && strcmp(av[1], "-e") == 0) {
		exmode = 1;
		ac--;
		av++;
	}
	if (ac > 1) {
		inf = fopen(av[1], "r");
		if (inf == NULL) {
			perror(av[1]);
			return EXIT_FAILURE;
		}
	}

	while ((c = getc(inf)) != EOF) {
		char j[2];
		int second;

		if ((c & 0200) == 0) {
			putchar(c);
			continue;
		}

		second = getc(inf);
		if (second == EOF) {
			/* Input ended after a lone lead byte: pass it through. */
			putchar(c);
			break;
		}
		if ((second & 0200) == 0) { /* not kanzi code */
			putchar(c);
			putchar(second);
			continue;
		}

		j[0] = (char)(c & 0177);
		j[1] = (char)(second & 0177);
		if (ex_j_to_a(exmode, j, &c) != 0) { /* not convert ascii */
			putchar(j[0] | 0200);
			putchar(j[1] | 0200);
		} else {
			putchar(c);
		}
	}

	if (inf != stdin)
		fclose(inf);
	return EXIT_SUCCESS;
}
@//E*O*F jascii.c//
# Set jascii.c to read/write for the owner and read-only for group and others.
chmod u=rw,g=r,o=r jascii.c
 
# End the script with success status; presumably this also keeps the shell
# from reading any text that follows the archive.
exit 0
---- cut here --- cut here ----- cut here ---
Next
Continue