リストの内容はまあほぼ固まったのだけど、納得いくマップにできないというかこう。
なんかですね、入力末尾に改行がついてないと出力のときにもつけてないっぽいのですよ。
GNU sed。
kbk@toybox /cygdrive/c/Users/kbk
$ echo -n foo|sed -n -e 'p'|hexdump
00000000: 66 6F 6F foo
kbk@toybox /cygdrive/c/Users/kbk
$ echo foo|sed -n -e 'p'|hexdump
00000000: 66 6F 6F 0A foo.
kbk@toybox /cygdrive/c/Users/kbk
んが、The Open Group の仕様にある基本動作の説明を読むと、それでいいのだろうかという気がします。
sed
In default operation, sed cyclically copies a line of input, less its terminating newline character, into a
pattern space (unless there is something left after a D command), applies in sequence all commands whose
addresses select that pattern space, and at the end of the script copies the pattern space to standard output
(except when -n is specified) and deletes the pattern space. Whenever the pattern space is written to standard
output or a named file, sed will immediately follow it with a newline character.
Some of the commands use a hold space to save all or part of the pattern space for subsequent retrieval. The
pattern and hold spaces will each be able to hold at least 8192 bytes.
んでまあコードを見ていくと
execute.c
case 'p':
output_line(line.active, line.length, line.chomped, &output_file);
break;
line.chomped という怪しげなものが。
/* Sed operates a line at a time. */
/* Describes one line buffer: the allocated text, the part of it still in
   use, its length and capacity, and whether the line's trailing newline
   was removed when it was read. */
struct line {
char *text; /* Pointer to line allocated by malloc. */
char *active; /* Pointer to non-consumed part of text. */
size_t length; /* Length of text (or active, if used). */
size_t alloc; /* Allocated space for active. */
/* read_file_line sets this to false when the line it read did not end in
   '\n'; the 'p' command passes it to output_line as the "emit a newline"
   flag. */
bool chomped; /* Was a trailing newline dropped? */
#ifdef HAVE_MBRTOWC
/* NOTE(review): presumably the multibyte conversion state that goes with
   this line's text -- confirm against the users of this field. */
mbstate_t mbstate;
#endif
};
構造体定義でのコメントを見てもそんな感じです。
出力の箇所はこう。パラメータの名前が nl になってますが、これが FALSE だと改行をつけません。
/* Write LENGTH bytes of TEXT to OUTF->fp.  When NL is nonzero a newline
   is written after the text; when NL is zero no newline is written and
   OUTF->missing_newline is set instead.  Does nothing at all when TEXT
   is a null pointer.  The stream is flushed before returning.  */
static void
output_line(text, length, nl, outf)
     const char *text;
     size_t length;
     int nl;
     struct output *outf;
{
  if (text)
    {
      /* Called before any of this line's bytes are written.  */
      output_missing_newline(outf);

      if (length > 0)
        ck_fwrite(text, 1, length, outf->fp);

      if (!nl)
        outf->missing_newline = true;
      else
        ck_fwrite("\n", 1, 1, outf->fp);

      flush_output(outf->fp);
    }
}
一方入力はどうかというと
/* Read the next line from INPUT->fp with ck_getline and append it to the
   global `line', without its trailing newline if it had one.  Returns
   false when ck_getline reports nothing read (result <= 0), true
   otherwise.  */
static bool
read_file_line(input)
     struct input *input;
{
  /* Static, so the buffer and its size persist between calls.  */
  static char *buf;
  static size_t buf_size;
  long nread;

  nread = ck_getline (&buf, &buf_size, input->fp);
  if (nread < 1)
    return false;

  if (buf[nread - 1] != '\n')
    /* No newline terminated this line: record that nothing was dropped.  */
    line.chomped = false;
  else
    /* Exclude the newline that getline leaves at the end of the buffer.  */
    nread--;

  str_append(&line, buf, nread);
  return true;
}
ここで、入力末尾に改行がない場合に line.chomped を false にしています。
この関数はさらに ck_getline という関数を呼んでいますが
getline.c
/* Read from STREAM into *LINEPTR up to and including the next newline.
   This is getdelim with '\n' as the delimiter; the arguments and the
   return value are passed through unchanged.  */
ssize_t
getline (char **lineptr, size_t *n, FILE *stream)
{
  const int newline = '\n';

  return getdelim (lineptr, n, newline, stream);
}
さらにもう一段潜って
getdelim.c
/* Read up to (and including) a DELIMITER from FP into *LINEPTR (and
NUL-terminate it). *LINEPTR is a pointer returned from malloc (or
NULL), pointing to *N characters of space. It is realloc'ed as
necessary. Returns the number of characters read (not including
the null terminator), or -1 on error or EOF.

Details visible in the body:
- A null LINEPTR, N or FP fails with errno set to EINVAL.
- If end of input is reached after at least one character was stored,
the count of stored characters is returned rather than -1, so the
returned text may lack the delimiter.
- If the buffer cannot grow any further, fails with errno set to
EOVERFLOW.
- On the allocation-failure and EOVERFLOW paths the buffer is not
NUL-terminated at the current position. */
ssize_t
getdelim (char **lineptr, size_t *n, int delimiter, FILE *fp)
{
ssize_t result;
size_t cur_len = 0;
if (lineptr == NULL || n == NULL || fp == NULL)
{
errno = EINVAL;
return -1;
}
/* Lock the stream; every exit below goes through unlock_return. */
flockfile (fp);
/* No usable buffer supplied: start with 120 bytes. */
if (*lineptr == NULL || *n == 0)
{
char *new_lineptr;
*n = 120;
new_lineptr = (char *) realloc (*lineptr, *n);
if (new_lineptr == NULL)
{
result = -1;
goto unlock_return;
}
*lineptr = new_lineptr;
}
for (;;)
{
int i;
i = getc_maybe_unlocked (fp);
if (i == EOF)
{
/* Provisional: replaced by cur_len after the loop if anything was
stored before this point. */
result = -1;
break;
}
/* Make enough space for len+1 (for final NUL) bytes. */
if (cur_len + 1 >= *n)
{
/* Upper bound on the buffer size: SSIZE_MAX + 1 when that is
representable in size_t, otherwise SIZE_MAX -- presumably so the
character count always fits the ssize_t return value. */
size_t needed_max =
SSIZE_MAX < SIZE_MAX ? (size_t) SSIZE_MAX + 1 : SIZE_MAX;
size_t needed = 2 * *n + 1; /* Be generous. */
char *new_lineptr;
if (needed_max < needed)
needed = needed_max;
/* Even the capped size has no room for this character plus the NUL. */
if (cur_len + 1 >= needed)
{
result = -1;
errno = EOVERFLOW;
goto unlock_return;
}
new_lineptr = (char *) realloc (*lineptr, needed);
if (new_lineptr == NULL)
{
result = -1;
goto unlock_return;
}
*lineptr = new_lineptr;
*n = needed;
}
(*lineptr)[cur_len] = i;
cur_len++;
/* The delimiter itself is stored before stopping. */
if (i == delimiter)
break;
}
/* The growth check above always leaves room for this terminator. */
(*lineptr)[cur_len] = '\0';
/* Anything stored wins over the -1 set on EOF. */
result = cur_len ? cur_len : result;
unlock_return:
funlockfile (fp); /* doesn't set errno */
return result;
}
入力の途中でとんでもなく長い行があった場合にはメモリ確保できずに落ちるでしょうから、
改行がつかずに戻るのは入力末尾に改行がなかった場合のみと考えてよさそうです。
ChangeLog
2003-07-15 Stepan Kasal <kasal@ucw.cz>
Paolo Bonzini <bonzini@fnu.org>
Change the way we treat lines which are not terminated by a newline.
Such lines are printed without the terminating newline (as before)
but as soon as more text is sent to the same output stream, the
missing newline is printed, so that the two lines don't concatenate.
* sed/execute.c (output_file): Is now struct output; users adjusted
to access the fp field, call output_missing_newline before, and
call flush_output afterwards.
(read_file_line): Set line.chomped FALSE each time we encounter a
line without the newline terminator, no matter whether this is the
last input file or not, and no matter whether we are in
POSIXLY_CORRECT mode or not.
changelog のこの記述を読めば意図的だろうと考えられるのですが、
そうすると opengroup のページにあった記述とつじつまが合っていないような気もするし…
さて真相は?
メーリングリストに質問投げるのも面倒だなあ。