A question on Stack Overflow sparked my curiosity:
"So this is probably a long shot, but is there any way to
run a
C or
C++
file as a
script"
With
gcc it is not so straightforward, but with the tiny C compiler
tcc this is
easy, e.g.:
File: hw.c
#!/usr/local/bin/tcc -run
#include <stdio.h>
/* Minimal "script": write the greeting followed by a newline, then report success. */
int main()
{
    puts("Hello, world!");
    return 0;
}
Didn't someone (
Rusty?) manage to get linux to boot completely from a source-code C-script, starting with only a tcc binary?
In Algol68g (my weekend extreme sport language) this is "reasonably" easy... mostly because a "#comment#" is native to the Algol68 language. e.g. here is an implementation of the
classic echo command.
File: echo.a68
#!/usr/bin/a68g --script #
# -*- coding: utf-8 -*- #
# Print the command-line arguments, separated by single spaces. #
# ofs is written before every argument: empty before the first one, a space afterwards. #
STRING ofs := "";
# The loop starts at argv(4); presumably argv(1) to argv(3) hold the interpreter, its option and the script name - TODO confirm. #
FOR i FROM 4 TO argc DO print((ofs, argv(i))); ofs:=" " OD
But how can we natively do this in C with gcc? ...
Simple
shebangs can help with scripting, e.g. "
#!/usr/bin/env python" at the top of a
Python script will allow it to be run in a terminal as "./script.py".
The task
Multiline shebang largely demonstrates how to use "shell" code in the shebang to compile and/or run source-code from a 3rd language.
However, in this
Native shebang task we go
native.
In the shebang, instead of running a shell, we call a
binary-executable generated from the original native language, e.g. when
using
C with gcc "
#!/usr/local/bin/script_gcc" to extract, compile and run the native "script" source code.
Other small innovations required of this
Native shebang task:
- Cache the executable in some appropriate place in a path, dependent on available write permissions.
- Generate a new cached executable only when the source has been touched.
- If a cached executable is available, then run this instead of regenerating a new executable.
Difficulties:
- Naturally, some languages are not compiled. These languages are forced to use shebang executables from another language, e.g. "#!/usr/bin/env python" uses the C binaries /usr/bin/env and /usr/bin/python. If this is the case, then simply document the details of the case.
- In a perfect world, the test file (e.g. echo.c) would still be a valid program, and would compile without error using the native compiler (e.g. gcc for text.c). The problem is that "#!" is syntactically incorrect in many languages, but in others it can be parsed as a comment.
- The "test binary" should be exec-ed and hence retain the original Process identifier.
Test case:
- Create a simple "script file" (in the same native language) called "echo" then use the "script" to output "Hello, world!"
File: script_gcc.c
#!/usr/local/bin/script_gcc.sh
/* Optional: this C code initially is-being/can-be boot strapped (compiled) using bash script_gcc.sh */
#include <errno.h>
#include <libgen.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
/* the actual shebang for C target scripts is:
#!/usr/local/bin/script_gcc.c
*/
/* general readability constants */
typedef char /* const */ *STRING;   /* plain char pointer; the const qualifier is left commented out */
typedef enum {
  FALSE = 0,
  TRUE = 1
} BOOL;
const STRING ENDCAT = NULL;         /* sentinel that ends STRCAT's argument list and the paths array */

/* script_gcc.c specific constants */
#define DIALECT "c" /* or cpp */
const STRING CC = "gcc";                  /* compiler executable */
const STRING COPTS = "-lm -x " DIALECT;   /* options inserted between CC and "-o" in the compile command */
const STRING IEXT = "." DIALECT;          /* source extension for the dialect */
const STRING OEXT = ".out";               /* suffix appended to the script name to name the executable */
const BOOL OPT_CACHE = TRUE;              /* TRUE: compiled executables are kept and reused */
/* general utility procedures */
/* Scratch buffer in which STRCAT assembles its result before copying it out. */
char strcat_out[BUFSIZ];

/*
 * Concatenate a variable number of strings.
 *
 * argv, ...  the strings to join, in order; the list MUST end with ENDCAT.
 *            Because ENDCAT is a null pointer, a null argument anywhere in
 *            the list also ends it.
 *
 * Returns a newly allocated copy of the joined text (strndup), or NULL if
 * that allocation fails. The result is silently truncated to
 * sizeof strcat_out - 1 characters.
 */
STRING STRCAT(STRING argv, ... ){
  va_list ap;
  STRING arg;

  strcat_out[0] = '\0';
  va_start(ap, argv);
  for(arg = argv; arg != ENDCAT; arg = va_arg(ap, STRING)){
    /* strncat's bound is the number of characters it may APPEND (it then
       adds the terminator itself), so it must be the space still free,
       not the total size of the buffer. */
    strncat(strcat_out, arg, sizeof strcat_out - strlen(strcat_out) - 1);
  }
  va_end(ap);
  return strndup(strcat_out, sizeof strcat_out);
}
/* Receives the text produced by itoa; every call overwrites it. */
char itoa_out[BUFSIZ];

/*
 * Render `i` as decimal text.
 * The returned pointer refers to the file-scope buffer itoa_out, so the text
 * is replaced by the next call.
 */
STRING itoa(int i){
  STRING text = itoa_out;

  sprintf(text, "%d", i);
  return text;
}
/*
 * Return the last-modification time of `filename` as reported by stat().
 *
 * If stat() fails, the error is printed with perror(filename) and
 * (time_t)0 is returned instead of reading the unfilled stat buffer.
 */
time_t modtime(STRING filename){
  struct stat buf;

  if(stat(filename, &buf) != 0){
    perror(filename);
    return (time_t)0;
  }
  return buf.st_mtime;
}
/* script_gcc specific procedure */
/*
 * Compile the script `srcpath` into the executable `binpath`.
 *
 * The first line of the source is dropped and the remainder is piped to the
 * command "CC COPTS -o binpath -", so the compiler reads the program from
 * its standard input.
 *
 * Returns the value of pclose() for the compiler: 0 (EXIT_SUCCESS) when the
 * compilation succeeded, non-zero otherwise. EXIT_FAILURE is returned when
 * the source cannot be opened or the compiler cannot be started. Note that,
 * despite the BOOL return type, 0 means success; callers compare the result
 * with EXIT_SUCCESS.
 *
 * NOTE(review): binpath is pasted unquoted into a shell command line, so a
 * path containing blanks or shell metacharacters will be misinterpreted.
 */
BOOL compile(STRING srcpath, STRING binpath){
  char buf[BUFSIZ];
  BOOL in_first_line = TRUE;
  STRING compiler_command;
  FILE *src, *compiler;
  int out;

  src = fopen(srcpath, "r");
  if(src == NULL){
    perror(srcpath);
    return EXIT_FAILURE;
  }

  compiler_command = STRCAT(CC, " ", COPTS, " -o ", binpath, " -", ENDCAT);
  compiler = (compiler_command != NULL) ? popen(compiler_command, "w") : NULL;
  free(compiler_command); /* not needed once popen() has returned */
  if(compiler == NULL){
    perror(CC);
    fclose(src);
    return EXIT_FAILURE;
  }

  while(fgets(buf, sizeof buf, src)){
    if(in_first_line){
      /* A first line longer than buf arrives in several pieces: keep
         skipping until the piece that holds its newline has been seen. */
      if(strchr(buf, '\n') != NULL) in_first_line = FALSE;
      continue;
    }
    if(fputs(buf, compiler) == EOF) break; /* compiler stopped reading */
  }
  fclose(src);

  out = pclose(compiler);
  return out;
}
/*
 * Entry point. When reached through a "#!" line, argv[1] is the path of the
 * script and the following elements are the script's own arguments.
 *
 * With OPT_CACHE set, the candidate directories are searched for an
 * executable named "<script><OEXT>" that is at least as new as the script;
 * the first one found is exec-ed. Otherwise the script is compiled into the
 * first writable candidate directory and the result is exec-ed.
 * With OPT_CACHE clear, a per-process executable is compiled, exec-ed, and
 * removed by a forked helper.
 *
 * Returns only on failure: the saved errno if it is non-zero, else ENOENT
 * (EXIT_FAILURE for a usage error).
 */
int main(int argc, STRING *argv, STRING *envp){
  STRING binpath = NULL,
    srcpath,
    argv0_basename,
    home,
    message,
    *dirnamew, *dirnamex;
  STRING paths[8]; /* at most 7 directories plus the ENDCAT terminator */
  size_t npaths = 0;
  int err;

  (void)envp;
  if(argc < 2){
    fprintf(stderr, "usage: %s script [args...]\n",
            (argc > 0 && argv[0] != NULL) ? argv[0] : "script_gcc");
    return EXIT_FAILURE;
  }
  srcpath = argv[1];
  /* basename() is allowed to modify its argument and to return static
     storage, so it works on a copy and its result is copied again. */
  argv0_basename = STRCAT(basename(STRCAT(srcpath, ENDCAT)), ENDCAT);
  argv++; /* shift: the script sees its own path as argv[0] */

  /* An unset or empty HOME must not put a null pointer in the middle of the
     list (it would end the list early) nor yield paths such as "/tmp". */
  home = getenv("HOME");
  if(home != NULL && *home == '\0') home = NULL;

  /* Warning: current dir "." is in path, AND * /tmp directories are common/shared */
  /* dirname() may modify its argument, hence the strdup of srcpath */
  paths[npaths++] = dirname(strdup(srcpath));
  if(home != NULL) paths[npaths++] = STRCAT(home, "/bin", ENDCAT);
  paths[npaths++] = "/usr/local/bin";
  paths[npaths++] = ".";
  if(home != NULL){
    paths[npaths++] = STRCAT(home, "/tmp", ENDCAT);
    paths[npaths++] = home;
    paths[npaths++] = STRCAT(home, "/Desktop", ENDCAT);
  }
  /* "/tmp" ... a bit of a security hole */
  paths[npaths] = ENDCAT;

  /* first writable directory; *dirnamew is ENDCAT when there is none */
  for(dirnamew = paths; *dirnamew; dirnamew++){
    if(access(*dirnamew, W_OK) == 0) break;
  }

  if(OPT_CACHE == FALSE){
    /* no CACHEd copy is kept: a forked helper unlinks the .out file */
    if(*dirnamew){
      binpath = STRCAT(*dirnamew, "/", argv0_basename, itoa(getpid()), OEXT, ENDCAT);
      if(compile(srcpath, binpath) == EXIT_SUCCESS){
        if(fork() == 0){
          /* helper: give the exec below time to happen, then clean up;
             sleep() takes whole seconds, a fraction would truncate to 0 */
          sleep(1);
          unlink(binpath);
          _exit(EXIT_SUCCESS);
        }
        /* original process (also if fork failed): become the script, so
           that the process identifier is retained */
        execvp(binpath, argv);
      }
    }
  } else {
    /* a CACHEd copy is kept, so look for one that is still fresh */
    time_t modtime_srcpath = modtime(srcpath);
    for(dirnamex = paths; *dirnamex; dirnamex++){
      binpath = STRCAT(*dirnamex, "/", argv0_basename, OEXT, ENDCAT);
      if((access(binpath, X_OK) == 0) && (modtime(binpath) >= modtime_srcpath))
        execvp(binpath, argv);
      free(binpath);
      binpath = NULL;
    }
    /* nothing usable was found: (re)build it in the writable directory */
    if(*dirnamew){
      binpath = STRCAT(*dirnamew, "/", argv0_basename, OEXT, ENDCAT);
      if(compile(srcpath, binpath) == EXIT_SUCCESS)
        execvp(binpath, argv);
    }
  }

  /* only reached when nothing could be exec-ed */
  err = errno; /* saved before the calls below can overwrite it */
  if(binpath == NULL){
    fprintf(stderr, "%s: no writable directory available for the executable\n", srcpath);
  } else {
    message = STRCAT(binpath, ": executable not available", ENDCAT);
    errno = err;
    perror(message);
  }
  return err != 0 ? err : ENOENT;
}
Test Source File: echo.c
#!/usr/local/bin/script_gcc.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * echo: write every argument after the program name to stdout, separated by
 * single spaces and followed by a newline, then exit successfully.
 */
int main(int argc, char **argv, char **envp){
  int i;

  for(i = 1; argv[i] != NULL; i++){
    if(i > 1){
      putchar(' ');
    }
    fputs(argv[i], stdout);
  }
  putchar('\n');
  exit(EXIT_SUCCESS);
}
Test Execution:
$ ./echo.c Hello, world!
Test Output:
Hello, world!
Note: this
Native shebang task does not exactly apply to
bash
because bash is interpretive, but as a skeleton template the following
script is an example of how compiled languages can implement the
shebang. Also: this bash code can be used to
automatically compile the C code in /usr/local/bin/script_gcc.c above.
File: script_gcc.sh
#!/bin/bash
# Actual shebang when using bash:
#!/usr/local/bin/script_gcc.sh
# Alternative shebang when using bash:
#!/bin/bash /usr/local/bin/script_gcc.sh
# CACHE=No # to turn off caching...
# Note: this shell should be re-written in actual C! :-)

# Compiler settings.
DIALECT='c' # or cpp
CC='gcc'
COPTS="-lm -x ${DIALECT}"
IEXT=".${DIALECT}"
OEXT='.out'
# Exit status used when no executable could be produced.
ENOENT=2

# The first argument is the script to run; what is left in "$@" after the
# shift is passed on to the script.
srcpath="${1}"
shift # => "$@"
#basename="$(basename "$srcpath" ."$DIALECT")"
basename="$(basename "${srcpath}")"
# Candidate directories, one per line, in order of preference.
# Warning: current dir "." is in path, AND */tmp directories are common/shared
paths="$(dirname "$srcpath")
$HOME/bin
/usr/local/bin
.
$HOME/tmp
$HOME
$HOME/Desktop"
#/tmp
# Choose the first writable directory. "IFS= read -r" takes each line
# verbatim; a plain "read" would strip leading/trailing blanks and treat
# backslashes as escapes. If no directory is writable the loop stops at the
# end of the list and dirnamew is left empty.
while IFS= read -r dirnamew; do
    [ -w "$dirnamew" ] && break
done << end_here_is
$paths
end_here_is
# Compile $srcpath into $binpath: the first (shebang) line is removed and the
# remainder is given to the compiler on its standard input. $COPTS is left
# unquoted on purpose so that it splits into separate options. The status of
# the function is the status of the compiler.
compile(){
    sed '1d' "$srcpath" |
        "$CC" $COPTS -o "$binpath" -
}
# Run the script: reuse a fresh cached executable if there is one, otherwise
# compile. Every successful path ends in "exec", so the lines after the "fi"
# are reached only when nothing could be run.
if [ "'$CACHE'" = "'No'" ]; then
    # Caching is off: build a per-process executable and have a background
    # subshell remove it shortly after it has been started.
    binpath="$dirnamew/$basename-v$$$OEXT"
    if [ -n "$dirnamew" ] && compile; then
        ( sleep 0.1; exec rm "$binpath" ) & exec "$binpath" "$@"
    fi
else
    # The directory list is read from descriptor 3, not from stdin, so that
    # the program started by "exec" keeps the caller's standard input; the
    # descriptor is closed for that program.
    while IFS= read -r dirnamex <&3; do
        binpath="$dirnamex/$basename$OEXT"
        if [ -x "$binpath" ] && [ "$binpath" -nt "$srcpath" ]; then
            exec "$binpath" "$@" 3<&-
        fi
    done 3<< end_here_is
$paths
end_here_is
    binpath="$dirnamew/$basename$OEXT"
    # An empty dirnamew means that no directory was writable; compiling then
    # would try to write into "/".
    if [ -n "$dirnamew" ] && compile; then exec "$binpath" "$@"; fi
fi
if [ -z "$dirnamew" ]; then
    echo "$srcpath: no writable directory available for the executable" 1>&2
else
    echo "$binpath: executable not available" 1>&2
fi
exit $ENOENT
Test Source File: echo.c
#!/usr/local/bin/script_gcc.sh
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * echo: print the arguments that follow the program name on one line,
 * with a single space between them, then exit with EXIT_SUCCESS.
 */
int main(int argc, char **argv, char **envp){
  char **word = argv + 1;
  int first = 1;

  while(*word){
    if(!first){
      putchar(' ');
    }
    first = 0;
    fputs(*word, stdout);
    word++;
  }
  putchar('\n');
  exit(EXIT_SUCCESS);
}
Test Execution:
$ ./echo.c Hello, world!
Test Output:
Hello, world!
Here is a summary of the input
script files, together with
binary files generated:
$ ls -ltr hw.* echo.* /usr/local/bin
-rwxr-xr-x. 1 nevillednz nevillednz 193 Sep 6 13:17 hw.c
-rwxr-xr-x. 1 nevillednz nevillednz 4757 Sep 6 13:18 hw.c.out
-rwxr-xr-x. 1 nevillednz nevillednz 131 Sep 6 13:19 echo.a68
-rwxr-xr-x. 1 nevillednz nevillednz 12373 Sep 6 13:20 echo.so
-rwxr-xr-x. 1 nevillednz nevillednz 315 Sep 6 13:29 echo.c
-rwxr-xr-x. 1 nevillednz nevillednz 5128 Sep 6 13:30 echo.c.out
/usr/local/bin:
total 20
-rwxr-xr-x. 1 nevillednz nevillednz 1216 Sep 6 13:13 script_gcc.sh
-rwxr-xr-x. 1 nevillednz nevillednz 3431 Sep 6 13:15 script_gcc.c
-rwxr-xr-x. 1 nevillednz nevillednz 8564 Sep 6 13:16 script_gcc.c.out
One interesting thing to observe is that
"script_gcc.c" is bootstrapped by
"script_gcc.sh", so as long as gcc is installed, you don't have to compile anything... not even the
script_gcc.c "script"... (But you do have to have the right permissions to start off with!)