From: Stephen J. Friedl Software Consultant For: SpamAssassin-2.60-cvs Date: Sun Apr 27 20:00:50 PDT 2003 This patch is for three updates: 1) supports UNIX domain sockets for client/server communications. 2) supports randomization of IP addresses for failover 3) very minor compiler-warning fixes UNIX DOMAIN SOCKETS ------------------- IPC with UNIX domain sockets is substantially more efficient than going through the TCP stack even if localhost is used. On the machine used for testing - Red Hat Linux 6.2 running at 1GHz - UNIX domain sockets are about 3.5 times faster than using TCP. We used the "netperf" tool - found at http://www.netperf.org/ - and this comports with what we believed to be true as well. We also have this strange feeling that avoiding TCP/IP entirely is a bit more secure compared even to listening to TCP/IP on localhost. Once IP is enabled, it's just one configuration mistake away from listening to the world at large. We're just weird about this. The approach taken was to create a "transport" structure that contains either the path of the UNIX domain socket or the list of IP addresses. All the libspamc functions that previously took a hostname/port now take the transport structure, and we dispatch the appropriate helper routine to connect to the server. Once connected, the program more or less works as before. To use this, the server is started with the name of the socket: /usr/local/bin/spamd -d --socketpath=/tmp/spamd.sock and the client is called spamc -U /tmp/spamd.sock LOAD BALANCING/FALLBACK ----------------------- The previous code treated fallback as a special case, but this seems troublesome to us. The new code fetches the list of A records for the given hostname, and stores each one into the transport structure (up to some large number). This list is then truncated to 1 if the caller didn't want fallback, which means that all the various connection methods use the same code. 
A single DNS name can have multiple A records: spamd IN A 172.27.217.1 IN A 172.27.217.3 IN A 172.27.217.7 so gethostbyname("spamd") when run from spamc returns all three addresses. Some nameservers such as BIND9 seem to automatically randomize the returned values, but the "-H" parameter does this as well. This was suggested by Jeremy Zawodny. --- spamd/libspamc.c.orig 2003-04-25 19:59:31.000000000 +0000 +++ spamd/libspamc.c 2003-04-25 22:38:16.000000000 +0000 @@ -12,12 +12,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include #include @@ -42,10 +44,15 @@ /* RedHat 5.2 doesn't define Shutdown 2nd Parameter Constants */ /* KAM 12-4-01 */ -#ifndef HAVE_SHUT_RD -#define SHUT_RD (0) /* No more receptions. */ -#define SHUT_WR (1) /* No more transmissions. */ -#define SHUT_RDWR (2) /* No more receptions or transmissions. */ +/* SJF 2003/04/25 - now test for macros directly */ +#ifndef SHUT_RD +# define SHUT_RD 0 /* no more receptions */ +#endif +#ifndef SHUT_WR +# define SHUT_WR 1 /* no more transmissions */ +#endif +#ifndef SHUT_RDWR +# define SHUT_RDWR 2 /* no more receptions or transmissions */ #endif #ifndef HAVE_H_ERRNO @@ -96,6 +103,293 @@ int libspamc_timeout = 0; +/* + * translate_connect_errno() + * + * Given a UNIX error number obtained (probably) from "connect(2)", + * translate this to a failure code. This module is shared by both + * transport modules - UNIX and TCP. + * + * This should ONLY be called when there is an error. 
+ */ +static int +translate_connect_errno(int err) +{ + switch (err) + { + case EBADF: + case EFAULT: + case ENOTSOCK: + case EISCONN: + case EADDRINUSE: + case EINPROGRESS: + case EALREADY: + case EAFNOSUPPORT: + return EX_SOFTWARE; + + case ECONNREFUSED: + case ETIMEDOUT: + case ENETUNREACH: + return EX_UNAVAILABLE; + + case EACCES: + return EX_NOPERM; + + default: + return EX_SOFTWARE; + } +} + +/* + * opensocket() + * + * Given a socket type (PF_INET or PF_UNIX), try to create this socket + * and store the FD in the pointed-to place. If it's successful, do any + * other setup required to make the socket ready to use, such as setting + * TCP_NODELAY mode, and in any case we return EX_OK if all is well. + * + * Upon failure we return one of the other EX_??? error codes. + */ +static int opensocket(int type, int *psock) +{ +const char *typename; +int proto = 0; + + assert(psock != 0); + + /*---------------------------------------------------------------- + * Create a few induction variables that are implied by the socket + * type given by the user. The typename is strictly used for debug + * reporting. + */ + if ( type == PF_UNIX ) + { + typename = "PF_UNIX"; + } + else + { + typename = "PF_INET"; + proto = IPPROTO_TCP; + } + +#ifdef DO_CONNECT_DEBUG_SYSLOGS + syslog (DEBUG_LEVEL, "dbg: create socket(%s)", typename); +#endif + + if ( (*psock = socket(type, SOCK_STREAM, proto)) < 0 ) + { + int origerr; + + /*-------------------------------------------------------- + * At this point we had a failure creating the socket, and + * this is pretty much fatal. Translate the error reason + * into something the user can understand. 
+ */ + origerr = errno; /* take a copy before syslog() */ + + syslog (LOG_ERR, "socket(%s) to spamd failed: %m", typename); + + switch (origerr) + { + case EPROTONOSUPPORT: + case EINVAL: + return EX_SOFTWARE; + + case EACCES: + return EX_NOPERM; + + case ENFILE: + case EMFILE: + case ENOBUFS: + case ENOMEM: + return EX_OSERR; + + default: + return EX_SOFTWARE; + } + } + + + /*---------------------------------------------------------------- + * Do a bit of setup on the TCP socket if required. Notes above + * suggest this is probably not set + */ +#ifdef USE_TCP_NODELAY + { + int one = 1; + + if ( type == PF_INET + && setsockopt(*psock, 0, TCP_NODELAY, &one, sizeof one) != 0 ) + { + switch(errno) + { + case EBADF: + case ENOTSOCK: + case ENOPROTOOPT: + case EFAULT: + syslog(LOG_ERR, + "setsockopt(TCP_NODELAY) failed: %m"); + close (*psock); + return EX_SOFTWARE; + + default: + break; /* ignored */ + } + } + } +#endif /* USE_TCP_NODELAY */ + + return EX_OK; /* all is well */ +} + +/* + * try_to_connect_unix() + * + * Given a transport handle that implies using a UNIX domain + * socket, try to make a connection to it and store the resulting + * file descriptor in *sockptr. Return is EX_OK if we did it, + * and some other error code otherwise. + */ +static int +try_to_connect_unix (struct transport *tp, int *sockptr) +{ +int mysock, status, origerr; +struct sockaddr_un addrbuf; +int ret; + + assert(tp != 0); + assert(sockptr != 0); + assert(tp->socketpath != 0); + + /*---------------------------------------------------------------- + * If the socket itself can't be created, this is a fatal error. 
+ */ + if ( (ret = opensocket(PF_UNIX, &mysock)) != EX_OK ) + return ret; + + /* set up the UNIX domain socket */ + memset(&addrbuf, 0, sizeof addrbuf); + addrbuf.sun_family = AF_UNIX; + strncpy(addrbuf.sun_path, tp->socketpath, sizeof addrbuf.sun_path - 1); + +#ifdef DO_CONNECT_DEBUG_SYSLOGS + syslog (DEBUG_LEVEL, "dbg: connect(AF_UNIX) to spamd at %s", + addrbuf.sun_path); +#endif + + status = connect(mysock, (struct sockaddr *) &addrbuf, sizeof(addrbuf)); + + origerr = errno; + + if ( status >= 0 ) + { +#ifdef DO_CONNECT_DEBUG_SYSLOGS + syslog(DEBUG_LEVEL, "dbg: connect(AF_UNIX) ok"); +#endif + + *sockptr = mysock; + + return EX_OK; + } + + syslog(LOG_ERR, "connect(AF_UNIX) to spamd %s failed: %m", + addrbuf.sun_path); + + close(mysock); + + return translate_connect_errno(origerr); +} + +/* + * try_to_connect_tcp() + * + * Given a transport that implies a TCP connection, either to + * localhost or a list of IP addresses, attempt to connect. The + * list of IP addresses has already been randomized (if requested) + * and limited to just one if fallback has been enabled. + */ +static int +try_to_connect_tcp (const struct transport *tp, int *sockptr) +{ +int numloops; +int origerr = 0; +int ret; + + assert(tp != 0); + assert(sockptr != 0); + assert(tp->nhosts > 0); + +#ifdef DO_CONNECT_DEBUG_SYSLOGS + for (numloops = 0; numloops < tp->nhosts; numloops++) + { + syslog(LOG_ERR, "dbg: %d/%d: %s", + numloops+1, tp->nhosts, inet_ntoa(tp->hosts[numloops])); + } +#endif + + for (numloops = 0; numloops < MAX_CONNECT_RETRIES; numloops++) + { + struct sockaddr_in addrbuf; + const int hostix = numloops % tp->nhosts; + int status, mysock; + const char * ipaddr; + + /*-------------------------------------------------------- + * We always start by creating the socket, as we get only + * one attempt to connect() on each one. If this fails, + * we're done. 
+ */ + if ( (ret = opensocket(PF_INET, &mysock)) != EX_OK ) + return ret; + + memset(&addrbuf, 0, sizeof(addrbuf)); + + addrbuf.sin_family = AF_INET; + addrbuf.sin_port = htons(tp->port); + addrbuf.sin_addr = tp->hosts[hostix]; + + ipaddr = inet_ntoa(addrbuf.sin_addr); + +#ifdef DO_CONNECT_DEBUG_SYSLOGS + syslog (DEBUG_LEVEL, + "dbg: connect(AF_INET) to spamd at %s (try #%d of %d)", + ipaddr, + numloops+1, + MAX_CONNECT_RETRIES); +#endif + + status = connect(mysock, (struct sockaddr *)&addrbuf, sizeof(addrbuf)); + + if (status != 0) + { + syslog (LOG_ERR, + "connect(AF_INET) to spamd at %s failed, retrying (#%d of %d): %m", + ipaddr, numloops+1, MAX_CONNECT_RETRIES); + + close(mysock); + + sleep(CONNECT_RETRY_SLEEP); + } + else + { +#ifdef DO_CONNECT_DEBUG_SYSLOGS + syslog(DEBUG_LEVEL, + "dbg: connect(AF_INET) to spamd at %s done", + ipaddr); +#endif + *sockptr = mysock; + + return EX_OK; + } + } + + syslog (LOG_ERR, "connection attempt to spamd aborted after %d retries", + MAX_CONNECT_RETRIES); + + return translate_connect_errno(origerr); +} + +#if 0 static int try_to_connect (const struct sockaddr *argaddr, struct hostent *hent, int hent_port, int *sockptr) @@ -323,6 +617,7 @@ return EX_SOFTWARE; } } +#endif /* Aug 14, 2002 bj: Reworked things. 
Now we have message_read, message_write, * message_dump, lookup_host, message_filter, and message_process, and a bunch @@ -345,7 +640,7 @@ message_read_raw(int fd, struct message *m){ clear_message(m); if((m->raw=malloc(m->max_len+1))==NULL) return EX_OSERR; - m->raw_len=full_read(fd, (unsigned char *) m->raw, m->max_len+1, m->max_len+1); + m->raw_len=full_read(fd, m->raw, m->max_len+1, m->max_len+1); if(m->raw_len<=0){ free(m->raw); m->raw=NULL; m->raw_len=0; return EX_IOERR; @@ -368,7 +663,7 @@ if((m->raw=malloc(m->max_len+1))==NULL) return EX_OSERR; /* Find the DATA line */ - m->raw_len=full_read(fd, (unsigned char *) m->raw, m->max_len+1, m->max_len+1); + m->raw_len=full_read(fd, m->raw, m->max_len+1, m->max_len+1); if(m->raw_len<=0){ free(m->raw); m->raw=NULL; m->raw_len=0; return EX_IOERR; @@ -453,7 +748,7 @@ if (m->priv->flags&SPAMC_CHECK_ONLY) { if(m->is_spam==EX_ISSPAM || m->is_spam==EX_NOTSPAM){ - return full_write(fd, (unsigned char *) m->out, m->out_len); + return full_write(fd, m->out, m->out_len); } else { syslog(LOG_ERR, "oops! 
SPAMC_CHECK_ONLY is_spam: %d\n", m->is_spam); @@ -468,13 +763,13 @@ return -1; case MESSAGE_ERROR: - return full_write(fd, (unsigned char *) m->raw, m->raw_len); + return full_write(fd, m->raw, m->raw_len); case MESSAGE_RAW: - return full_write(fd, (unsigned char *) m->out, m->out_len); + return full_write(fd, m->out, m->out_len); case MESSAGE_BSMTP: - total=full_write(fd, (unsigned char *) m->pre, m->pre_len); + total=full_write(fd, m->pre, m->pre_len); for(i=0; iout_len; ){ jlimit = (off_t) (sizeof(buffer)/sizeof(*buffer)-4); for(j=0; i < (off_t) m->out_len && @@ -491,9 +786,9 @@ buffer[j++]=m->out[i++]; } } - total+=full_write(fd, (unsigned char *) buffer, j); + total+=full_write(fd, buffer, j); } - return total+full_write(fd, (unsigned char *) m->post, m->post_len); + return total+full_write(fd, m->post, m->post_len); default: syslog(LOG_ERR, "Unknown message type %d\n", m->type); @@ -508,8 +803,8 @@ if(m!=NULL && m->type!=MESSAGE_NONE) { message_write(out_fd, m); } - while((bytes=full_read(in_fd, (unsigned char *) buf, 8192, 8192))>0){ - if (bytes!=full_write(out_fd, (unsigned char *) buf, bytes)) { + while((bytes=full_read(in_fd, buf, 8192, 8192))>0){ + if (bytes!=full_write(out_fd, buf, bytes)) { syslog(LOG_ERR, "oops! 
message_dump of %d returned different", bytes); } } @@ -523,6 +818,8 @@ int bytesread = 0; int len; + UNUSED_VARIABLE(m); + /* Now, read from spamd */ for(len=0; lentimeout; - if((i=try_to_connect(addr, (struct hostent *) hent, - hent_port, &sock)) != EX_OK) + if ( tp->socketpath ) + rc = try_to_connect_unix(tp, &sock); + else + rc = try_to_connect_tcp(tp, &sock); + + if ( rc != EX_OK ) { free(m->out); m->out=m->msg; m->out_len=m->msg_len; return i; @@ -666,8 +970,8 @@ SSL_write(ssl, m->msg, m->msg_len); #endif } else { - full_write(sock, (unsigned char *) buf, len); - full_write(sock, (unsigned char *) m->msg, m->msg_len); + full_write(sock, buf, len); + full_write(sock, m->msg, m->msg_len); shutdown(sock, SHUT_WR); } @@ -716,7 +1020,7 @@ len = ssl_timeout_read (ssl, m->out+m->out_len, m->max_len+EXPANSION_ALLOWANCE+1-m->out_len); } else{ - len = full_read (sock, (unsigned char *) m->out+m->out_len, + len = full_read (sock, m->out+m->out_len, m->max_len+EXPANSION_ALLOWANCE+1-m->out_len, m->max_len+EXPANSION_ALLOWANCE+1-m->out_len); } @@ -754,49 +1058,17 @@ return failureval; } -static int _lookup_host(const char *hostname, struct hostent *out_hent) -{ - struct hostent *hent = NULL; - int origherr; - - /* no need to try using inet_addr(), gethostbyname() will do that */ - - if (NULL == (hent = gethostbyname(hostname))) { - origherr = h_errno; /* take a copy before syslog() */ - syslog (LOG_ERR, "gethostbyname(%s) failed: h_errno=%d", - hostname, origherr); - switch(origherr) - { - case HOST_NOT_FOUND: - case NO_ADDRESS: - case NO_RECOVERY: - return EX_NOHOST; - case TRY_AGAIN: - return EX_TEMPFAIL; - default: - return EX_OSERR; - } - } - memcpy (out_hent, hent, sizeof(struct hostent)); - - return EX_OK; -} - -int message_process(const char *hostname, int port, char *username, int max_size, int in_fd, int out_fd, const int flags){ - struct hostent hent; +int message_process(struct transport *trans, char *username, int max_size, int in_fd, int out_fd, const int 
flags){ int ret; struct message m; m.type=MESSAGE_NONE; - ret=lookup_host_for_failover(hostname, &hent); - if(ret!=EX_OK) goto FAIL; - m.max_len=max_size; ret=message_read(in_fd, flags, &m); if(ret!=EX_OK) goto FAIL; - ret=message_filter_with_failover(&hent, port, username, flags, &m); + ret=message_filter(trans, username, flags, &m); if(ret!=EX_OK) goto FAIL; if(message_write(out_fd, &m)<0) goto FAIL; if(m.is_spam!=EX_TOOBIG) { @@ -808,7 +1080,7 @@ FAIL: if(flags&SPAMC_CHECK_ONLY){ - full_write(out_fd, (unsigned char *) "0/0\n", 4); + full_write(out_fd, "0/0\n", 4); message_cleanup(&m); return EX_NOTSPAM; } else { @@ -826,41 +1098,176 @@ } /* Aug 14, 2002 bj: Obsolete! */ -int process_message(const char *hostname, int port, char *username, int max_size, int in_fd, int out_fd, const int my_check_only, const int my_safe_fallback){ +int process_message(struct transport *tp, char *username, int max_size, int in_fd, int out_fd, const int my_check_only, const int my_safe_fallback){ int flags; flags=SPAMC_RAW_MODE; if(my_check_only) flags|=SPAMC_CHECK_ONLY; if(my_safe_fallback) flags|=SPAMC_SAFE_FALLBACK; - return message_process(hostname, port, username, max_size, in_fd, out_fd, flags); + return message_process(tp, username, max_size, in_fd, out_fd, flags); } -/* public APIs, which call into the static code and enforce sockaddr-OR-hostent - * conventions */ - -int lookup_host(const char *hostname, int port, struct sockaddr *out_addr) +/* + * init_transport() + * + * Given a pointer to a transport structure, set it to "all empty". + * The default is a localhost connection. 
+ */ +void transport_init(struct transport *tp) { - struct sockaddr_in *addr = (struct sockaddr_in *)out_addr; - struct hostent hent; - int ret; + assert(tp != 0); + + memset(tp, 0, sizeof *tp); - memset(&out_addr, 0, sizeof(out_addr)); - addr->sin_family=AF_INET; - addr->sin_port=htons(port); - ret = _lookup_host(hostname, &hent); - memcpy (&(addr->sin_addr), hent.h_addr, sizeof(addr->sin_addr)); - return ret; + tp->type = TRANSPORT_LOCALHOST; + tp->port = 783; } -int lookup_host_for_failover(const char *hostname, struct hostent *hent) { - return _lookup_host(hostname, hent); +/* + * randomize_hosts() + * + * Given the transport object that contains one or more IP addresses + * in this "hosts" list, rotate it by a random number of shifts to + * randomize them - this is a kind of load balancing. It's possible + * that the random number will be 0, which says not to touch. We don't + * do anything unless + */ + +static void randomize_hosts(struct transport *tp) +{ +int rnum; + + assert(tp != 0); + + if ( tp->nhosts <= 1 ) return; + + rnum = rand() % tp->nhosts; + + while ( rnum-- > 0 ) + { + struct in_addr tmp = tp->hosts[0]; + int i; + + for (i = 1; i < tp->nhosts; i++ ) + tp->hosts[i-1] = tp->hosts[i]; + + tp->hosts[i-1] = tmp; + } } -int message_filter(const struct sockaddr *addr, char *username, int flags, - struct message *m) -{ return _message_filter (addr, NULL, 0, username, flags, m); } +/* + * transport_setup() + * + * Given a "transport" object that says how we're to connect to the + * spam daemon, perform all the initial setup required to make the + * connection process a smooth one. The main work is to do the host + * name lookup and copy over all the IP addresses to make a local copy + * so they're not kept in the resolver's static state. 
+ * + * Here we also manage quasi-load balancing and failover: if we're + * doing load balancing, we randomly "rotate" the list to put it in + * a different order, and then if we're not doing failover we limit + * the hosts to just one. This way *all* connections are done with + * the intention of failover - makes the code a bit more clear. + */ +int transport_setup(struct transport *tp, int flags) +{ +struct hostent *hp = 0; +char **addrp; + + assert(tp != 0); -int message_filter_with_failover (const struct hostent *hent, int port, - char *username, int flags, struct message *m) -{ return _message_filter (NULL, hent, port, username, flags, m); } + switch ( tp->type ) + { + case TRANSPORT_UNIX: + assert(tp->socketpath != 0); + return EX_OK; + + case TRANSPORT_LOCALHOST: + tp->hosts[0].s_addr = inet_addr("127.0.0.1"); + tp->nhosts = 1; + return EX_OK; + + case TRANSPORT_TCP: + if (NULL == (hp = gethostbyname(tp->hostname))) + { + int origherr = h_errno; /* take a copy before syslog() */ + + syslog (LOG_ERR, "gethostbyname(%s) failed: h_errno=%d", + tp->hostname, origherr); + switch (origherr) + { + case HOST_NOT_FOUND: + case NO_ADDRESS: + case NO_RECOVERY: + return EX_NOHOST; + case TRY_AGAIN: + return EX_TEMPFAIL; + default: + return EX_OSERR; + } + } + + /*-------------------------------------------------------- + * If we have no hosts at all, or if they are some other + * kind of address family besides IPv4, then we really + * just have no hosts at all. + */ + if ( hp->h_addr_list[0] == 0 ) + { + /* no hosts in this list */ + return EX_NOHOST; + } + + if ( hp->h_length != sizeof tp->hosts[0] + || hp->h_addrtype != AF_INET ) + { + /* FAIL - bad size/protocol/family? */ + return EX_NOHOST; + } + + /*-------------------------------------------------------- + * Copy all the IP addresses into our private structure. + * This gets them out of the resolver's static area and + * means we won't ever walk all over the list with other + * calls. 
+ * + * ==TODO: check that we don't copy more than we have room for + */ + tp->nhosts = 0; + + for (addrp = hp->h_addr_list; *addrp; addrp++) + { + memcpy(&tp->hosts[tp->nhosts], *addrp, + sizeof tp->hosts[0]); + + tp->nhosts++; + } + + /*-------------------------------------------------------- + * QUASI-LOAD-BALANCING + * + * If the user wants to do quasi load balancing, "rotate" + * the list by a random amount based on the current time. + * This may later be truncated to a single item. This is + * meaningful only if we have more than one host. + */ + if ( (flags & SPAMC_RANDOMIZE_HOSTS) && tp->nhosts > 1 ) + { + randomize_hosts(tp); + } + + /*-------------------------------------------------------- + * If the user wants no fallback, simply truncate the host + * list to just one - this pretends that this is the extent + * of our connection list - then it's not a special case. + */ + if ( !(flags & SPAMC_SAFE_FALLBACK) && tp->nhosts > 1 ) + { + /* truncating list */ + tp->nhosts = 1; + } + } + return EX_OK; +} --- spamd/libspamc.h.orig 2003-04-25 19:59:35.000000000 +0000 +++ spamd/libspamc.h 2003-04-25 03:29:33.000000000 +0000 @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -34,6 +35,9 @@ /* Feb 1 2003 jm: might as well fix bug 191 as well */ #define SPAMC_SYMBOLS (1<<24) +/* 2003/04/16 SJF: randomize hostname order (quasi load balancing) */ +#define SPAMC_RANDOMIZE_HOSTS (1<<23) + /* Aug 14, 2002 bj: A struct for storing a message-in-progress */ typedef enum { @@ -71,6 +75,48 @@ struct libspamc_private_message *priv; }; +/*------------------------------------------------------------------------ + * TRANSPORT (2004/04/16 - SJF) + * + * The code to connect with the daemon has gotten more complicated: support + * for SSL, fallback to multiple hosts, and using UNIX domain sockets. The + * code has gotten ugly with way too many parameters being passed all around. 
+ * + * So we've created this object to hold all the info required to connect with + * the remote site, including a self-contained list of all the IP addresses + * in the event this is using TCP sockets. These multiple IPs can be obtained + * only from DNS returning more than one A record for a single name, and + * this allows for fallback. + * + * We also allow a kind of quasi-load balancing, where we take the list of + * A records from DNS and randomize them before starting out - this lets + * us spread the load out among multiple servers if desired. The idea for + * load balancing goes to Jeremy Zawodny. + * + * By putting all our data here, we remove "fallback" from being a special + * case. We may find ourselves with several IP addresses, but if the user + * disables fallback, we set the IP address count to one. Now the connect + * code just loops over that same address. + */ +#define TRANSPORT_LOCALHOST 0x01 /* TCP to localhost only */ +#define TRANSPORT_TCP 0x02 /* standard TCP socket */ +#define TRANSPORT_UNIX 0x03 /* UNIX domain socket */ + +struct transport { + int type; + + const char *socketpath; /* for UNIX dommain socket */ + const char *hostname; /* for TCP sockets */ + + unsigned short port; /* for TCP sockets */ + + struct in_addr hosts[256]; + int nhosts; +}; + +extern void transport_init(struct transport *tp); +extern int transport_setup(struct transport *tp, int flags); + /* Aug 14, 2002 bj: New interface functions */ /* Read in a message from the fd, with the mode specified in the flags. @@ -84,31 +130,13 @@ * the "score/threshold" line. */ long message_write(int out_fd, struct message *m); -/* Pass the message through spamd (at addr) as the specified user, with the - * given flags. Returns EX_OK on success, or various errors on error. If it was - * successful, message_write will print either the CHECK_ONLY output, or the - * filtered message in the appropriate output format. 
*/ -int message_filter(const struct sockaddr *addr, char *username, int flags, struct message *m); - -/* Convert the host/port into a struct sockaddr. Returns EX_OK on success, or - * else an error EX. */ -int lookup_host(const char *hostname, int port, struct sockaddr *a); - -/* Pass the message through one of a set of spamd's. This variant will handle - * multiple spamd machines; if a connect failure occurs, it will fail-over to - * the next one in the struct hostent. Otherwise identical to message_filter(). +/* Process the message through the spamd filter, making as many connection + * attempts as are implied by the transport structure. To make this do + * failover, more than one host is defined, but if there is only one there, + * no failover is done. */ -int message_filter_with_failover (const struct hostent *hent, int port, char - *username, int flags, struct message *m); - -/* Convert the host into a struct hostent, for use with - * message_filter_with_failover() above. Returns EX_OK on success, or else an - * error EX. Note that the data filled into hent is from gethostbyname()'s - * static storage, so any call to gethostbyname() between - * lookup_host_for_failover() and message_filter_with_failover() will overwrite - * this. Take a copy, and use that instead, if you think a call may occur in - * your code, or library code that you use (such as syslog()). */ -int lookup_host_for_failover(const char *hostname, struct hostent *hent); +int message_filter(struct transport *tp, const char *username, + int flags, struct message *m); /* Dump the message. If there is any data in the message (typically, m->type * will be MESSAGE_ERROR) it will be message_writed. Then, fd_in will be piped @@ -119,14 +147,14 @@ /* Do a message_read->message_filter->message_write sequence, handling errors * appropriately with dump_message or appropriate CHECK_ONLY output. Returns * EX_OK or EX_ISSPAM/EX_NOTSPAM on success, some error EX on error. 
*/ -int message_process(const char *hostname, int port, char *username, int max_size, int in_fd, int out_fd, const int flags); +int message_process(struct transport *trans, char *username, int max_size, int in_fd, int out_fd, const int flags); /* Cleanup the resources we allocated for storing the message. Call after * you're done processing. */ void message_cleanup(struct message *m); /* Aug 14, 2002 bj: This is now legacy, don't use it. */ -int process_message(const char *hostname, int port, char *username, +int process_message(struct transport *tp, char *username, int max_size, int in_fd, int out_fd, const int check_only, const int safe_fallback); --- spamd/spamc.c.orig 2003-04-25 19:59:42.000000000 +0000 +++ spamd/spamc.c 2003-04-25 03:01:55.000000000 +0000 @@ -79,17 +79,31 @@ printf("-u username: specify the username for spamd to process this message under\n"); printf("-x: don't fallback safely - in a comms error, exit with a TEMPFAIL error code\n"); printf("-t: timeout in seconds to read from spamd. 0 disables. 
[default: 600]\n\n"); + printf("-H: randomize the IP addresses in the looked-up hostname\n"); + printf("-U path: use UNIX domain socket with path\n"); } int -read_args(int argc, char **argv, char **hostname, int *port, int *max_size, char **username) +read_args(int argc, char **argv, int *max_size, const char **username, + struct transport *ptrn) { int opt, i, j; - while(-1 != (opt = getopt(argc,argv,"-BcrRd:e:fhyp:t:s:u:xS"))) + while(-1 != (opt = getopt(argc,argv,"-BcrRd:e:fhyp:t:s:u:xSHU:"))) { switch(opt) { + case 'H': + { + flags |= SPAMC_RANDOMIZE_HOSTS; + break; + } + case 'U': + { + ptrn->type = TRANSPORT_UNIX; + ptrn->socketpath = optarg; + break; + } case 'B': { flags = (flags & ~SPAMC_MODE_MASK) | SPAMC_BSMTP_MODE; @@ -117,7 +131,8 @@ } case 'd': { - *hostname = optarg; /* fix the ptr to point to this string */ + ptrn->type = TRANSPORT_TCP; + ptrn->hostname = optarg; /* fix the ptr to point to this string */ break; } case 'e': @@ -132,7 +147,7 @@ } case 'p': { - *port = atoi(optarg); + ptrn->port = atoi(optarg); break; } case 'f': @@ -220,43 +235,67 @@ } int main(int argc, char **argv){ - int port = 783; int max_size = 250*1024; - char *hostname = (char *) "127.0.0.1"; - char *username = NULL; - struct passwd *curr_user; - struct hostent hent; + const char *username = NULL; int ret; struct message m; int out_fd; + struct transport trans; + + transport_init(&trans); openlog ("spamc", LOG_CONS|LOG_PID, LOG_MAIL); signal (SIGPIPE, SIG_IGN); - read_args(argc,argv,&hostname,&port,&max_size,&username); + read_args(argc,argv, &max_size, &username, &trans); + /*-------------------------------------------------------------------- + * DETERMINE USER + * + * If the program's caller didn't identify the user to run as, use the + * current user for this. Note that we're not talking about UNIX perm- + * issions, but giving SpamAssassin a username so it can do per-user + * configuration (whitelists & the like). 
+ * + * Since "curr_user" points to static library data, we don't wish to risk + * some other part of the system overwriting it, so we copy the username + * to our own buffer - then this won't arise as a problem. + */ + if(NULL == username) { + static char userbuf[256]; + struct passwd *curr_user; + curr_user = getpwuid(geteuid()); if (curr_user == NULL) { perror ("getpwuid failed"); if(flags&SPAMC_CHECK_ONLY) { printf("0/0\n"); return EX_NOTSPAM; } else { return EX_OSERR; } } - username = curr_user->pw_name; + memset(userbuf, 0, sizeof userbuf); + strncpy(userbuf, curr_user->pw_name, sizeof userbuf - 1); + username = userbuf; } - out_fd=-1; - m.type=MESSAGE_NONE; + /*-------------------------------------------------------------------- + * SET UP TRANSPORT + * + * This takes the user parameters and digs up what it can about how + * we connect to the spam daemon. Mainly this involves lookup up the + * hostname and getting the IP addresses to connect to. + */ + if ( (ret = transport_setup(&trans, flags)) != EX_OK ) + goto FAIL; - ret=lookup_host_for_failover (hostname, &hent); - if(ret!=EX_OK) goto FAIL; + out_fd=-1; + m.type = MESSAGE_NONE; m.max_len = max_size; m.timeout = timeout; ret=message_read(STDIN_FILENO, flags, &m); if(ret!=EX_OK) goto FAIL; - ret=message_filter_with_failover(&hent, port, username, flags, &m); + ret=message_filter(&trans, username, flags, &m); if(ret!=EX_OK) goto FAIL; get_output_fd(&out_fd); if(message_write(out_fd, &m)<0) goto FAIL; @@ -268,7 +307,7 @@ FAIL: get_output_fd(&out_fd); if(flags&SPAMC_CHECK_ONLY || flags&SPAMC_REPORT || flags&SPAMC_REPORT_IFSPAM){ - full_write(out_fd, (unsigned char *) "0/0\n", 4); + full_write(out_fd, "0/0\n", 4); return EX_NOTSPAM; } else { message_dump(STDIN_FILENO, out_fd, &m); --- spamd/utils.c.orig 2003-04-25 19:59:53.000000000 +0000 +++ spamd/utils.c 2003-04-25 02:04:28.000000000 +0000 @@ -22,6 +22,7 @@ /* Aug 14, 2002 bj: EINTR and EAGAIN aren't fatal, are they? 
*/ /* Aug 14, 2002 bj: moved these to utils.c */ /* Jan 13, 2003 ym: added timeout functionality */ +/* Apr 24, 2003 sjf: made full_read and full_write void* params */ /* -------------------------------------------------------------------------- */ @@ -38,7 +39,7 @@ } static void catch_alrm(int x) { - /* dummy */ + UNUSED_VARIABLE(x); } ssize_t @@ -75,6 +76,12 @@ int nred; sigfunc* sig; +#ifndef SPAMC_SSL + UNUSED_VARIABLE(ssl); + UNUSED_VARIABLE(buf); + UNUSED_VARIABLE(nbytes); +#endif + sig = sig_catch(SIGALRM, catch_alrm); if (libspamc_timeout > 0) { alarm(libspamc_timeout); @@ -104,8 +111,9 @@ /* -------------------------------------------------------------------------- */ int -full_read (int fd, unsigned char *buf, int min, int len) +full_read (int fd, void *vbuf, int min, int len) { + unsigned char *buf = (unsigned char *)vbuf; int total; int thistime; @@ -126,8 +134,9 @@ } int -full_write (int fd, const unsigned char *buf, int len) +full_write (int fd, const void *vbuf, int len) { + const unsigned char *buf = (const unsigned char *)vbuf; int total; int thistime; --- spamd/utils.h.orig 2003-04-25 20:00:02.000000000 +0000 +++ spamd/utils.h 2003-04-25 01:57:26.000000000 +0000 @@ -1,6 +1,8 @@ #ifndef UTILS_H #define UTILS_H +#define UNUSED_VARIABLE(v) ((void)(v)) + extern int libspamc_timeout; /* default timeout in seconds */ #ifdef SPAMC_SSL @@ -18,7 +20,7 @@ int ssl_timeout_read (SSL *ssl, void *, int ); /* these are fd-only, no SSL support */ -int full_read(int fd, unsigned char *buf, int min, int len); -int full_write(int fd, const unsigned char *buf, int len); +int full_read(int fd, void *buf, int min, int len); +int full_write(int fd, const void *buf, int len); #endif --- spamd/spamd.raw.orig 2003-04-25 20:00:22.000000000 +0000 +++ spamd/spamd.raw 2003-04-25 22:17:24.000000000 +0000 @@ -81,6 +81,7 @@ my @OLD_ARGV = @ARGV; # Getopt::Long tends to clear @ARGV Getopt::Long::Configure ("bundling"); GetOptions( + 'socketpath=s' => \$opt{'socketpath'}, 
'auto-whitelist|whitelist|a' => \$opt{'auto-whitelist'}, 'create-prefs!' => \$opt{'create-prefs'}, 'c' => \$opt{'create-prefs'}, @@ -130,12 +131,32 @@ $opt{'help'} and pod2usage(-exitval => $resphash{'EX_USAGE'}, -verbose => 0, -message => 'For more details, use "man spamd"'); -# These can be changed on command line with -A flag -if(@{$opt{'allowed-ip'}}) { - set_allowed_ip(split /,/, join(',', @{$opt{'allowed-ip'}})); +# sanity checking on parameters: if --socketpath is used, it means that we're using +# UNIX domain sockets, none of the IP params are allowed. The code would probably +# work ok if we didn't check it, but it's better if we detect the error and report +# it lest the admin find surprises. +# Note: 'and' binds tighter than 'or', so the IP-only options are grouped in parens. + +if ( defined $opt{'socketpath'} + and ( @{$opt{'allowed-ip'}} + or defined $opt{'ssl'} + or defined $opt{'auth-ident'} + or defined $opt{'port'} ) ) +{ + die "ERROR: --socketpath mutually exclusive with --allowed-ip/--ssl/--port params\n"; } -else { - set_allowed_ip('127.0.0.1'); + + +# These can be changed on command line with -A flag +# but only if we're not using UNIX domain sockets +if ( not defined $opt{'socketpath'} ) +{ + if(@{$opt{'allowed-ip'}}) { + set_allowed_ip(split /,/, join(',', @{$opt{'allowed-ip'}})); + } + else { + set_allowed_ip('127.0.0.1'); + } } # ident-based spamc user authentication @@ -217,15 +238,59 @@ } } -my $port = $opt{'port'} || 783; -my $addr = (gethostbyname($opt{'listen-ip'} || '127.0.0.1'))[0]; -my $proto = getprotobyname('tcp'); +my($port, $addr, $proto); +my($listeninfo); # just for reporting -($port) = $port =~ /^(\d+)$/ or die "invalid port\n"; +if ( defined $opt{'socketpath'} ) +{ + $listeninfo = "UNIX domain socket " .
$opt{'socketpath'}; +} +else +{ + $port = $opt{'port'} || 783; + $addr = (gethostbyname($opt{'listen-ip'} || '127.0.0.1'))[0]; + $proto = getprotobyname('tcp'); + + ($port) = $port =~ /^(\d+)$/ or die "invalid port\n"; + + $listeninfo = "port $port/tcp"; +} # Be a well-behaved daemon my $server; -if ($opt{'ssl'}) { +if ( $opt{'socketpath'} ) { + my $path = $opt{'socketpath'}; + + #--------------------------------------------------------------------- + # see if the socket is in use: if we connect to the current socket, it + # means that spamd is already running, so we have to bail on our own. + # Yes, there is a window here: best we can do for now. There is almost + # certainly a better way, but we don't know it. Yet. + + if ( -e $path ) + { + if ( new IO::Socket::UNIX(Peer => $path, Type => SOCK_STREAM) ) + { + # we connected successfully: must already be running + + undef $opt{'socketpath'}; # so exit handlers won't unlink it! + + die "spamd already running on $path, exiting\n"; + } + else + { + unlink $path; + } + } + + $server = new IO::Socket::UNIX(Local => $path, + Type => SOCK_STREAM, + Listen => SOMAXCONN + ) || die "Could not create UNIX socket on $path: $! $@\n"; + + chmod 0666, $path; # make sure everybody can talk to it +} +elsif ($opt{'ssl'}) { $server = new IO::Socket::SSL(LocalAddr => $addr, LocalPort => $port, Proto => $proto, @@ -265,6 +330,12 @@ chown $uuid, -1, $opt{'pidfile'} || die "fatal: could not chown '$opt{'pidfile'}' to uid $uuid\n"; } + + # ditto with the socket file ('or', not '||', so a failed chown is actually caught) + if (defined $opt{'socketpath'}) { + chown $uuid, -1, $opt{'socketpath'} + or die "fatal: could not chown '$opt{'socketpath'}' to uid $uuid\n"; + } # Change GID $) = "$ugid $ugid"; # effective gid @@ -310,12 +381,13 @@ # now allow waiting processes to connect, if they're watching the log. # The test suite does this! + if ($opt{'debug'}) { - warn "server started on port $port (running version ".
Mail::SpamAssassin::Version().")\n"; + warn "server started on $listeninfo (running version ". Mail::SpamAssassin::Version().")\n"; warn "server pid: $$\n"; } -logmsg("server started on port $port (running version ". Mail::SpamAssassin::Version(). ")"); +logmsg("server started on $listeninfo (running version ". Mail::SpamAssassin::Version(). ")"); my $current_user; my $client; @@ -396,16 +468,21 @@ my $start = time; - my($port, $ip) = sockaddr_in($client->peername); - my $name = gethostbyaddr($ip, AF_INET); + if ( $opt{'socketpath'} ) + { + logmsg("got connection over " . $opt{'socketpath'}); + } else { + my($port, $ip) = sockaddr_in($client->peername); + my $name = gethostbyaddr($ip, AF_INET); - $ip = inet_ntoa($ip); - if (ip_is_allowed($ip)) { + $ip = inet_ntoa($ip); + if (ip_is_allowed($ip)) { logmsg("connection from $name [$ip] at port $port"); - } else { + } else { logmsg("unauthorized connection from $name [$ip] at port $port"); $client->close; next; + } } spawn sub { @@ -1015,6 +1092,9 @@ logmsg "server killed by SIG$sig, shutting down"; $server->close; defined($opt{'pidfile'}) and unlink($opt{'pidfile'}); + + # the UNIX domain socket + defined($opt{'socketpath'}) and unlink($opt{'socketpath'}); exit 0; } @@ -1025,6 +1105,9 @@ if (!eof Server) { shutdown (Server, 2); close Server; + + # the UNIX domain socket + defined($opt{'socketpath'}) and unlink($opt{'socketpath'}); } $got_sighup = 1; } @@ -1164,6 +1247,7 @@ --ssl Run an SSL server --server-key keyfile Specify an SSL keyfile --server-cert certfile Specify an SSL certificate + --socketpath=path Listen on given UNIX domain socket =head1 DESCRIPTION @@ -1432,6 +1516,10 @@ Specify the SSL certificate file to use for SSL connections. +=item B<--socketpath> I<path> + +Listen on UNIX domain path I<path> instead of a TCP socket.
+ =back =head1 BUGS --- spamd/spamc.pod.orig 2003-04-25 20:59:18.000000000 +0000 +++ spamd/spamc.pod 2003-04-25 21:03:11.000000000 +0000 @@ -135,6 +135,17 @@ EX_NOPERM 77 permission denied EX_CONFIG 78 configuration error +=item B<-U> I<socketpath> + +Connect to C<spamd> via UNIX domain socket I<socketpath> instead of a +TCP/IP connection. + +=item B<-H> + +For TCP/IP sockets, randomize the IP addresses returned from a DNS name +lookup (when more than one IP is returned). This provides for a kind of +hostname-based load balancing. + =back =head1 DESCRIPTION