/* Distributed Checksum Clearinghouse database definitions
 *
 * Copyright (c) 2005 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC and permission to use
 * U.S. Patent 6,330,590 by contacting Commtouch at http://www.commtouch.com/
 * or by email to nospam@commtouch.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software.  That software includes additional features.  This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.42-1.83 $Revision$
 */

#ifndef DB_H
#define DB_H

#include "srvr_defs.h"
#include <math.h>

/* NOTE(review): presumably nonzero when this process works on the greylist
 * database instead of the main DCC database; the definition is not in this
 * header -- confirm. */
extern u_char grey_on;

/* base names of the two kinds of database, and suffixes
 * (presumably appended to form the hash table and lock file names) */
#define DB_DCC_NAME	"dcc_db"
#define DB_GREY_NAME	"grey_db"
#define DB_HASH_SUFFIX  ".hash"
#define DB_LOCK_SUFFIX	".lock"

/* whitelist file name: "grey_whitelist" when g is nonzero, else "whitelist" */
#define WHITELIST_NM(g)	    ((g) ? "grey_whitelist" : "whitelist")

/* version string and number of the database format */
#define DB_VERSION3_STR "DCC checksum database version 3"
#define DB_VERSION_STR  DB_VERSION3_STR
#define DB_VERSION      3

/* The hash table magic string, DB_HASH_MAGIC, is kept in the first slot
 * of the hash table (see DB_HADDR_MAGIC), and so it must be at most
 * 12 bytes.
 * MAX_HASH_ENTRIES is the number of HASH_ENTRY structures that fit
 * in 0x7fffffff bytes. */
#define MAX_HASH_ENTRIES    (0x7fffffff/sizeof(HASH_ENTRY))
#define DB_HASH_MAGIC   "DCC hash B4"

#define MIN_HASH_ENTRIES    512		/* minimum size of hash table in entries */
#define DEF_HASH_ENTRIES    (6*1024*1024)   /* default size in entries */


/* primitive multiplicative hash function */
static inline u_int
mhash(u_int n, u_int nbins)
{
#define GR_HEX  0x9ccf9319		/* golden mean */
#define GR_FLOAT 0.618033988749894848204586834365
	double v = n;

	v *= GR_FLOAT;
	v -= floor(v);
	v *= nbins;
	return floor(v);
}


/* Store a value into a byte array, most significant byte first.
 * The value is evaluated exactly once.
 *	DB_CP3() stores bits 23..0 in x[0..2]
 *	DB_CP4() stores bits 31..0 in x[0..3] */
#define DB_CP3(x,v) do {						\
	u_int32_t db_cp3_v_ = v;					\
	(x)[0] = db_cp3_v_>>16;						\
	(x)[1] = db_cp3_v_>>8;						\
	(x)[2] = db_cp3_v_;						\
    } while (0)
#define DB_CP4(x,v) do {						\
	u_int32_t db_cp4_v_ = v;					\
	(x)[0] = db_cp4_v_>>24;						\
	(x)[1] = db_cp4_v_>>16;						\
	(x)[2] = db_cp4_v_>>8;						\
	(x)[3] = db_cp4_v_;						\
    } while (0)

/* Rebuild the value stored by DB_CP3() or DB_CP4().
 * The most significant byte is widened to u_int32_t before it is shifted. */
#define DB_EX3(x) ((((u_int32_t)(x)[0])<<16)				\
		   + ((x)[1]<<8)					\
		   + (x)[2])
#define DB_EX4(x) ((((u_int32_t)(x)[0])<<24)				\
		   + ((x)[1]<<16)					\
		   + ((x)[2]<<8)					\
		   + (x)[3])

/* Check whether all of the bytes are zero.
 * the least significant byte should be tested first */
#define DB_ZERO3(x) ((x)[2] == 0					\
		     && (x)[1] == 0					\
		     && (x)[0] == 0)
#define DB_ZERO4(x) ((x)[3] == 0					\
		     && (x)[2] == 0					\
		     && (x)[1] == 0					\
		     && (x)[0] == 0)


/* a single checksum in a database record */
typedef u_char      DB_TGTS[3];		/* a compressed count */
typedef u_int64_t   DB_PTR;		/* database record offset */
typedef u_int32_t   DB_PTR_C;		/*      compressed by DB_PTR_CP() */
typedef struct {
    DB_PTR_C    prev;			/* previous record for this checksum */
    DB_TGTS     tgts;			/* accumulated reported targets */
    /* checksum type in the low 4 bits (DB_CK_MASK) combined
     * with the DB_CK_FG_* flag bits */
    DCC_CK_TYPE_B type_fgs;
#    define      DB_CK_FG_OBS	0x80    /* obsolete report of a checksum */
#    define	 DB_CK_FG_DUP	0x40	/* duplicate report */
#    define	 DB_CK_MASK	0x0f
#    define	 DB_CK_OBS(ck)	((ck)->type_fgs & DB_CK_FG_OBS)
#    define	 DB_CK_DUP(ck)	((ck)->type_fgs & DB_CK_FG_DUP)
#    define      DB_CK_TYPE(ck)	((DCC_CK_TYPES)((ck)->type_fgs & DB_CK_MASK))
    DCC_SUM     sum;			/* presumably the checksum itself */
} DB_RCD_CK;
/* set and get the 3-byte target count of a checksum;
 * the count is stored most significant byte first by DB_CP3() */
#define DB_TGTS_CK_SET(ck,v) DB_CP3((ck)->tgts,v)
#define DB_TGTS_CK(ck) DB_EX3((ck)->tgts)

/* shape of a checksum database entry
 *	A fixed header is followed by an array of checksums.  DB_RCD_LEN()
 *	counts only the first DB_NUM_CKS() checksums, and so records
 *	presumably occupy less than sizeof(DB_RCD) in the file. */
typedef struct {
    DCC_TS      ts;			/* original server's creation date */
    DCC_SRVR_ID srvr_id_auth;		/* initial server & client auth bit */
    /* the server-ID without the DCC_SRVR_ID_AUTH bit */
#    define	 DB_RCD_ID(r)	((r)->srvr_id_auth & ~DCC_SRVR_ID_AUTH)
    DB_TGTS     tgts_del;		/* # target addresses or delete flag */
    /* the low 4 bits (DB_CK_MASK) count the checksums
     * and the DB_RCD_FG_* flags are in the upper bits */
    u_char      fgs_num_cks;		/* # of cksums | flags */
#    define      DB_RCD_FG_TRIM	    0x80    /* some checksums deleted */
#    define	 DB_RCD_FG_SUMRY    0x40    /* fake summary record */
#    define	 DB_RCD_FG_DELAY    0x20    /* delayed for fake summary */
#    define	 DB_RCD_TRIMMED(r)  ((r)->fgs_num_cks & DB_RCD_FG_TRIM)
#    define	 DB_RCD_SUMRY(r)    ((r)->fgs_num_cks & DB_RCD_FG_SUMRY)
#    define	 DB_RCD_DELAY(r)    ((r)->fgs_num_cks & DB_RCD_FG_DELAY)
#    define      DB_NUM_CKS(r)	    ((r)->fgs_num_cks & DB_CK_MASK)
    DB_RCD_CK   cks[DCC_NUM_CKS];	/* room for the maximum # of checksums */
} DB_RCD;

/* size of the part of a record before its checksums */
#define DB_RCD_HDR_LEN (ISZ(DB_RCD) - ISZ(DB_RCD_CK)*DCC_NUM_CKS)
/* size of a record counting only the checksums it says it has */
#define DB_RCD_LEN(r) (DB_RCD_HDR_LEN + DB_NUM_CKS(r) * ISZ(DB_RCD_CK))

/* set or get the raw 3-byte target count of a record, including
 * special values such as DCC_TGTS_DEL */
#define DB_TGTS_RCD_SET(r,v) DB_CP3((r)->tgts_del,v)
#define DB_TGTS_RCD_RAW(r) DB_EX3((r)->tgts_del)
/* Get the target count of a record.
 * A raw count equal to DCC_TGTS_DEL is reported as 0;
 * any other raw value is returned unchanged. */
static inline DCC_TGTS
DB_TGTS_RCD(const DB_RCD *r)
{
	const DCC_TGTS raw = DB_TGTS_RCD_RAW(r);

	if (raw == DCC_TGTS_DEL)
		return 0;
	return raw;
}

/* Database offsets are stored in 32 bits after being divided by
 * DB_PTR_MULT, and so every record must start on a multiple of DB_PTR_MULT.
 * this allows database of up to 48 GBytes */
#define DB_PTR_MULT	    ((DB_PTR)12)    /* gcd of all sizes of DB_RCD */
#define DB_PTR_CP(v)	    ((u_int32_t)((v) / DB_PTR_MULT))	/* compress */
#define DB_PTR_EX(x)	    ((x) * DB_PTR_MULT)			/* expand */

#define DB_PTR_NULL	    0
#define DB_PTR_BASE	    ISZ(DB_HDR)	/* 1st offset after the file header */
/* largest offset that survives compression, (2**32 - 1)*DB_PTR_MULT */
#define DB_PTR_MAX	    DB_PTR_EX((((DB_PTR)1)<<(sizeof(DB_PTR_C)*8)) -1)
#define DB_PTR_BAD	    (DB_PTR_MAX+1)
/* an offset is bad if it is inside the file header or not below DB_PTR_MAX */
#define DB_PTR_IS_BAD(l)    ((l) < DB_PTR_BASE || (l) >= DB_PTR_MAX)
/* also reject an offset that is not a multiple of DB_PTR_MULT */
#define DB_PTR_IS_BAD_FULL(l) (DB_PTR_IS_BAD(l) || (l)%DB_PTR_MULT != 0)


typedef DCC_TS DB_SN;			/* database serial number */

/* information for expiring database entries
 *	All of these durations are in seconds. */
#define DB_EXPIRE_SECS_DEF	(24*60*60)	/* 1 day non-spam expiration */
#define DB_EXPIRE_SECS_DEF_MIN  (4*60*60)	/* 4 hours */
#define DB_EXPIRE_SECS_MAX	DCC_MAX_SECS	/* forever */
#define DB_EXPIRE_SECS_MIN	(60*60)		/* 1 hour */
#define DB_EXPIRE_SPAMSECS_DEF  (30*24*60*60)	/* 30 days */
#define DB_EXPIRE_SPAMSECS_DEF_MIN (3*24*60*60)	/* 3 days */
#define DCC_NEW_SPAM_SECS   (23*60*60)	/* announce spam at least this often */

/* seconds to greylist or delay new mail messages
 *  RFC 2821 says SMTP clients should wait at least 30 minutes to retry,
 *  but 15 minutes seems more common than 30 minutes.  Many retry after
 *  only 5 minutes, and some after only 1 (one!) second.  However,
 *  many of those that retry after a few seconds keep trying for a minute
 *  or two. */
#define DEF_GREY_EMBARGO    270			/* 4.5 minutes */
#define MAX_GREY_EMBARGO    (24*60*60)		/* 1 day */

#define DEF_GREY_WINDOW	    (7*24*60*60)    /* wait as long as this */
#define DEF_GREY_WHITE	    (63*24*60*60)   /* remember this long */


/* expiration timestamps with one entry per checksum type */
typedef struct {
    DCC_TS      all;			/* older reports are stale */
} DB_EX_TS[DCC_DIM_CKS];
/* expiration parameters for one checksum type */
typedef struct {			/* -t type,allsecs,threshold,spamsecs */
    DCC_TGTS	clean_thold;		/* threshold */
    int32_t	all;			/* allsecs */
    int32_t	spam;			/* spamsecs */
} DB_EX_SEC;
typedef DB_EX_SEC DB_EX_SECS[DCC_DIM_CKS];  /* one entry per checksum type */

/* flooding thresholds with one entry per checksum type */
typedef DCC_TGTS DB_FLOD_THOLDS[DCC_DIM_CKS];	/* dccd -t */

/* Predicates on checksum types.
 *	g is nonzero for a greylist database and t is the checksum type.
 *	Every use of an argument is parenthesized so that a compound
 *	expression such as "a ? b : c" is tested as a unit.
 *	The arguments are evaluated more than once, and so they must not
 *	have side effects. */

/* types a greylist client may send: anything after DCC_CK_INVALID
 * through DCC_CK_G_TRIPLE_R_BULK */
#define DCC_CK_OK_GREY_CLNT(t) ((t) > DCC_CK_INVALID			    \
				&& (t) <= DCC_CK_G_TRIPLE_R_BULK)
/* types that may be flooded for a greylist database: body checksums,
 * DCC_CK_G_MSG_R_TOTAL through DCC_CK_FLOD_PATH, and IP checksums
 * only when grey_weak_ip is set */
#define DCC_CK_OK_GREY_FLOD(t) ((t) == DCC_CK_BODY			    \
				|| ((t) >= DCC_CK_G_MSG_R_TOTAL		    \
				    && (t) <= DCC_CK_FLOD_PATH)		    \
				|| ((t) == DCC_CK_IP && grey_weak_ip))

/* flood server-ID declarations immediately, body checksums by the bulk
 * threshold, and nothing else */
#define DEF_FLOD_THOLDS(g,t) ((g) ? 1					    \
			      : DCC_CK_IS_BODY(t) ? DEF_BULK_THRESHOLD	    \
			      : (t) == DCC_CK_SRVR_ID ? 1		    \
			      : DCC_TGTS_INVALID)

/* types a client may send: for a greylist database the same as
 * DCC_CK_OK_GREY_CLNT(), otherwise nothing after DCC_CK_FUZ2 */
#define DCC_CK_OK_DCC_CLNT(g,t) ((t) > DCC_CK_INVALID			    \
				 && (t) <= DCC_CK_G_TRIPLE_R_BULK	    \
				 && ((g)|| (t) <= DCC_CK_FUZ2))
/* types that may be in a database: any valid type, except that the
 * two greylist types are allowed only in a greylist database */
#define DCC_CK_OK_DB(g,t) ((t) > DCC_CK_INVALID && (t) <= DCC_CK_TYPE_LAST  \
			   && ((g) || ((t) != DCC_CK_G_MSG_R_TOTAL	    \
				       && (t) != DCC_CK_G_TRIPLE_R_BULK)))
/* types that may be flooded: DCC_CK_OK_GREY_FLOD() for a greylist
 * database, otherwise the types through DCC_CK_FUZ2 plus flooding paths
 * and server-ID declarations */
#define DCC_CK_OK_FLOD(g,t) ((g) ? DCC_CK_OK_GREY_FLOD(t)		    \
			     : ((t) > DCC_CK_INVALID			    \
				&& ((t) <= DCC_CK_FUZ2			    \
				    || (t) == DCC_CK_FLOD_PATH		    \
				    || (t) == DCC_CK_SRVR_ID)))


/* Bitmask of ignored checksums, with bit t standing for checksum type t.
 *	The bit is built from a DB_NOKEEP_CKS-typed 1 instead of a plain int,
 *	so that all 32 bits of the map can be used without shifting into the
 *	sign bit of an int and so that the complement in DB_RESET_NOKEEP()
 *	is computed as an unsigned value.
 *	t must be less than 32. */
typedef u_int32_t DB_NOKEEP_CKS;	/* bitmask of ignored checksums */
#define DB_SET_NOKEEP(map,t)	((map) |= (((DB_NOKEEP_CKS)1)<<(t)))
#define DB_RESET_NOKEEP(map,t)	((map) &= ~(((DB_NOKEEP_CKS)1)<<(t)))
#define DB_TEST_NOKEEP(map,t)	((map) & (((DB_NOKEEP_CKS)1)<<(t)))

/* relative fuzziness of checksums
 *	Should this be configurable?
 *	Larger numbers mean fuzzier checksums. */
#define DCC_CK_FUZ_LVL_NO   1		/* least fuzzy */
#define DCC_CK_FUZ_LVL1	    2		/* somewhat fuzzy */
#define DCC_CK_FUZ_LVL2	    3		/* fuzzier */
#define DCC_CK_FUZ_LVL3	    4		/* reputations */
#define DCC_CK_FUZ_LVL_REP  DCC_CK_FUZ_LVL3
/* NOTE(review): presumably a table of DCC_CK_FUZ_LVL* values indexed by
 * checksum type; it is defined elsewhere -- confirm. */
extern const u_char *db_ck_fuzziness;

/* shape of the magic string that starts a database */
typedef char DB_VERSION_BUF[64];
/* parameters kept at the start of the database file */
typedef struct {
    DB_VERSION_BUF version;		/* DB_VERSION_STR in DB_HDR_DEF */
    DB_PTR      db_csize;		/* size of database contents in bytes */
    u_int32_t   page_size;		/* size of 1 DB buffer */
    DB_SN       sn;			/* creation or expiration serial # */
    DB_EX_TS    ex_ts;			/* non-spam older than this expired */
    DB_EX_SECS  ex_secs;		/* recent expiration thresholds */
    DB_NOKEEP_CKS nokeep_cks;		/* ignore these checksums */
    DB_FLOD_THOLDS flod_tholds;
    u_int	    flags;		/* DB_PARM_FG_* bits */
#    define DB_PARM_FG_GREY	    0x01    /* greylist database */
#    define DB_PARM_FG_SELF_CLEAN   0x02    /* last dbclean -R or dccd started*/
#    define DB_PARM_FG_SELF_CLEAN2  0x04    /* preceding -R or dccd started */
#    define DB_PARM_FG_CLEARED	    0x08    /* new file */
} DB_PARMS;
/* The file header is the parameters in a union with a character array
 * that makes the header at least 256*3 bytes long.  DB_PTR_BASE, the
 * offset of the first record, is the size of this union. */
typedef union {
    DB_PARMS	p;
    char	c[256*3];
} DB_HDR;
/* initializer for the header of a new, empty database: the current version
 * string, contents that end at DB_PTR_BASE, the DB_PARM_FG_CLEARED flag,
 * and zeros for everything else */
#define DB_HDR_DEF {{DB_VERSION_STR, DB_PTR_BASE, 0, {0},	\
    {{{0}}}, {{0,0,0}}, 0, {0}, DB_PARM_FG_CLEARED}}


/* shape of a database hash table entry */
typedef u_int32_t DB_HADDR;		/* index of a hash table entry */
typedef u_char DB_HADDR_C[4];		/* compressed hash chain link */
/* pack and unpack a hash table index as 4 bytes,
 * most significant byte first */
#define DB_HADDR_CP(x,v)    DB_CP4(x,v)
#define DB_HADDR_EX(x)      DB_EX4(x)
#define DB_HADDR_NULL       0		/* no-answer from hashing & linking */
#define DB_HADDR_C_NULL(x)  DB_ZERO4(x)	/* is a packed link null? */
/* an index is invalid if it is in the reserved entries at the start of
 * the hash table or beyond the end of the table */
#define DB_HADDR_INVALID(h) ((h) < DB_HADDR_MIN || (h) >= db_hash_len)
#define DB_HADDR_C_INVALID(h) DB_HADDR_INVALID(DB_HADDR_EX(h))

/* A database record offset as stored in a hash table entry.
 *	The offset is divided by DB_PTR_MULT with DB_PTR_CP() and the 32-bit
 *	quotient is kept as 4 bytes, most significant byte first. */
typedef u_char DB_PTR_HC[4];

/* Pack the database offset v into the 4 bytes at x.
 *	This is wrapped in do {} while (0) like DB_CP3() and DB_CP4() so that
 *	it is a single statement that can be the body of an if-else. */
#define DB_HPTR_CP(x,v) do {u_int32_t _v = DB_PTR_CP( v);		\
    (x)[0] = _v>>24; (x)[1] = _v>>16; (x)[2] = _v>>8; (x)[3] = _v;	\
    } while (0)

/* Unpack the 4 bytes at x into a database offset.
 *	The most significant byte must be widened to u_int32_t before it is
 *	shifted.  Shifted as a promoted int, a byte of 0x80 or more overflows,
 *	and the negative result would be sign-extended by the multiplication
 *	with the 64-bit DB_PTR_MULT, ruining every offset in the upper half
 *	of the range. */
#define DB_HPTR_EX(x) DB_PTR_EX((((u_int32_t)(x)[0])<<24)		\
				+ ((x)[1]<<16)				\
				+ ((x)[2]<<8) + (x)[3])

/* One entry in the hash table.
 *	hv_type[0] holds the checksum type in its low 4 bits and 4 bits taken
 *	from the first byte of s in its high 4 bits.  hv_type[1] holds the
 *	second byte of s.
 *	In the HE_*() macros, e points to a HASH_ENTRY, t is a checksum type,
 *	and s is an array of at least 2 bytes (presumably part of the
 *	checksum).
 *	The type argument is parenthesized in HE_MERGE() and HE_CMP() so that
 *	an expression such as "x & 0xf" is added as a unit. */
typedef struct {
    DB_HADDR_C  fwd, bak;		/* hash collision chain */
    u_char	hv_type[2];		/* checksum type + some hash bits */
    /* checksum type of an entry */
#    define	 HE_TYPE(e)	((DCC_CK_TYPES)((e)->hv_type[0] & 0xf))
    /* an entry with a first byte of 0 is free */
#    define	 HE_IS_FREE(e)	((e)->hv_type[0] == 0)
    /* store the type and hash bits in an entry */
#    define	 HE_MERGE(e,t,s) ((e)->hv_type[0] = ((((s)[0])<<4)+(t)),	\
				  (e)->hv_type[1] = (s)[1])
    /* does an entry have this type and these hash bits? */
#    define	 HE_CMP(e,t,s)	((e)->hv_type[1] == (s)[1]		\
				 && (e)->hv_type[0] ==(u_char)((((s)[0])<<4)+(t)))

    DB_PTR_HC   rcd;			/* record for this hash table entry */
} HASH_ENTRY;


/* The initial entries in the hash table are (mis)used for other things */
#define DB_HADDR_MAGIC      0		/* contains DB_HASH_MAGIC */
#define DB_HADDR_FREE       1		/* hash table internal free list */
#define DB_HADDR_SIZES      2		/* sizes kept in the fields below */
/* names of the HASH_ENTRY fields reused to hold sizes
 * (presumably in the entry at DB_HADDR_SIZES) */
#define HASH_STORE_DB_CSIZE rcd		/* size of the database file */
#define HASH_STORE_LEN      fwd		/* size of file in entries */
#define HASH_STORE_USED	    bak		/* entries actually used */

#define DB_HADDR_MIN	    (DB_HADDR_SIZES+1)	/* 1st real entry */
#define HASH_LEN_EXT(l)     ((l)-DB_HADDR_MIN)	/* convert offset to length */


/* control a block of mapped memory */
typedef u_int16_t DB_PG_NUM;		/* page number */
typedef u_int32_t DB_PG_OFF;		/* presumably an offset within a page */
typedef enum {
    DB_BUF_TYPE_FREE = 0,		/* buffer not in use */
    DB_BUF_TYPE_HASH,			/* part of the hash table */
    DB_BUF_TYPE_DB			/* part of the database itself */
} DB_BUF_TYPE;
typedef struct db_buf {
    /* NOTE(review): fwd, bak, and hash look like links in a chain of a hash
     * table of buffers, and older and newer like links in a list ordered
     * by age; the code using them is in db.c -- confirm. */
    struct db_buf *fwd, *bak, **hash;
    struct db_buf *older, *newer;
    union {				/* the mapped memory itself */
	void	    *v;
	HASH_ENTRY  *h;			/* seen as hash table entries */
	char	    *c;			/* seen as bytes */
    } buf;
    DB_PG_NUM	pg_num;			/* page in this buffer */
    int		lock_cnt;		/* presumably # of users of the buffer */
    DB_BUF_TYPE	type;
    u_char	flags;			/* DB_BUF_FG_* bits */
#    define	 DB_BUF_FG_MSYNC    0x01    /* need msync() for safety */
#    define	 DB_BUF_FG_DIRTY    0x02    /* buffer possibly changed */
#    define	 DB_BUF_FG_NO_MMAP  0x04    /* use read() and write() */
#    define	 DB_BUF_FG_ANON	    0x08
} DB_BUF;

/* context for search for or adding a record */
typedef struct {
    union {				/* the data, seen as 1 of several types */
	void	    *p;
	HASH_ENTRY  *h;			/* hash table entry */
	char	    *c;
	DB_RCD      *r;			/* database record */
	DB_PARMS    *parms;		/* database parameters */
    } d;
    union {				/* where the data is */
	DB_HADDR    haddr;		/* index of a hash table entry */
	DB_PTR      rptr;		/* offset of a database record */
    } s;
    DB_BUF      *b;			/* presumably the buffer holding the data */
} DB_STATE;

/* The fixed set of contexts used to reach the database and hash table.
 *	DB_BUF_MIN is computed from the number of DB_STATE structures here.
 * see db_close() before changing this */
typedef struct {
    DB_STATE	rcd;			/* must be first */
    DB_STATE	rcd2;
    DB_STATE	sumrcd;
    DB_STATE	hash;
    DB_STATE	free;
    DB_STATE	tmp;
    DB_STATE	db_parms;
    DB_STATE	hash_ctl;		/* hash control info; must be last */
} DB_STATES;
extern DB_STATES db_sts;

/* global state of the open database, defined outside this header */

/* NOTE(review): presumably the source line and file where a database
 * operation failed -- confirm. */
extern int db_failed_line;
extern const char *db_failed_file;
/* presumably the file descriptors and path names of the database
 * and of its hash table */
extern int db_fd, db_hash_fd;
extern DCC_PATH db_nm, db_hash_nm;
extern struct timeval db_locked;	/* 0 or when database was locked */
extern int db_debug;
extern DB_SN db_sn;
extern DB_HADDR db_hash_len;		/* # of hash table entries */
extern DB_HADDR db_hash_used;		/* # of hash table entries in use */
extern u_int db_hash_page_len;		/* # of HASH_ENTRY's per buffer */
extern DB_HADDR db_max_hash_entries;	/* max size of hash table */
extern DB_PTR db_csize;			/* size of database contents in bytes */
extern DB_PARMS db_parms;
extern u_int db_page_size;		/* size of 1 DB buffer */
extern DB_PG_NUM db_end_pg_num;		/* active end of the database */
extern u_int db_page_max;		/* only padding after this */
extern char db_window_size[32];		/* size of mmap() window */


/* counters of database activity */
typedef struct {
    u_int   db_mmaps;			/* presumably mmap()s of the database */
    u_int   hash_mmaps;			/* presumably mmap()s of the hash table */
    u_int   adds;			/* reports added */
} DB_STATS;
extern DB_STATS db_stats;


/* If the two files were smaller than the typical mmap() limit of a fraction
 * of a GByte, they could be mmap()'ed directly.  In some cases they
 * are too large.
 *
 * Use a modest pool of buffers to map the DB hash table and the database
 * itself.
 * Each access to the files could be with a single, common buffer,
 * but that would involve many more mmap() system calls.
 * Most of the DB hash table is expected to fit in the application's memory.
 *
 * Use the same modest pool of buffers to map the database itself.
 * References to the database have a lot of locality, so the commonly used
 * checksums and counts should remain in memory.
 *
 * Common operating system limits on the number of mapped segments are
 * below 256 and so that is a bound on DB_BUF_MAX */
#define DB_BUF_MAX 128			/* maximum # of buffers */
/* the minimum is 2 more than the number of DB_STATE structures
 * in DB_STATES */
#define DB_BUF_MIN (sizeof(DB_STATES)/sizeof(DB_STATE) + 2)
extern int db_buf_total;		/* total # of db buffers */
extern DB_PTR db_max_rss;		/* maximum db resident set size */


/* srvr/db.c
 *	The functions are defined in db.c.  The notes below are taken only
 *	from the names and types in these declarations.
 *	NOTE(review): the u_char results are presumably booleans with
 *	nonzero for success, and the DCC_EMSG arguments presumably buffers
 *	for error messages -- confirm in db.c. */
extern struct timeval db_time;
/* DCC_IS_TIME() applied to the seconds of db_time */
#define DB_IS_TIME(tgt,lim) DCC_IS_TIME(db_time.tv_sec,tgt,lim)
extern u_char db_unload(DCC_EMSG, u_char);
extern u_char db_close(int);
extern int db_get_page_size(u_int, u_int);
extern u_char db_buf_init(u_int);
/* the final argument of db_open() is presumably a set of DB_OPEN_* bits */
extern u_char db_open(DCC_EMSG, const char *, DB_HADDR, u_char);
/*	check these if they blow out of the u_char */
# define DB_OPEN_RDONLY		0x01
# define DB_OPEN_LOCK_WAIT	0x02	/* wait to get lock */
# define DB_OPEN_LOCK_NOWAIT	0x04	/* get lock but don't wait */
# define DB_OPEN_LOCK_EXT	0x08	/* already locked */
# define DB_OPEN_NO_MMAP	0x10    /* use read() and write() */
extern u_char db_flush_parms(DCC_EMSG );
/* db_locked is 0 unless the database is locked */
#define DB_IS_LOCKED() (db_locked.tv_sec != 0)
extern int db_lock(void);
extern u_char db_unlock(void);
extern u_char db_sync_some(int);
extern DCC_TGTS db_sum_ck(DCC_TGTS, DCC_TGTS, DCC_CK_TYPES);
extern DB_NOKEEP_CKS def_nokeep_cks(void);
extern u_char db_extend(DCC_EMSG, int, const char *, DB_PTR, DB_PTR);
extern u_char db_map_rcd(DCC_EMSG, DB_STATE *, DB_PTR, u_int *);
extern DB_RCD_CK *db_find_ck(DCC_EMSG, DB_RCD *, DB_PTR, DCC_CK_TYPES);
extern DB_RCD_CK *db_map_rcd_ck(DCC_EMSG, DB_STATE *, DB_PTR, DCC_CK_TYPES);
extern DB_HADDR db_hash(DCC_CK_TYPES, const DCC_SUM);
/* results of db_lookup() */
typedef enum {
    DB_FOUND_SYSERR=0,			/* fatal error */
    DB_FOUND_LATER,			/* out of specified hash table range */
    DB_FOUND_IT,
    DB_FOUND_EMPTY,			/* home slot empty */
    DB_FOUND_CHAIN,			/* not in chain--have last entry */
    DB_FOUND_INTRUDER			/* intruder in home slot */
} DB_FOUND;
extern DB_FOUND db_lookup(DCC_EMSG, DCC_CK_TYPES, const DCC_SUM,
			  DB_HADDR, DB_HADDR, DB_STATE *,
			  DB_STATE *, DB_RCD_CK **);
extern u_char db_link_rcd(DCC_EMSG, DB_HADDR, DB_HADDR);
extern DB_PTR db_add_rcd(DCC_EMSG, DB_RCD *);

#endif /* DB_H */
