/*
 *  Copyright (C) 2000 Marco Pesenti Gritti
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#include "galeon.h"

/*
 *  AutoBookmarks
 *
 *  (c) Matthew Aubury 2000
 *
 *  This code implements "AutoBookmarks", bookmarks which are generated
 *  automatically by scanning and scoring the user's history file.
 *  In theory, these represent the most commonly used sites and are
 *  handy for particularly lazy users who otherwise don't get round
 *  to making bookmarks (really, I've seen a lot of them!).
 *
 *  The current code is the result of some considerable tweaking and
 *  refinement but at present is based ONLY on my history file, which
 *  cannot possibly be representative of general usage. I'm interested
 *  in ANY feedback people have about how well the scoring algorithm 
 *  actually works.
 *
 *  The Scoring Algorithm
 *
 *  This works as follows. All visits to a server (e.g. www.foo.com) are
 *  lumped together and counted as one. The root page of the site is taken
 *  as the most popular amongst these. This means that if you visit
 *  lots of news pages linked from a site (say, slashdot.org) the rating
 *  of http://slashdot.org/index.pl should be extremely high.
 *  The score is then scaled according to how long you've been visiting
 *  the site (simply visiting a bunch of pages in one session doesn't
 *  give a high score), and how long it is since you last visited the
 *  site (so if I stop going somewhere it gradually drops down the list --
 *  but if I go again it shoots back up).
 * 
 *  FIXME: at the moment autobookmarks are generated once only at the
 *  beginning of the session. In future they should be regenerated at
 *  "some point" during execution... but when?
 *
 *  -- MattA  <matt@ookypooky.com>  19/12/2000
 */

/**
 * The AutoBookmarks folder. It is NULL until autobookmarks_generate()
 * creates it, and is set back to NULL when that function removes the
 * folder because AutoBookmarks are disabled in the configuration.
 */
BookMarkItem *autobookmarks_root = NULL;

/*
 * the type of a "server": the aggregate of all history items whose url
 * yields the same server name (see server_of_url)
 */
typedef struct
{
	gint total_visits;             /* sum of visits over all urls of this server */
	history_item *history_item;    /* most visited url of this server (borrowed
	                                * pointer into the history table)           */
}
server_t;

/* local function prototypes */

/* the first two are used as g_hash_table_foreach callbacks (cast to GHFunc) */
static void add_server_to_hashtable(gpointer key, history_item *item,
				    GHashTable *servers);
static void add_server_to_list(gpointer key, server_t *server, GList **list);
/* used as the g_list_sort comparison function (cast to GCompareFunc) */
static gint compare_servers(const server_t *a, const server_t *b);
static gdouble score_of_server(const server_t *server);
/* returns a newly allocated string */
static gchar *server_of_url(gchar *url);

/* FIXME: I don't like importing this, let alone this way! */
/* The history table; this file iterates its values as history_item pointers */
extern GHashTable *history;

/* The bookmarks editor; checked for NULL here before its ctree is touched */
extern bookmarks_editor_controls *bookmarks_editor;

/**
 * autobookmarks_generate: (re)generate the AutoBookmarks folder from the
 * history table.
 *
 * If CONF_AUTOBOOKMARKS_ENABLED is false, the folder (when present) is
 * removed and nothing else is done. Otherwise the folder is created the
 * first time round, or emptied if it already exists, and then filled with
 * one SITE bookmark for each of the CONF_AUTOBOOKMARKS_COUNT best scoring
 * servers. Bookmark names are the history titles passed through
 * shorten_name() with CONF_AUTOBOOKMARKS_SHORTEN.
 */
void
autobookmarks_generate(void)
{
	GList *list, *item;
	GHashTable *servers;
	gint count, maximum;
	gint shorten;

	/* check if it's not enabled */
	if (gnome_config_get_bool(CONF_AUTOBOOKMARKS_ENABLED) == FALSE)
	{
		/* delete the folder if it exists */
		if (autobookmarks_root != NULL)
		{
			GtkCTree *ctree;

			/* NOTE(review): autobookmarks_root is still read
			 * below after this call; confirm that
			 * bookmarks_remove_recursively() leaves the item
			 * itself readable at that point */
			bookmarks_remove_recursively(autobookmarks_root);
			if (bookmarks_editor)
			{
				ctree = GTK_CTREE(bookmarks_editor->ctree);
				gtk_ctree_remove_node(ctree,
						autobookmarks_root->tree_item);
			}
			autobookmarks_root = NULL;
		}
		return;
	}

	/* we're configured to generate them -- is the folder there? */
	if (autobookmarks_root != NULL)
	{
		/* yes, delete the contents */
		item = autobookmarks_root->list;
		while (item != NULL)
		{
			/* fetch the successor before removing: the removal
			 * helper is not visible from here and may unlink or
			 * release this child's list node, in which case
			 * item->next must not be read afterwards */
			GList *next = item->next;

			bookmarks_remove_recursively(item->data);
			item = next;
		}
		autobookmarks_root->list = NULL;
	}
	else
	{
		/* the new folder must be hung below the main folder, so
		 * check for it *before* creating anything: bailing out
		 * afterwards would leave autobookmarks_root pointing at
		 * an orphan that later calls would never attach */
		g_return_if_fail(bookmarks_root != NULL);

		/* no, create the folder */
		autobookmarks_root = bookmarks_new_bookmark
			(AUTOBOOKMARKS, _("AutoBookmarks"),
			 NULL, NULL, NULL, NULL);

		/* add it to the main folder */
		bookmarks_root->list = g_list_append (bookmarks_root->list,
						      autobookmarks_root);
		autobookmarks_root->parent = bookmarks_root;
		autobookmarks_root->create_toolbar = FALSE;
		autobookmarks_root->expanded = FALSE;
		autobookmarks_root->list = NULL;
	}

	/* get values from configuration */
	maximum = gnome_config_get_int(CONF_AUTOBOOKMARKS_COUNT);
	shorten = gnome_config_get_int(CONF_AUTOBOOKMARKS_SHORTEN);

	/* build the table of servers (server name -> server_t) */
	servers = g_hash_table_new(g_str_hash, g_str_equal);
	g_hash_table_foreach(history, (GHFunc)add_server_to_hashtable,
			     servers);

	/* insert each server into a new list; the callback also frees
	 * the key strings, so only the table itself remains to destroy */
	list = NULL;
	g_hash_table_foreach(servers, (GHFunc)add_server_to_list, &list);
	g_hash_table_destroy(servers);

	/* sort by score, best first */
	list = g_list_sort(list, (GCompareFunc)compare_servers);

	/* walk the whole list: the first `maximum' entries become
	 * bookmarks, and every entry's server record is freed */
	count = 0;
	for (item = list; item != NULL; item = g_list_next(item), count++)
	{
		server_t *server = (server_t *)item->data;
		history_item *hi = (history_item *)server->history_item;
		BookMarkItem *b;
		gchar *name;

		/* add it if within bounds */
		if (count < maximum)
		{
			/* create the bookmark */
			/* note that shorten_name takes care of
			 * duplicating the title string for us, even
			 * when it's not changed */
			name = shorten_name (hi->title, shorten);
			b = bookmarks_new_bookmark (SITE, name, hi->url,
						    NULL, NULL, NULL);
			g_free(name);

			/* add it to the autobookmarks folder */
			b->parent = autobookmarks_root;
			autobookmarks_root->list =
				g_list_append(autobookmarks_root->list, b);
		}

		/* free server data */
		g_free(server);
	}

	/* free the list */
	g_list_free(list);
}

/**
 * add_server_to_hashtable: g_hash_table_foreach callback run over the
 * history table. Folds one history item into the table of servers, which
 * maps server name strings to server_t records.
 *
 * The first item seen for a server creates its record, and the freshly
 * allocated name string is kept as the table key (add_server_to_list
 * frees it later). Further items of the same server only add their
 * visits and, if more visited, take over as the representative url.
 * The `key' argument is not used.
 */
static void 
add_server_to_hashtable(gpointer key, history_item *item, GHashTable *servers)
{
	gchar *name = server_of_url(item->url);
	server_t *entry = g_hash_table_lookup(servers, name);

	if (entry != NULL)
	{
		/* known server: accumulate into the existing record */
		entry->total_visits += item->visits;
		if (entry->history_item->visits < item->visits)
		{
			/* this url is now the most popular one */
			entry->history_item = item;
		}

		/* the table already holds a key for this server */
		g_free(name);
		return;
	}

	/* first sighting of this server: start a new record */
	entry = g_new0 (server_t, 1);
	entry->total_visits = item->visits;
	entry->history_item = item;
	g_hash_table_insert(servers, name, entry);
}

/**
 * add_server_to_list: g_hash_table_foreach callback run over the table of
 * servers. Pushes the server record onto the front of *list and releases
 * the table's key string, which is not needed once the record is listed.
 */
static void 
add_server_to_list(gpointer key, server_t *server, GList **list)
{
	GList *head = *list;

	/* the server name was only needed for the lookup */
	g_free(key);

	/* the new element becomes the head of the list */
	*list = g_list_prepend(head, server);
}

/**
 * compare_servers: compare the scores of two servers, for use as a
 * list sorting function.
 *
 * Returns a positive value if b scores higher than a, a negative value
 * if a scores higher than b, and zero if the scores are equal -- i.e.
 * sorting with this function puts the highest score first.
 */
static gint 
compare_servers(const server_t *a, const server_t *b)
{
	gdouble score_a, score_b;

	/* compute scores */
	score_a = score_of_server(a);
	score_b = score_of_server(b);

	/* a comparison function has to report ties as 0: answering -1
	 * for both (a, b) and (b, a) would claim each one sorts before
	 * the other */
	if (score_b > score_a)
	{
		return 1;
	}
	if (score_b < score_a)
	{
		return -1;
	}
	return 0;
}

/**
 * score_of_server:: compute the "score" of a server.
 * This value represents how likely the item is to make a good AutoBookmark. 
 * This is extremely heuristic and I'm still experimenting -- MattA.
 */
static gdouble 
score_of_server(const server_t *server)
{
	gdouble age, age_scale;
 	gdouble interval_scale;
	gdouble score;

	/* get some basic values */
	age = (gdouble)(time(NULL) - server->history_item->last);

	/* age scaling, falls of exponentially with time */
	age_scale = exp((double)(-age / 1e12));

	/* scale linearly according to how long we've been visiting */
	interval_scale = (gdouble)(server->history_item->last - 
				   server->history_item->first);

	/* score by scaled number of vists */
	score = (gdouble)server->total_visits * age_scale * interval_scale;

	/* return the score */
	return score;	
}

/**
 * server_of_url: extract the server part of a url. for example, 
 * http://www.foo.com/bar/zed.html => www.foo.com
 *
 * Everything up to and including the first "://" is skipped (when
 * present), and the result runs from there up to, but not including, the
 * next '/' or the end of the string. The result is a newly allocated
 * copy which the caller must release with g_free().
 */
static gchar *
server_of_url(gchar *url)
{
	gchar *host, *slash;

	/* step over the "protocol://" prefix if there is one */
	host = strstr(url, "://");
	host = (host != NULL) ? host + 3 : url;

	/* the server name ends where the path begins */
	slash = strchr(host, '/');
	if (slash != NULL)
	{
		return g_strndup(host, slash - host);
	}

	/* no path at all: the rest of the string is the server */
	return g_strdup(host);
}
