From Wikipedia, the free encyclopedia
<?php
/* Given a string containing only a template expression surrounded by curly brackets,
 * returns a structure that can be modified and then converted back into a string.
 *
 * The returned array has these keys:
 *   "title" - the text before the first "|"
 *   "args"  - positional arguments (integer keys) and named arguments (keyed by the
 *             trimmed parameter name)
 *   "pmap"  - only present when needed: maps a trimmed parameter name back to its
 *             original spelling so surrounding whitespace can be restored later
 *
 * Triggers E_USER_ERROR when the text is not wrapped in {{ }} or contains a nested "{".
 * Status: unstable, parameters and return value may change */
function wp_parse_template($text) {
    // "s" lets "." span newlines and "D" pins "$" to the very end of the string
    if (!preg_match('/^\{\{(.*)\}\}$/sD', $text, $regs)) {
        trigger_error("Incomplete template", E_USER_ERROR);
        return null;
    }
    $text = $regs[1];
    if (strpos($text, "{") !== false) {
        trigger_error("No support for complex templates", E_USER_ERROR);
        return null;
    }
    $args = explode("|", $text);
    $ret = array();
    $ret["title"] = array_shift($args);
    $rargs = array();
    $pmap = array();
    foreach ($args as $a) {
        // Split on the first "=" only, so values such as URLs may contain "="
        $pair = explode("=", $a, 2);
        if (count($pair) == 2) {
            $name = trim($pair[0]);
            $rargs[$name] = $pair[1];
            // Strict comparison: " 1" and "1" are loosely equal as numeric strings
            if ($name !== $pair[0])
                $pmap[$name] = $pair[0];
        }
        else
            $rargs[] = $pair[0];
    }
    $ret["args"] = $rargs;
    if (!empty($pmap))
        $ret["pmap"] = $pmap;
    return $ret;
}

/* Builds a fresh template structure that carries the given title and no arguments
 * Status: unstable, parameters may change */
function wptmpl_create($title) {
    $tmpl = array();
    $tmpl["title"] = $title;
    $tmpl["args"] = array();
    return $tmpl;
}

/* Sets a named template parameter on the template structure, which is modified in place.
 * An existing parameter with the same name is overwritten.
 * Status: unstable, parameters may change */
function wptmpl_set_arg(&$tmpl, $param, $val) {
    $tmpl["args"][$param] = $val;
}

/* Removes a named template parameter.
 * The parameter is set to null rather than deleted from the array; null-valued
 * arguments are skipped when the template is built back into a string.
 * Status: unstable, parameters may change */
function wptmpl_unset_arg(&$tmpl, $param) {
    $tmpl["args"][$param] = null;
}

/* Gets the value of a named template parameter, or null if the template has no
 * parameter with that name
 * Status: unstable, parameters may change */
function wptmpl_get_arg($tmpl, $param) {
    if (!array_key_exists($param, $tmpl["args"]))
        return null;
    return $tmpl["args"][$param];
}

/* Returns true if the template has a parameter with the given name.
 * A parameter whose value is null (see wptmpl_unset_arg) counts as absent.
 * Status: unstable, parameters may change */
function wptmpl_has_arg($tmpl, $param) {
    // isset() is false both for a missing key and for a null value
    return isset($tmpl["args"][$param]);
}

/* Changes the template name; the template structure is modified in place
 * Status: unstable, parameters may change */
function wptmpl_set_title(&$tmpl, $title) {
    $tmpl["title"] = $title;
}

/* Returns the template name
 * Status: unstable, parameters may change */
function wptmpl_get_title($tmpl) {
    return $tmpl["title"];
}

/* Returns a string from the template structure.
 * Arguments with string keys are written as "|name=value", all others as "|value";
 * arguments whose value is null are left out. When the structure carries a "pmap"
 * entry for a name, that original spelling is written instead of the name itself.
 * Status: unstable, parameters may change */
function wp_build_template($tmpl) {
    $text = "{{".$tmpl["title"];
    if (!array_key_exists("args", $tmpl))
        return $text."}}";
    $pmap = array_key_exists("pmap", $tmpl) ? $tmpl["pmap"] : array();
    foreach ($tmpl["args"] as $k => $v) {
        if (!isset($v))
            continue;
        if (is_string($k) && array_key_exists($k, $pmap))
            $text .= "|".$pmap[$k]."=".$v;
        else if (is_string($k))
            $text .= "|".$k."=".$v;
        else
            $text .= "|".$v;
    }
    return $text."}}";
}

/* Finds the first full template with the given name in the text and returns it,
 * braces included, or null when there is no such template or its braces never close.
 * The search itself is case-insensitive; unless $ignore_case is true, a candidate is
 * only accepted when its name equals $title apart from the case of the first letter.
 * Status: unstable, parameters may change */
function wp_find_template($title, $text, $ignore_case = false) {
    // Group 1: from the opening braces to the end of the text, group 2: the name as
    // written, group 3: everything after the name and its "|" or "}" delimiters
    $pattern = '/(\{\{[[:space:]]*('.preg_quote($title, '/').')[[:space:]]*[|}]+(.*))$/isD';
    if (!preg_match($pattern, $text, $regs))
        return null;
    $tstr = $regs[1];
    // Wrong capitalisation: keep looking in the text after this candidate
    if (!$ignore_case && ucfirst($regs[2]) !== ucfirst($title))
        return wp_find_template($title, $regs[3], $ignore_case);
    // Walk forward counting braces until the ones opening the template are closed
    $l = 0;
    $len = strlen($tstr);
    for ($i = 0; $i < $len; $i++) {
        if ($tstr[$i] == "{")
            $l++;
        else if ($tstr[$i] == "}")
            $l--;
        if ($l <= 0)
            break;
    }
    if ($l > 0)
        return null;
    return substr($tstr, 0, $i + 1);
}

/* Bot exclusion detector, returns false if the text contains a directive disallowing
 * this bot and true otherwise.
 *
 * $context  - optional context array; its "username" entry, when present, is the bot
 *             name compared against the deny and allow lists
 * $messages - optional message type (string) or list of types (array) compared
 *             against the optout list; the optout list is ignored when this is null
 *
 * A {{Nobots}} template always excludes the bot. For {{Bots}}, the comma separated
 * "deny", "allow" and "optout" parameters are examined.
 * Status: unstable, parameters may change */
function wp_page_allows_bot($text, $context = null, $messages = null) {
    $tstr = wp_find_template("Nobots", $text, true);
    if (!empty($tstr))
        return false;
    $tstr = wp_find_template("Bots", $text, true);
    if (empty($tstr))
        return true;
    $tmpl = wp_parse_template($tstr);
    // A directive that cannot be interpreted is treated as an exclusion, to be safe
    if (!is_array($tmpl) || !isset($tmpl["args"]))
        return false;
    $args = $tmpl["args"];
    $botname = null;
    if (isset($context) && array_key_exists("username", $context))
        $botname = $context["username"];
    if (array_key_exists("deny", $args)) {
        foreach (explode(",", $args["deny"]) as $d) {
            $d = trim($d);
            if (strtolower($d) == "all")
                return false;
            if (isset($botname) && $d == $botname)
                return false;
        }
    }
    if (array_key_exists("allow", $args)) {
        $named = false;   // the list names at least one entry
        $listed = false;  // this bot (or "all") is on the list
        foreach (explode(",", $args["allow"]) as $a) {
            $a = trim($a);
            if ($a === "")
                continue;
            if (strtolower($a) == "none")
                return false;
            $named = true;
            if (strtolower($a) == "all" || (isset($botname) && $a == $botname))
                $listed = true;
        }
        // An allow list bans every bot that is not on it. This can only be
        // enforced when the bot's own name is known.
        if ($named && isset($botname) && !$listed)
            return false;
    }
    if (array_key_exists("optout", $args) && isset($messages)) {
        foreach (explode(",", $args["optout"]) as $o) {
            $o = trim($o);
            if (strtolower($o) == "all")
                return false;
            if (is_string($messages) && strtolower($o) == strtolower($messages))
                return false;
            if (is_array($messages) && in_array($o, $messages))
                return false;
        }
    }
    return true;
}

/* Returns an integer timestamp for the date the comment was signed, or null if no
 * signature was found, the date could not be parsed, or the date lies in the future.
 * A signature is a "[[user" link followed later by a date of the form
 * "12:34, 5 January 2010 (UTC)"; when several dates follow, the last one is used. */
function wp_date_comment($text) {
    // "s" lets the gap between the user link and the date span several lines
    $pattern = '/\[\[user.* ([0-9]+:[0-9]+, [0-9]+ [a-z]+ [0-9]+ \(utc\))/is';
    if (!preg_match($pattern, $text, $regs))
        return null;
    $time = strtotime($regs[1]);
    if ($time === false || $time > time())
        return null;
    return $time;
}

/* Converts a UNIX timestamp into the GMT date string used when POSTing,
 * e.g. 1970-01-01T00:00:00Z. */
function wp_format_post_date($ts) {
    $post_format = "Y-m-d\TH:i:s\Z";
    $formatted = gmdate($post_format, $ts);
    return $formatted;
}

/* Returns an array of the transcluded subpages. The subpages are the keys, the section
 * names are the values.
 * $title is the page whose subpages ("{{title/subpage}}") are looked for and $page is
 * the wikitext to scan, one line at a time. A line containing "==...==" starts a new
 * section; subpages found before the first such line get null as their section.
 * Underscores in a line are treated as spaces. */
function wp_list_subpages($title, $page) {
    $subpages = array();
    $section = null;
    $transclusion = '/\{\{'.preg_quote($title, '/').'\/(.*)\}\}/';
    foreach (explode("\n", $page) as $line) {
        $line = trim($line);
        if (preg_match('/==(.*)==/', $line, $regs)) {
            $section = trim($regs[1]);
            continue;
        }
        $line = str_replace("_", " ", $line);
        if (preg_match($transclusion, $line, $regs))
            $subpages[rtrim($regs[1])] = $section;
    }
    return $subpages;
}


/* POSTs the array of data to the wiki and returns the raw response body, or false
 * when the request failed.
 * The request goes to the "api url" entry of $context when there is one, otherwise to
 * the English Wikipedia API. The "cookies" entry of $context, when present, is sent
 * as a Cookie header. While the response mentions "maxlag" the request is repeated
 * every five seconds.
 * Status: stable */
function wp_post($post, $context = null) {
    $url = "http://en.wikipedia.org/w/api.php";
    if (isset($context) && array_key_exists("api url", $context))
        $url = $context["api url"];
    $header = "Content-type: application/x-www-form-urlencoded\n";
    if (isset($context) && array_key_exists("cookies", $context))
        $header .= "Cookie: ".http_build_query($context["cookies"], "", "; ")."\n";
    $http_opts = array(
        "http" => array(
            "method" => "POST",
            "header" => $header,
            "content" => http_build_query($post)
        )
    );
    $sctx = stream_context_create($http_opts);
    while (true) {
        $ret = file_get_contents($url, false, $sctx);
        // A failed request is returned to the caller as false instead of retried
        if ($ret === false)
            break;
        // NOTE(review): this matches "maxlag" anywhere in the body, so a page whose
        // own text contains that word would be retried forever - confirm whether the
        // decoded error code should be checked instead
        if (strpos($ret, "maxlag") !== false) {
            sleep(5);
            continue;
        }
        break;
    }
    return $ret;
}

/* Downloads the page contents from the wiki.
 * Returns the text of the page, or null when it could not be retrieved. $timestamp is
 * set to the UNIX timestamp of the downloaded revision (null when unavailable).
 * Status: stable */
function wp_get($title, $context = null, &$timestamp = null) {
    $timestamps = array();
    $pages = wp_get_multiple(array($title), $context, $timestamps);
    $key = $title;
    if (!array_key_exists($key, $pages) && count($pages) == 1) {
        // Only one page was asked for, so a single result stored under a different
        // key is taken to be that page (presumably the wiki's normalized spelling of
        // the title - confirm)
        $keys = array_keys($pages);
        $key = $keys[0];
    }
    $timestamp = isset($timestamps[$key]) ? $timestamps[$key] : null;
    return isset($pages[$key]) ? $pages[$key] : null;
}

/* Returns an associative array of the contents of all the specified pages, keyed by
 * the page titles reported in the response. Pages without revisions are left out.
 * $timestamps is filled with the UNIX timestamp of each returned revision under the
 * same keys. An empty array is returned when the response holds no page list.
 * Status: stable */
function wp_get_multiple($titles, $context = null, &$timestamps = null) {
    $timestamps = array();
    $pages = array();
    if (empty($titles))
        return $pages;
    $post = array(
        "action" => "query",
        "format" => "php",
        "prop" => "revisions",
        "titles" => implode("|", $titles),
        "rvprop" => "timestamp|content",
    );
    if (isset($context) && array_key_exists("maxlag", $context))
        $post["maxlag"] = (string)$context["maxlag"];
    $raw = wp_post($post, $context);
    // NOTE(review): unserialize() runs on data received over the network; decoding a
    // JSON response would be the safer choice
    $ret = is_string($raw) ? unserialize($raw) : false;
    if (!isset($ret["query"]["pages"]))
        return array();
    foreach ($ret["query"]["pages"] as $p) {
        if (empty($p["revisions"]))
            continue;
        $t = $p["title"];
        // Only the first revision in the list is used
        $a = array_shift($p["revisions"]);
        $timestamps[$t] = strtotime($a["timestamp"]);
        $pages[$t] = $a["*"];
    }
    return $pages;
}

/* Creates a context structure to be passed to the other functions, also sets
 * various options. Each argument that is not null is stored in the context:
 * $maxlag under "maxlag", $bot under "bot" and $api_url under "api url".
 * Status: unstable, parameters and return value may change */
function wp_create_context($maxlag = null, $bot = false, $api_url = null) {
    $context = array();
    if (isset($maxlag))
        $context["maxlag"] = $maxlag;
    if (isset($bot))
        $context["bot"] = $bot;
    if (isset($api_url))
        $context["api url"] = $api_url;
    return $context;
}

/* Sets the number of items to be returned for each query; the value is stored in the
 * "qlimit" entry of the context, which is modified in place
 * Status: stable */
function wp_context_set_query_limit($limit, &$context) {
    $context["qlimit"] = $limit;
}

/* Logs the bot into the wiki associated with the given context.
 * Returns true on success and false when the response reports anything other than a
 * successful login. On success the context (created if it was null) receives the
 * "username" and the session "cookies" to send with later requests.
 * Triggers E_USER_ERROR when the username is empty or the password is not set.
 * Status: unstable, parameters and return value may change */
function wp_login($username, $password, &$context) {
    if (!isset($username) || $username == "")
        trigger_error("Username not set", E_USER_ERROR);
    if (!isset($password))
        trigger_error("Password not set", E_USER_ERROR);
    $login_post = array(
        "action" => "login",
        "format" => "php",
        "lgname" => $username,
        "lgpassword" => $password,
    );
    $raw = wp_post($login_post, $context);
    // NOTE(review): unserialize() runs on data received over the network
    $ret = is_string($raw) ? unserialize($raw) : false;
    if (!is_array($ret) || !array_key_exists("login", $ret))
        return false;
    $login = $ret["login"];
    if ($login["result"] != "Success")
        return false;
    // Cookie names are built from the prefix announced in the login response
    $prefix = $login["cookieprefix"];
    $cookies = array($prefix."UserName" => $username);
    if (array_key_exists("lguserid", $login))
        $cookies[$prefix."UserID"] = $login["lguserid"];
    if (array_key_exists("lgtoken", $login))
        $cookies[$prefix."Token"] = $login["lgtoken"];
    if (array_key_exists("sessionid", $login))
        $cookies[$prefix."_session"] = $login["sessionid"];
    if (!isset($context))
        $context = array();
    $context["username"] = $username;
    $context["cookies"] = $cookies;
    return true;
}

/* Ends the session by POSTing a logout action through the given context;
 * the response is discarded
 * Status: stable */
function wp_logout($context) {
    $logout_request = array();
    $logout_request["action"] = "logout";
    $logout_request["format"] = "php";
    wp_post($logout_request, $context);
}

/* Returns an edit token to be used for all edits in the session, or an empty string
 * when the response holds no page whose title equals $title.
 * Triggers E_USER_ERROR when the context carries no login cookies.
 * Status: stable */
function wp_get_edit_token($title, $context) {
    if (!isset($context) || !isset($context["cookies"]))
        trigger_error("Must be logged in to get edit token", E_USER_ERROR);
    $post = array(
        "action" => "query",
        "format" => "php",
        "prop" => "info",
        "intoken" => "edit",
        "titles" => $title,
    );
    if (isset($context) && array_key_exists("maxlag", $context))
        $post["maxlag"] = (string)$context["maxlag"];
    $raw = wp_post($post, $context);
    // NOTE(review): unserialize() runs on data received over the network
    $ret = is_string($raw) ? unserialize($raw) : false;
    if (!isset($ret["query"]["pages"]))
        return "";
    foreach ($ret["query"]["pages"] as $p) {
        if ($p["title"] == $title)
            return $p["edittoken"];
    }
    return "";
}

/* Uploads a new page or section over the existing one.
 * $section selects the section to replace; null replaces the whole page. $timestamp,
 * when given, is sent as the base timestamp of the edit. Returns true when the wiki
 * reports success and false otherwise; an error reported by the wiki is raised as an
 * E_USER_NOTICE carrying its error code.
 * Triggers E_USER_ERROR when not logged in or when $content excludes this bot.
 * Status: stable */
function wp_edit_section($title, $content, $summary, $section, $edtoken, $context,
        $timestamp = null) {
    if (!isset($context) || !isset($context["cookies"]))
        trigger_error("Must be logged in to edit pages", E_USER_ERROR);
    if (!wp_page_allows_bot($content, $context))
        trigger_error($title." excludes bot edits", E_USER_ERROR);
    $post = array(
        "action" => "edit",
        "format" => "php",
        "title" => $title,
        "text" => $content,
        "token" => $edtoken,
        "summary" => $summary,
    );
    if (array_key_exists("maxlag", $context))
        $post["maxlag"] = (string)$context["maxlag"];
    if (array_key_exists("bot", $context) && $context["bot"])
        $post["bot"] = "yes";
    if (isset($timestamp))
        $post["basetimestamp"] = wp_format_post_date($timestamp);
    if (isset($section))
        $post["section"] = $section;
    $raw = wp_post($post, $context);
    // NOTE(review): unserialize() runs on data received over the network
    $ret = is_string($raw) ? unserialize($raw) : false;
    if (isset($ret["error"])) {
        trigger_error($ret["error"]["code"], E_USER_NOTICE);
        return false;
    }
    if (isset($ret["edit"]) && $ret["edit"]["result"] == "Success")
        return true;
    trigger_error("Unhandled query return status", E_USER_WARNING);
    return false;
}

/* Replaces the whole page with the given content by delegating to wp_edit_section
 * with no section selected
 * Status: stable */
function wp_edit_page($title, $content, $summary, $edtoken, $context,
        $timestamp = null) {
    $whole_page = null;
    $outcome = wp_edit_section($title, $content, $summary, $whole_page, $edtoken,
        $context, $timestamp);
    return $outcome;
}

/* Adds a new section to the page by delegating to wp_edit_section; the section
 * title is passed along as the edit summary
 * Status: stable */
function wp_append_section($ptitle, $stitle, $content, $edtoken, $context) {
    $target_section = "new";
    $outcome = wp_edit_section($ptitle, $content, $stitle, $target_section, $edtoken,
        $context);
    return $outcome;
}

/* Repeats a fetch-modify-save cycle on the page until a save succeeds, the callback
 * leaves the page unchanged, or $max_tries saves have been attempted. The callback
 * is invoked as:
 * $new_page = $modify($old_page, $data);
 * $old_page and $old_ts, when both are given, stand in for the first download only. */
function wp_edit_war($title, $summary, $modify, $data, $ctx, $token = null,
        $old_page = null, $old_ts = null, $max_tries = null) {
    if (!isset($token))
        $token = wp_get_edit_token($title, $ctx);
    $attempts = 0;
    for (;;) {
        unset($last_ts);
        $have_supplied = isset($old_page) && isset($old_ts);
        if (!$have_supplied)
            $old = wp_get($title, $ctx, $last_ts);
        else {
            // Use the caller's copy once, then forget it so later rounds download
            $old = $old_page;
            $last_ts = $old_ts;
            unset($old_page);
            unset($old_ts);
        }
        if (!wp_page_allows_bot($old, $ctx))
            trigger_error($title." excludes bot edits", E_USER_ERROR);
        $new = $modify($old, $data);
        // Nothing to save when the callback made no change
        if ($new == $old)
            return;
        $saved = wp_edit_page($title, $new, $summary, $token, $ctx, $last_ts);
        $attempts++;
        $exhausted = isset($max_tries) && $attempts >= $max_tries;
        if ($saved || $exhausted)
            return;
    }
}

/* Dry run of a page modification: downloads the page, applies the callback as
 * $modify($old_page, $data) and returns its result without saving anything. */
function wp_edit_test($title, $modify, $data, $ctx) {
    $current = wp_get($title, $ctx);
    $preview = $modify($current, $data);
    return $preview;
}

/* Returns an associative array with the name of the image repository for each file,
 * keyed by the file titles reported in the response. Returns null when $files is
 * empty and an empty array when the response holds no page list.
 * Status: stable */
function wp_locate_files($files, $context) {
    if (empty($files))
        return null;
    $post = array(
        "action" => "query",
        "format" => "php",
        "prop" => "imageinfo",
        "titles" => implode("|", $files),
    );
    if (isset($context) && array_key_exists("maxlag", $context))
        $post["maxlag"] = (string)$context["maxlag"];
    $raw = wp_post($post, $context);
    // NOTE(review): unserialize() runs on data received over the network
    $ret = is_string($raw) ? unserialize($raw) : false;
    $info = array();
    if (!isset($ret["query"]["pages"]))
        return $info;
    foreach ($ret["query"]["pages"] as $p)
        $info[$p["title"]] = $p["imagerepository"];
    return $info;
}

/* Returns a list of the titles of the pages in the category, following the
 * continuation markers of the response until the list is complete. "Category:" is
 * prepended to $category unless it already starts with it (in any case). The
 * "maxlag" and "qlimit" entries of $context, when present, are sent along.
 * Returns null after raising an E_USER_NOTICE when the wiki reports an error or the
 * response cannot be decoded.
 * Status: stable */
function wp_get_category_members($category, $context = null) {
    $ctitle = $category;
    if (strncasecmp($category, "Category:", 9) != 0)
        $ctitle = "Category:".$category;
    $mlist = array();
    $continue = null;
    while (true) {
        $post = array(
            "action" => "query",
            "format" => "php",
            "list" => "categorymembers",
            "cmtitle" => $ctitle,
        );
        if (isset($context) && array_key_exists("maxlag", $context))
            $post["maxlag"] = (string)$context["maxlag"];
        if (isset($context) && array_key_exists("qlimit", $context))
            $post["cmlimit"] = (string)$context["qlimit"];
        if (isset($continue))
            $post["cmcontinue"] = (string)$continue;
        $raw = wp_post($post, $context);
        // NOTE(review): unserialize() runs on data received over the network
        $ret = is_string($raw) ? unserialize($raw) : false;
        if (!is_array($ret)) {
            trigger_error("Invalid API response", E_USER_NOTICE);
            return null;
        }
        if (array_key_exists("error", $ret)) {
            trigger_error($ret["error"]["info"], E_USER_NOTICE);
            return null;
        }
        if (isset($ret["query"]["categorymembers"])) {
            foreach ($ret["query"]["categorymembers"] as $m)
                $mlist[] = $m["title"];
        }
        if (array_key_exists("query-continue", $ret))
            $continue = $ret["query-continue"]["categorymembers"]["cmcontinue"];
        else
            break;
    }
    return $mlist;
}

/* Goes through the page history to find when subpages were transcluded. The list
 * function returns an array of transcluded subpages and takes the arguments:
 * list_fn($title, $contents);
 * Pass in an array of the subpages to look for as $current_tcs.
 *
 * Returns an array keyed by subpage whose values are the earliest revision timestamp
 * seen for that subpage. Once a subpage from $current_tcs is absent from a revision
 * that lists any subpages, it is no longer updated; the search stops when that has
 * happened for every entry of $current_tcs or when the history is exhausted. */
function wp_transcluded_dates($title, $list_fn, $current_tcs, $context = null) {
    $tc_ts = array();
    $found_missing = array();
    $continue = null;
    while (true) {
        $post = array(
            "action" => "query",
            "format" => "php",
            "prop" => "revisions",
            "titles" => $title,
            "rvprop" => "timestamp|content",
        );
        if (isset($context) && array_key_exists("qlimit", $context))
            $post["rvlimit"] = (string)$context["qlimit"];
        if (isset($context) && array_key_exists("maxlag", $context))
            $post["maxlag"] = (string)$context["maxlag"];
        if (isset($continue))
            $post["rvstartid"] = (string)$continue;
        $raw = wp_post($post, $context);
        // NOTE(review): unserialize() runs on data received over the network
        $ret = is_string($raw) ? unserialize($raw) : false;
        $pages = isset($ret["query"]["pages"]) ? $ret["query"]["pages"] : array();
        $revs = array();
        foreach ($pages as $p) {
            if ($p["title"] != $title)
                continue;
            if (isset($p["revisions"]))
                $revs = $p["revisions"];
            break;
        }
        foreach ($revs as $r) {
            $time = strtotime($r["timestamp"]);
            $tcs = $list_fn($title, $r["*"]);
            if (empty($tcs))
                continue;
            if (isset($current_tcs)) {
                // Remember every wanted subpage that this revision does not list
                foreach ($current_tcs as $tc) {
                    if (!in_array($tc, $tcs))
                        $found_missing[$tc] = true;
                }
            }
            foreach ($tcs as $tc) {
                if (array_key_exists($tc, $found_missing))
                    continue;
                // Start from "now" and keep the smallest timestamp seen
                if (!array_key_exists($tc, $tc_ts))
                    $tc_ts[$tc] = time();
                if ($tc_ts[$tc] > $time)
                    $tc_ts[$tc] = $time;
            }
        }
        if (isset($current_tcs)) {
            // Stop once every wanted subpage has been seen missing
            $all_missing = true;
            foreach ($current_tcs as $tc) {
                if (array_key_exists($tc, $found_missing))
                    continue;
                $all_missing = false;
                break;
            }
            if ($all_missing)
                break;
        }
        if (isset($ret["query-continue"]))
            $continue = $ret["query-continue"]["revisions"]["rvstartid"];
        else
            break;
    }
    return $tc_ts;
}
?>