aboutsummaryrefslogtreecommitdiffstats
path: root/etherpad
diff options
context:
space:
mode:
author Egil Moeller <egil.moller@freecode.no> 2010-04-11 02:25:39 +0200
committer Egil Moeller <egil.moller@freecode.no> 2010-04-11 02:25:39 +0200
commit 7cb7e6de9040e6f0d21390fede044200a0f1d198 (patch)
tree d9f32fd198305ecd23d4afce08000489edd0187d /etherpad
parent 903fc968581ca7309a860d9336762d2147342754 (diff)
download etherpad-7cb7e6de9040e6f0d21390fede044200a0f1d198.tar.gz
etherpad-7cb7e6de9040e6f0d21390fede044200a0f1d198.tar.xz
etherpad-7cb7e6de9040e6f0d21390fede044200a0f1d198.zip
Added a URL indexer. It currently only greps URLs out of pads and stores them in a separate, searchable table; it does not yet provide a way to use this information.
Diffstat (limited to 'etherpad')
-rw-r--r-- etherpad/src/plugins/urlIndexer/hooks.js 39
-rw-r--r-- etherpad/src/plugins/urlIndexer/main.js 32
2 files changed, 71 insertions, 0 deletions
diff --git a/etherpad/src/plugins/urlIndexer/hooks.js b/etherpad/src/plugins/urlIndexer/hooks.js
new file mode 100644
index 0000000..922150e
--- /dev/null
+++ b/etherpad/src/plugins/urlIndexer/hooks.js
@@ -0,0 +1,39 @@
+import("etherpad.log");
+import("dispatch.{Dispatcher,PrefixMatcher,forward}");
+import("sqlbase.sqlobj");
+
+// Matches one "word" character: ASCII digits and letters plus the listed
+// non-ASCII Unicode ranges.
+var REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
+// Matches one character allowed inside a URL: the listed ASCII/punctuation
+// set or any REGEX_WORDCHAR character.
+var REGEX_URLCHAR = new RegExp('('+/[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source+'|'+REGEX_WORDCHAR.source+')');
+// Matches whole URLs (global flag, so String.match returns every occurrence):
+// a scheme prefix such as "http://" or "mailto:", then URL characters. The
+// (?![:.,;]) lookahead keeps the final character from being one of : . , ;
+// so trailing sentence punctuation is not captured as part of the URL.
+var REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source+REGEX_URLCHAR.source+'*(?![:.,;])'+REGEX_URLCHAR.source, 'g');
+
+/**
+ * Handler for the 'padModelWriteToDB' hook: refreshes the URL index of a pad.
+ *
+ * Extracts every REGEX_URL match from the pad text and compares the
+ * space-joined list with the one cached in PAD_URL_CACHE. Only when the list
+ * has changed is the cache row updated (or inserted) and the pad's PAD_URL
+ * rows deleted and re-inserted, one row per match.
+ *
+ * @param args hook arguments; reads args.pad (its text() method) and args.padId
+ */
+function padModelWriteToDB(args) {
+  /* Update the URL index for the pad */
+
+  // String.match with a global regex yields null, not [], when nothing matches.
+  var new_urls = args.pad.text().match(REGEX_URL) || [];
+  var new_urls_str = new_urls.join(' ');
+
+  var old_urls_row = sqlobj.selectSingle("PAD_URL_CACHE", { PAD_ID: args.padId });
+  // Same truthiness test as the update/insert choice below, so "no cached
+  // row" is detected one way throughout.
+  var old_urls_str = old_urls_row ? old_urls_row['URLS'] : '';
+  // NOTE(review): loose comparisons are kept for the cached value because its
+  // exact runtime type is decided by sqlobj, which is not visible here.
+  var old_urls = old_urls_str != '' ? old_urls_str.split(' ') : [];
+
+  // Unchanged URL list: skip all database writes.
+  if (new_urls_str == old_urls_str) return;
+
+  log.info({message: 'Updating urls', new_urls:new_urls, old_urls:old_urls});
+
+  if (old_urls_row) {
+    sqlobj.update("PAD_URL_CACHE", {PAD_ID: args.padId }, {URLS: new_urls_str});
+  } else {
+    sqlobj.insert("PAD_URL_CACHE", {PAD_ID: args.padId, URLS: new_urls_str});
+  }
+
+  // Rebuild the per-URL rows from scratch.
+  sqlobj.deleteRows("PAD_URL", {PAD_ID: args.padId});
+
+  // Declared with var so the counter stays local instead of leaking as an
+  // implicit global.
+  for (var i = 0; i < new_urls.length; i++) {
+    sqlobj.insert("PAD_URL", {PAD_ID: args.padId, URL: new_urls[i]});
+  }
+}
\ No newline at end of file
diff --git a/etherpad/src/plugins/urlIndexer/main.js b/etherpad/src/plugins/urlIndexer/main.js
new file mode 100644
index 0000000..79bb019
--- /dev/null
+++ b/etherpad/src/plugins/urlIndexer/main.js
@@ -0,0 +1,32 @@
+import("etherpad.log");
+import("plugins.urlIndexer.hooks");
+import("sqlbase.sqlobj");
+import("sqlbase.sqlcommon");
+
+/**
+ * Plugin initialiser: fills in the plugin descriptor on `this`.
+ *
+ * Sets the list of hook names, a human-readable description, the
+ * padModelWriteToDB handler taken from the imported hooks module, and the
+ * install/uninstall functions defined in this file.
+ */
+function init() {
+  var plugin = this;
+
+  plugin.hooks = ['padModelWriteToDB'];
+  plugin.description = 'Indexes URLs linked to in pads so that they can be displayed outside pads, searched for etc.';
+  plugin.padModelWriteToDB = hooks.padModelWriteToDB;
+
+  plugin.install = install;
+  plugin.uninstall = uninstall;
+}
+
+/**
+ * Install entry point: logs the installation and creates the two tables used
+ * by the indexer.
+ *
+ * PAD_URL holds one (PAD_ID, URL) row per indexed URL; PAD_URL_CACHE holds a
+ * single URLS text value per pad (PAD_ID is declared unique).
+ */
+function install() {
+  log.info("Installing urlIndexer");
+
+  // Column name -> SQL column definition, passed as-is to sqlobj.createTable.
+  var padUrlColumns = {
+    PAD_ID: 'varchar(128) character set utf8 collate utf8_bin not null references PAD_META(ID)',
+    URL: 'varchar(1024) character set utf8 collate utf8_bin not null'
+  };
+  var padUrlCacheColumns = {
+    PAD_ID: 'varchar(128) character set utf8 collate utf8_bin unique not null references PAD_META(ID)',
+    URLS: 'text collate utf8_bin not null'
+  };
+
+  sqlobj.createTable('PAD_URL', padUrlColumns);
+  sqlobj.createTable('PAD_URL_CACHE', padUrlCacheColumns);
+}
+
+/**
+ * Uninstall entry point. Only writes a log line; the PAD_URL and
+ * PAD_URL_CACHE tables are not touched here.
+ */
+function uninstall() {
+  var notice = "Uninstalling urlIndexer";
+  log.info(notice);
+}
+