From 0741ab7082b1119c20f4913a65121a7b6319b684 Mon Sep 17 00:00:00 2001 From: Mark Sapiro Date: Sat, 7 Feb 2015 12:44:08 -0800 Subject: Added character set recoding to utf-8 for list attributes for Romanian and Russian lists and bumped versions to 2.1.19rc3. --- Mailman/versions.py | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) (limited to 'Mailman/versions.py') diff --git a/Mailman/versions.py b/Mailman/versions.py index 873edad1..5ac03aeb 100755 --- a/Mailman/versions.py +++ b/Mailman/versions.py @@ -93,6 +93,93 @@ def UpdateOldVars(l, stored_state): if not hasattr(l, newname) and newdefault is not uniqueval: setattr(l, newname, newdefault) + def recode(mlist, f, t): + """If the character set for a list's preferred_language has changed, + attempt to recode old string values into the new character set. + + mlist is the list, f is the old charset and t is the new charset. + """ + for x in dir(mlist): + if x.startswith('_'): + continue + nv = doitem(getattr(mlist, x), f, t) + if nv: + setattr(mlist, x, nv) + + def doitem(v, f, t): + """Recursively process lists, tuples and dictionary values and + convert strings as needed. Return either the updated item or None + if no change.""" + if isinstance(v, str): + nv = convert(v, f, t) + if nv == v: + return None + else: + return nv + elif isinstance(v, list): + changed = False + nl = [] + for i in range(len(v)): + nv = doitem(v[i], f, t) + if nv: + changed = True + nl += [nv] + else: + nl += v[i] + if changed: + return nl + else: + return None + elif isinstance(v, tuple): + changed = False + nt = () + for i in range(len(v)): + nv = doitem(v[i], f, t) + if nv: + changed = True + nt += (nv,) + else: + nt += (v[i],) + if changed: + return nt + else: + return None + elif isinstance(v, dict): + changed = False + nd = {} + for k, ov in v.items(): + nv = doitem(ov, f, t) + if nv: + changed = True + nd[k] = nv + else: + nd[k] = ov + if changed: + return nd + else: + return None + else: + return None + + def convert(s, f, t): + """This does the actual character set conversion of the string s + from charset f to charset t.""" + + try: + u = unicode(s, f) + is_f = True + except ValueError: + is_f = False + try: + unicode(s, t) + is_t = True + except ValueError: + is_t = False + if is_f and not is_t: + return u.encode(t, 'replace') + else: + return s + # Migrate to 2.1b3, baw 17-Aug-2001 if hasattr(l, 'dont_respond_to_post_requests'): oldval = getattr(l, 'dont_respond_to_post_requests') @@ -322,6 +409,19 @@ def UpdateOldVars(l, stored_state): for name, pattern, description, emptyflag in stored_state['topics']: pattern = Utils.strip_verbose_pattern(pattern) l.topics.append((name, pattern, description, emptyflag)) + # + # Romanian and Russian had their character sets changed in 2.1.19 + # to utf-8. If there are any strings in the old encoding, try to recode + # them. + # + if stored_state['data_version'] < 108: + if l.preferred_language == 'ro': + if Utils.GetCharSet('ro') == 'utf-8': + recode(l, 'iso-8859-2', 'utf-8') + if l.preferred_language == 'ru': + if Utils.GetCharSet('ru') == 'utf-8': + recode(l, 'koi8-r', 'utf-8') + # # from_is_list was called author_is_list in 2.1.16rc2 (only). PreferStored('author_is_list', 'from_is_list', mm_cfg.DEFAULT_FROM_IS_LIST) -- cgit v1.2.3