From 2d195c70fd5cd7fe9662e9fb6f43830f4a8306f7 Mon Sep 17 00:00:00 2001
From: Jonas Haag <jonas@lophus.org>
Date: Sun, 6 Jan 2013 22:35:04 +0100
Subject: [PATCH] Fix #43: --site-name should accept non-ASCII values

---
 klaus/utils.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/klaus/utils.py b/klaus/utils.py
index e8fc049..1311a5b 100644
--- a/klaus/utils.py
+++ b/klaus/utils.py
@@ -4,6 +4,11 @@ import re
 import time
 import datetime
 import mimetypes
+import locale
+try:
+    import chardet
+except ImportError:
+    chardet = None
 
 from pygments import highlight
 from pygments.lexers import get_lexer_for_filename, guess_lexer, ClassNotFound
@@ -140,23 +145,27 @@ def guess_is_image(filename):
 
 def force_unicode(s):
     """ Does all kind of magic to turn `s` into unicode """
+    # It's already unicode, don't do anything:
     if isinstance(s, unicode):
         return s
+
+    # Try some default encodings:
     try:
         return s.decode('utf-8')
     except UnicodeDecodeError as exc:
         pass
     try:
-        return s.decode('iso-8859-1')
+        return s.decode(locale.getpreferredencoding())
     except UnicodeDecodeError:
         pass
-    try:
-        import chardet
+
+    if chardet is not None:
+        # Try chardet, if available
         encoding = chardet.detect(s)['encoding']
         if encoding is not None:
             return s.decode(encoding)
-    except (ImportError, UnicodeDecodeError):
-        raise exc
+
+    raise exc # Give up.
 
 
 def extract_author_name(email):
-- 
GitLab