New module 'Transliteration'
This commit is contained in:
parent
0197478295
commit
09b74574f1
188 changed files with 4810 additions and 0 deletions
230
modules/transliteration/transliteration.module
Normal file
230
modules/transliteration/transliteration.module
Normal file
|
@ -0,0 +1,230 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Converts non-latin text to US-ASCII and sanitizes file names.
|
||||
*
|
||||
* @author Stefan M. Kudwien (http://drupal.org/user/48898)
|
||||
*/
|
||||
|
||||
/**
|
||||
* Implements hook_menu().
|
||||
*/
|
||||
function transliteration_menu() {
|
||||
$items['admin/settings/file-system/settings'] = array(
|
||||
'title' => 'Settings',
|
||||
'file path' => drupal_get_path('module', 'system'),
|
||||
'weight' => -10,
|
||||
'type' => MENU_DEFAULT_LOCAL_TASK,
|
||||
);
|
||||
$items['admin/settings/file-system/transliteration'] = array(
|
||||
'title' => 'Transliteration',
|
||||
'description' => 'Convert existing file names to US-ASCII.',
|
||||
'page callback' => 'drupal_get_form',
|
||||
'page arguments' => array('transliteration_retroactive'),
|
||||
'access arguments' => array('administer site configuration'),
|
||||
'file' => 'transliteration.admin.inc',
|
||||
'weight' => 10,
|
||||
'type' => MENU_LOCAL_TASK,
|
||||
);
|
||||
return $items;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements hook_form_FORM_ID_alter().
|
||||
*
|
||||
* Add transliteration settings to the file system configuration form.
|
||||
*/
|
||||
function transliteration_form_system_file_system_settings_alter(&$form, &$form_state) {
|
||||
$form['transliteration'] = array(
|
||||
'#type' => 'item',
|
||||
'#title' => t('Transliteration'),
|
||||
'#value' => '',
|
||||
);
|
||||
$form['transliteration']['transliteration_file_uploads'] = array(
|
||||
'#type' => 'checkbox',
|
||||
'#title' => t('Transliterate file names during upload.'),
|
||||
'#description' => t('Enable to convert file names to US-ASCII character set for cross-platform compatibility.'),
|
||||
'#default_value' => variable_get('transliteration_file_uploads', TRUE),
|
||||
);
|
||||
$form['transliteration']['transliteration_file_lowercase'] = array(
|
||||
'#type' => 'checkbox',
|
||||
'#title' => t('Lowercase transliterated file names.'),
|
||||
'#default_value' => variable_get('transliteration_file_lowercase', TRUE),
|
||||
'#description' => t('This is a recommended setting to prevent issues with case-insensitive file systems. It has no effect if transliteration has been disabled.'),
|
||||
);
|
||||
$form['buttons']['#weight'] = 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements hook_form_FORM_ID_alter().
|
||||
*
|
||||
* Adds transliteration settings to the search settings form.
|
||||
*/
|
||||
function transliteration_form_search_admin_settings_alter(&$form, &$form_state) {
|
||||
$form['transliteration'] = array(
|
||||
'#type' => 'fieldset',
|
||||
'#title' => t('Transliteration'),
|
||||
);
|
||||
$form['transliteration']['transliteration_search'] = array(
|
||||
'#type' => 'checkbox',
|
||||
'#title' => t('Transliterate search index and searched strings.'),
|
||||
'#description' => t('Enable to allow searching and indexing using US-ASCII character set, i.e. to treat accented and unaccented letters the same.'),
|
||||
'#default_value' => variable_get('transliteration_search', TRUE),
|
||||
);
|
||||
$form['buttons']['#weight'] = 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transliterate and sanitize a file name.
|
||||
*
|
||||
* The resulting file name has white space replaced with underscores, consists
|
||||
* of only US-ASCII characters, and is converted to lowercase (if configured).
|
||||
* If multiple files have been submitted as an array, the names will be
|
||||
* processed recursively.
|
||||
*
|
||||
* @param $filename
|
||||
* A file name, or an array of file names.
|
||||
* @param $source_langcode
|
||||
* Optional ISO 639 language code that denotes the language of the input and
|
||||
* is used to apply language-specific variations. If the source language is
|
||||
* not known at the time of transliteration, it is recommended to set this
|
||||
* argument to the site default language to produce consistent results.
|
||||
* Otherwise the current display language will be used.
|
||||
* @return
|
||||
* Sanitized file name, or array of sanitized file names.
|
||||
*
|
||||
* @see language_default()
|
||||
*/
|
||||
function transliteration_clean_filename($filename, $source_langcode = NULL) {
|
||||
if (is_array($filename)) {
|
||||
foreach ($filename as $key => $value) {
|
||||
$filename[$key] = transliteration_clean_filename($value, $source_langcode);
|
||||
}
|
||||
return $filename;
|
||||
}
|
||||
$filename = transliteration_get($filename, '', $source_langcode);
|
||||
// Replace whitespace.
|
||||
$filename = str_replace(' ', '_', $filename);
|
||||
// Remove remaining unsafe characters.
|
||||
$filename = preg_replace('![^0-9A-Za-z_.-]!', '', $filename);
|
||||
// Remove multiple consecutive non-alphabetical characters.
|
||||
$filename = preg_replace('/(_)_+|(\.)\.+|(-)-+/', '\\1\\2\\3', $filename);
|
||||
// Force lowercase to prevent issues on case-insensitive file systems.
|
||||
if (variable_get('transliteration_file_lowercase', TRUE)) {
|
||||
$filename = strtolower($filename);
|
||||
}
|
||||
return $filename;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transliterate text.
|
||||
*
|
||||
* Takes an input string in any language and character set, and tries to
|
||||
* represent it in US-ASCII characters by conveying, in Roman letters, the
|
||||
* pronunciation expressed by the text in some other writing system.
|
||||
*
|
||||
* @param $text
|
||||
* UTF-8 encoded text input.
|
||||
* @param $unknown
|
||||
* Replacement string for characters that do not have a suitable ASCII
|
||||
* equivalent.
|
||||
* @param $source_langcode
|
||||
* Optional ISO 639 language code that denotes the language of the input and
|
||||
* is used to apply language-specific variations. If the source language is
|
||||
* not known at the time of transliteration, it is recommended to set this
|
||||
* argument to the site default language to produce consistent results.
|
||||
* Otherwise the current display language will be used.
|
||||
* @return
|
||||
* Transliterated text.
|
||||
*
|
||||
* @see language_default()
|
||||
*/
|
||||
function transliteration_get($text, $unknown = '?', $source_langcode = NULL) {
|
||||
static $loaded = FALSE;
|
||||
if (!$loaded) {
|
||||
module_load_include('inc', 'transliteration');
|
||||
$loaded = TRUE;
|
||||
}
|
||||
return transliteration_process($text, $unknown, $source_langcode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of hook_init().
|
||||
*
|
||||
* Sanitize file names during upload.
|
||||
*/
|
||||
function transliteration_init() {
|
||||
if (!empty($_FILES['files']) && variable_get('transliteration_file_uploads', TRUE)) {
|
||||
// Figure out language, which is available in $_POST['language'] for node
|
||||
// forms.
|
||||
$langcode = NULL;
|
||||
if (!empty($_POST['language'])) {
|
||||
$languages = language_list();
|
||||
if (isset($languages[$_POST['language']])) {
|
||||
$langcode = $_POST['language'];
|
||||
}
|
||||
}
|
||||
foreach ($_FILES['files']['name'] as $field => $filename) {
|
||||
// Keep a copy of the unaltered file name.
|
||||
$_FILES['files']['orig_name'][$field] = $filename;
|
||||
$_FILES['files']['name'][$field] = transliteration_clean_filename($filename, $langcode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements hook_search_preprocess().
|
||||
*
|
||||
* Transliterates text added to the search index and user submitted search
|
||||
* keywords.
|
||||
*/
|
||||
function transliteration_search_preprocess($text) {
|
||||
if (variable_get('transliteration_search', TRUE)) {
|
||||
return transliteration_get($text, '', language_default('language'));
|
||||
}
|
||||
return $text;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements hook_filter().
|
||||
*/
|
||||
function transliteration_filter($op, $delta = 0, $format = -1, $text = '', $cache_id = 0) {
|
||||
switch ($op) {
|
||||
case 'list':
|
||||
return array(t('Convert all characters to US-ASCII'));
|
||||
case 'no cache':
|
||||
return FALSE;
|
||||
case 'process':
|
||||
return transliteration_get(
|
||||
$text,
|
||||
variable_get("transliteration_filter_no_known_transliteration_$format", '?')
|
||||
);
|
||||
case 'settings':
|
||||
return array(
|
||||
'filter_transliteration' => array(
|
||||
'#type' => 'fieldset',
|
||||
'#title' => 'Transliteration',
|
||||
'#collapsible' => TRUE,
|
||||
'#collapsed' => FALSE,
|
||||
"transliteration_filter_no_known_transliteration_$format" => array(
|
||||
'#type' => 'textfield',
|
||||
'#title' => t('Placeholder for characters with no known US-ASCII equivalent'),
|
||||
'#size' => 2,
|
||||
// The maximum length is 5 in order to accommodate unicode multibyte input.
|
||||
'#maxlength' => 5,
|
||||
'#default_value' => variable_get("transliteration_filter_no_known_transliteration_$format", '?'),
|
||||
),
|
||||
),
|
||||
);
|
||||
default:
|
||||
return $text;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements hook_filter_tips().
|
||||
*/
|
||||
function transliteration_filter_tips($delta, $format, $long) {
|
||||
return t('Non-latin text (e.g., å, ö, 漢) will be converted to US-ASCII equivalents (a, o, ?).');
|
||||
}
|
Reference in a new issue