Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
//
3
// +----------------------------------------------------------------------+
4
// | PHP Version 4                                                        |
5
// +----------------------------------------------------------------------+
6
// | Copyright (c) 1997-2003 The PHP Group                                |
7
// +----------------------------------------------------------------------+
8
// | This source file is subject to version 2.02 of the PHP license,      |
9
// | that is bundled with this package in the file LICENSE, and is        |
10
// | available at through the world-wide-web at                           |
11
// | http://www.php.net/license/2_02.txt.                                 |
12
// | If you did not receive a copy of the PHP license and are unable to   |
13
// | obtain it through the world-wide-web, please send a note to          |
14
// | license@php.net so we can mail you a copy immediately.               |
15
// +----------------------------------------------------------------------+
16
// | Authors: Wolfram Kriesing <wolfram@kriesing.de>                      |
17
// +----------------------------------------------------------------------+
18
//
19
//  $Id: Translate.php 113824 2003-01-28 19:20:04Z cain $
20
 
21
// we have to move this to some more common place in PEAR
22
// this is just a quick hack here :-)
23
require_once( 'Tree/OptionsDB.php' );    // this contains all the methods like setOption, getOption, etc.
24
 
25
/**
26
*   Translates single strings and marked up text (i.e. HTML) using translation tables stored in a DB.
27
*
28
*   @package  Language
29
*   @access   public
30
*   @author   Wolfram Kriesing <wolfram@kriesing.de>
31
*   @version  2001/12/29
32
*/
33
class I18N_Messages_Translate extends Tree_OptionsDB
34
{
35
 
36
    /**
    *   The default options of the translator.
    *
    *   @var    array   $options
    */
    var $options = array(
        // the DB-table name prefix, the lang-string passed to a method
        // is added at the end to get the table of that language
        'tablePrefix'       => 'translate_',

        // the source language, the language the strings to translate are written in,
        // its table is also the one used to retrieve the source strings
        'sourceLanguage'    => 'en',

        // be case sensitive by default, since not all languages write nouns, verbs etc.
        // in lower case, translating from german might fail if the case is not considered
        'caseSensitive'     => true,

        // the url to a translator tool, only used if given
        'translatorUrl'     => ''
    );
46
 
47
    /**
    *   The delimiters that have to surround a text, so it is seen as translatable.
    *   This prevents every HTML-tag from being translated too, which definitely would not work.
    *   The delimiters might also work on other markup languages, like XML - but that is not tested.
    *
    *   Every entry is an array of regular expression parts:
    *       array( begin , end [, number of sub patterns inside begin] )
    *
    *   NOTE: if there is php inside a tag an extra filter is needed,
    *   since in <a href="">< ?=$var? ></a> the php-tags are found and seen as delimiters,
    *   so < ?=$var? > can not be translated. See sf.net/projects/simpletpl, the filter
    *   'SimpleTemplate/Filter/Basic::applyTranslateFunction' solves this by wrapping
    *   a given function/method around it, so that it finally becomes:
    *       <a href="">< ?=translateThis($var)? ></a>
    *
    *   NOTE: all the regular expressions are limited, the attribute ones work fine on:
    *       <a title="translate this" ...>  or <img alt="translate this" ....>
    *   but they have problems with:
    *       <a href="< ?php echo $foo ? >" title="translate this">
    *   since there is a < before the title-attribute, and building the regexp with .*
    *   instead of [^>]* in front of the attribute might return ambiguous results.
    *   SOLUTION for now: put the attributes that shall be translated first inside the tag.
    *
    *   @var    array   $possibleMarkUpDelimiters
    */
    var $possibleMarkUpDelimiters = array(
        // a text mostly is in between '>' and '<'
        //
        // this can not be used instead:
        //      '>[^<]*' , '[^>]*<'
        // because it would translate 'class="..."' too, if the word 'as' is to be translated.
        // But it also means that stuff like &nbsp; or other special chars, which dont
        // start and end with a < or >, are not handled yet.
        //
        // this variant would leave &nbsp; before and after the text as it is:
        //      array('>[\s*&nbsp;]*', '[\s*&nbsp;]*<')
        // but it is not really secure, since [] means any of the chars inside,
        // so '>nbsp translate this<' would be translated too. Something more common,
        // which takes care of all the chars that shall not bother the translation, is needed.
        array( '>\s*' , '\s*<' ),

        // the values of input buttons
        // FIXXXME this fails if the order of the type and the value attribute is switched
        // other variants that have been tried:
        //      array('<\s*input .*type=submit .*value=["\']?\s*','\s*["\']?.*>')
        //      array('<\s*input.*type=[\'"]?(button|submit)[\'"]?.*value=["\']?\s*','\s*["\']?.*>',1)
        //      array('<\s*input.*value=["\']?\s*','\s*["\']?.*>')
        array( '<input type=["\']?(submit|button)["\']? [^>]*value=["\']?\s*' , '\s*["\']?[^>]*>' , 1 ),

        // the alt-attribute of images and the title-attribute of links
        array( '<img [^>]*alt=["\']?\s*' , '\s*["\']?.*>' ),
        array( '<a [^>]*title=["\']?\s*' , '\s*["\']?.*>' )
    );
100
 
101
    /**
    *   The content read from the DB, kept here to prevent multiple DB accesses.
    *   'destLanguage' is the language the content was read for,
    *   'strings' are the rows as returned by the DB.
    *
    *   @var    array   $_translated
    */
    var $_translated = array(
        'destLanguage'  => '',
        'strings'       => array()
    );

    /**
    *   The translated strings again, but unlike in $_translated the source string
    *   is the index here, which makes checking if a translation exists much faster.
    *
    *   @var    array   $_sourceStringIndexed
    */
    var $_sourceStringIndexed = array();
111
 
112
    /**
    *   Sets up the translator, the parameters are handed on to the parent class.
    *
    *   FIXXME pass a resource to the constructor which can be used to translate the
    *   string, it should be possible to use XML, DB, or whatever. Currently there is
    *   only a DB interface hardcoded in this class.
    *
    *   @access     public
    *   @param      mixed   $dsn        the DSN of the DB, handed on to Tree_OptionsDB
    *   @param      array   $options    the options, handed on to Tree_OptionsDB, optional
    */
    function __construct( $dsn , $options=array() )
    {
        // $options is optional, just as in the pre-ZE2 constructor, otherwise
        // creating an instance with the DSN only fails on engines that call __construct
        parent::Tree_OptionsDB( $dsn , $options );
    }
127
 
128
    /**
    *   The constructor for pre-ZE2 engines, which look for a method named like the class.
    *   It does nothing but forwarding the parameters to __construct().
    *
    *   @access     public
    *   @param      mixed   $dsn        handed on to __construct() as it is
    *   @param      array   $options    handed on to __construct() as it is, optional
    *   @return     mixed   whatever __construct() returns
    */
    function I18N_Messages_Translate( $dsn , $options=array() )
    {
        $result = $this->__construct( $dsn , $options );
        return $result;
    }
139
 
140
    /**
    *   Tries to translate a given string, but only exactly the string as it is in the DB.
    *
    *   If the DB content for $lang had been read already (see getAll()) the string is
    *   looked up in there, otherwise one query for this string is sent to the DB.
    *
    *   @access     public
    *   @author     Wolfram Kriesing <wolfram@kriesing.de>
    *   @version    01/12/29
    *   @param      string  $string     the string that shall be translated
    *   @param      string  $lang       iso-string for the destination language
    *   @return     string  the translated string, or $string as it was passed if no translation
    *                       was found, if $lang can not be used as a table name or if the query failed
    */
    function simpleTranslate( $string , $lang )
    {
        // we dont need to translate a string from the source language to the source language
        if( $lang == $this->getOption('sourceLanguage') )
            return $string;

        // if the DB content had been read already for this language use it
        if( sizeof($this->_translated['strings'])>0 &&
            $this->_translated['destLanguage'] == $lang )
        {
            // build the index 'source string' => 'translated string' on the first use
            if( sizeof($this->_sourceStringIndexed) == 0 )
            {
                foreach( $this->_translated['strings'] as $aSet )
                    $this->_sourceStringIndexed[$aSet['string']] = $aSet['translated'];
            }

            if( isset($this->_sourceStringIndexed[$string]) )
                return $this->_sourceStringIndexed[$string];
            return $string;
        }

        // $lang becomes a part of the table name, which can not be quoted as a value can,
        // so only let those chars pass that are harmless inside an identifier
        if( !is_string($lang) || !preg_match('/^[A-Za-z0-9_]+\z/',$lang) )
            return $string;

// FIXXME may be it would be better just reading the entire DB-content once
// and using this array then ??? this uses up a lot of RAM and that for every user ... so i guess not
// or use PEAR::Cache
        $query = sprintf(   "SELECT d.string FROM %s%s s,%s%s d WHERE s.string=%s AND s.id=d.id",
                            $this->getOption('tablePrefix'),$this->getOption('sourceLanguage'), // build the source language table name
                            $this->getOption('tablePrefix'),$lang,                              // build the destination language table name
                            $this->dbh->quote($string) );
        $res = $this->dbh->getOne( $query );
        if( DB::isError($res) )
        {
//            return $this->raiseError('...');
            return $string; // return the actual string on failure
        }

        // if no translation was found return the source string
        // dont check for !$res here, a translation like '0' is a valid one too
        if( $res === null || $res === false || $res === '' )
            return $string;

        return $res;
    }
186
 
187
    /**
    *   Tries to translate a given string, it also tries using the regexp's which might be in the DB.
    *
    *   At first the exact string is looked up using simpleTranslate(), only if that
    *   didnt change the string translateMarkUpString() is used, without any delimiters.
    *
    *   FIXXME extract the regexp handling from translateMarkUpString() and call it
    *   explicitly here, and stop once a translation was found, so nothing that is translated
    *   already gets translated again. I.e. 'Data saved' is translated into 'Daten gespeichert'
    *   and going on as it is done now translates 'Date' into 'Datum' which results in
    *   'Datumn gespeichert'. Translating full words only would prevent that too.
    *
    *   @access     public
    *   @author     Wolfram Kriesing <wolfram@kriesing.de>
    *   @version    01/12/29
    *   @param      string  $string     the string that shall be translated
    *   @param      string  $lang       iso-string for the destination language
    *   @return     string  the translated string
    */
    function translate( $string , $lang )
    {
        $res = $this->simpleTranslate( $string , $lang );

        // if the exact lookup didnt translate the string we need to go thru all the strings,
        // compare strictly, so strings that only look like the same number are not seen as equal
        if( $res === $string )
        {
            // remember the delimiters
            $temp = $this->possibleMarkUpDelimiters;
            // dont use any delimiters, this has to be one entry with an empty begin and end,
            // so it has the same structure as every other entry
// may be better using a property like 'useMarkupDelimiters'
            $this->possibleMarkUpDelimiters = array( array('','') );
            $res = $this->translateMarkUpString( $string , $lang );
            // set the delimiters properly again
            $this->possibleMarkUpDelimiters = $temp;
        }

        return $res;
    }
222
 
223
    /**
    *   Returns the DB content for the source and the destination language given as parameter $lang.
    *
    *   The content is kept in $_translated, so the DB is only queried again
    *   if nothing had been read yet or if another language is requested.
    *   On failure an error message is printed and nothing stays cached.
    *
    *   @access     public
    *   @author     Wolfram Kriesing <wolfram@kriesing.de>
    *   @version    02/01/08
    *   @param      string  $lang       iso-string for the destination language
    *   @return     mixed   array   the rows, sorted by the length of the source string, longest first
    *                       false   if $lang can not be used as a table name or if the query failed
    */
    function getAll( $lang )
    {
        // if the DB content had been read already for this language return it
        if( sizeof($this->_translated['strings'])>0 &&
            $this->_translated['destLanguage'] == $lang )
        {
            return $this->_translated['strings'];
        }

        // whatever is cached is of no use anymore, drop it before doing anything else,
        // so a failure can never leave the strings of another language marked as those of $lang,
        // the index simpleTranslate() builds from the strings is outdated now too
        $this->_translated['destLanguage'] = '';
        $this->_translated['strings'] = array();
        $this->_sourceStringIndexed = array();

        // $lang becomes a part of the table name, which can not be quoted as a value can,
        // so only let those chars pass that are harmless inside an identifier
        if( !is_string($lang) || !preg_match('/^[A-Za-z0-9_]+\z/',$lang) )
        {
            echo sprintf(   'ERROR - Translate::getAll<br>invalid language: %s<br><br>',
                            htmlspecialchars( is_string($lang) ? $lang : gettype($lang) ) );
            return false;
        }

        // for the translation API we need to have the long sentences at first, since translating
        // a single word might screw up the entire content, like translating 'i move to germany'
        // and starting to translate the word 'move' makes it impossible to properly translate
        // the entire phrase, even though this can not really happen, since the delimiters around
        // the string that shall be translated are checked
        $query = sprintf(   'SELECT d.string as translated,d.*,s.* '.   // d.string shall be named 'translated', but we still need
                                                                        // all the rest from the destination language table,
                                                                        // s.* overwrites d.string, which we have in 'translated'
                            'FROM %s%s s,%s%s d WHERE s.id=d.id '.
                            'ORDER BY LENGTH(s.string) DESC',           // sentences first and single words at last
                            $this->getOption('tablePrefix'),$this->getOption('sourceLanguage'), // build the source language table name
                            $this->getOption('tablePrefix'),$lang );                            // build the destination language table name

        $res = $this->dbh->getAll( $query );
        if( DB::isError($res) )
        {
//            return $this->raiseError('...');
            // the query and the message end up inside an HTML-page, so they have to be escaped
            echo sprintf(   'ERROR - Translate::getAll<br>QUERY:%s<br>%s<br><br>',
                            htmlspecialchars($query),htmlspecialchars($res->getMessage()) );
            return false;
        }

        // mark the strings as read for $lang only now that they really are
        $this->_translated['destLanguage'] = $lang;
        $this->_translated['strings'] = $res;

        return $this->_translated['strings'];
    }
271
 
272
    /**
    *   Translates all the strings inside $input that match any of the source language strings
    *   and that are surrounded by one of the $possibleMarkUpDelimiters.
    *   The input is mostly an HTML-file, and it is filtered so only real text
    *   is translated, at least as good as possible.
    *
    *   The formatting inside a text (i.e. 'this is a <b>user</b> from germany') is not
    *   handled here on purpose, it has to be taken care of by the translator, since the marked
    *   part can be in a totally different place in the destination language string.
    *
    *   FIXXME template code inside a tag, that itself starts/ends with a < or >, makes the
    *   lines in between look like translatable text, i.e. the class-attributes in:
    *       <td {if($currentPageIndex==$key)} class="naviItemSelected" {else} class="naviItem" nowrap>
    *
    *   @access     public
    *   @author     Wolfram Kriesing <wolfram@kriesing.de>
    *   @version    02/01/08
    *   @param      string  $input      the string that shall be translated, mostly an entire HTML-page
    *   @param      string  $lang       iso-string for the destination language
    *   @return     string  the input with all the strings translated that were found
    */
    function translateMarkUpString( $input , $lang )
    {
        // we dont need to translate a string from the source language to the source language
        if( $lang == $this->getOption('sourceLanguage') )
        {
            // adding links to a translator tool would be a cool feature, but addTranslatorLinks()
            // is not available yet, so only call it if it is there, calling it blindly is a fatal error
            $url = $this->getOption('translatorUrl');
            if( $url && method_exists($this,'addTranslatorLinks') )
            {
                $this->getAll( $lang );
                return $this->addTranslatorLinks( $input , $url );
            }
            return $input;
        }

        $this->getAll( $lang );          // get all the possible strings from the DB

        $addModifier = $this->getOption('caseSensitive') ? '' : 'i';

        $strings = $this->_translated['strings'];
        if( !is_array($strings) || !sizeof($strings) )
            return $input;

        foreach( $strings as $aString )  // search for each single string and try to translate it
        {
            $isRegExp = !empty($aString['numSubPattern']);

            // shall the translated string be converted to HTML, or does it may be contain HTML?
            if( isset($aString['convertToHtml']) && $aString['convertToHtml'] )
                $translated = htmlentities($aString['translated']);
            else
                $translated = $aString['translated'];

            // we use 2 strings that we search for, one is the real text as from the db
            // the second, $htmlSourceString, is the source string converted using htmlentities,
            // in case someone has been programming proper html
            if( $isRegExp )
            {
                // we dont preg_quote the strings which contain regExp's, chars like '.' have to be
                // escaped by the person who enters the source string
                $sourceString = $aString['string'];
                $htmlSourceString = htmlentities($aString['string']);
                // the sub pattern of the end delimiter comes after those of the string
                $lastSubpattern = 2 + $aString['numSubPattern'];
            }
            else
            {
                // in none regExp source strings we better quote the chars which could be
                // mistakenly seen as regExp chars, the slash is the delimiter used below
                $sourceString = preg_quote(trim($aString['string']),'/');
                $htmlSourceString = preg_quote(htmlentities(trim($aString['string'])),'/');
                $lastSubpattern = 2;
                // a plain translation has no place holders, so a '$' or a '\' in it is meant
                // literally and must not be seen as a back reference by preg_replace
                $translated = str_replace( array('\\','$') , array('\\\\','\\$') , $translated );
            }

            // replace all spaces in the source string by \s* so that there can be spaces
            // as many as one wants, and even newlines
            $sourceString = preg_replace('/\s+/','\\s*',$sourceString);
            $htmlSourceString = preg_replace('/\s+/s','\\s*',$htmlSourceString);

            foreach( $this->possibleMarkUpDelimiters as $delimiters )  // go thru all the delimiters
            {
                $begin = isset($delimiters[0]) ? $delimiters[0] : '';
                $end = isset($delimiters[1]) ? $delimiters[1] : '';
                // the number of sub patterns the begin delimiter contains itself
                $beginSubPatterns = isset($delimiters[2]) ? $delimiters[2] : 0;

                // in the DB the place holders start with $1, but here the begin delimiter and
                // its sub patterns come first, so the place holders have to be moved behind them
                if( $isRegExp )
                    $replaceBy = $this->_shiftBackReferences( $translated , 1+$beginSubPatterns );
                else
                    $replaceBy = $translated;

                // use the ${n} notation, so a translation starting with a digit doesnt
                // become a part of the reference to the begin delimiter
                $replacement = '${1}'.$replaceBy.'${'.($lastSubpattern+$beginSubPatterns).'}';

                $result = preg_replace( '/('.$begin.')'.$sourceString.'('.$end.')/sU'.$addModifier ,
                                        $replacement , $input );

                // only if the regExp above had no effect try the one with all html characters
                // converted, trying both always had the effect that something was translated twice
                if( $result === null || $result === $input )
                {
                    $result = preg_replace( '/('.$begin.')'.$htmlSourceString.'('.$end.')/sU'.$addModifier ,
                                            $replacement , $input );
                }

                // preg_replace returns null on failure, i.e. for a broken regExp from the DB,
                // in this case skip the string instead of loosing the entire input
                if( $result !== null )
                    $input = $result;
            }
        }
        return $input;
    }

    /**
    *   Raises the number of each place holder ($1, $2, ...) inside a replacement string.
    *   All the place holders are handled in one go, so it doesnt matter in which order
    *   they appear in the string. Other notations, like \1 or ${1}, are left as they are.
    *
    *   @access     private
    *   @param      string  $replacement    the replacement string containing the place holders
    *   @param      int     $offset         the number to add to each place holder
    *   @return     string  the replacement string with the new place holders, in the ${n} notation
    */
    function _shiftBackReferences( $replacement , $offset=1 )
    {
        // the string is split at the place holders, since the captured numbers
        // are returned too they are the elements with the odd indexes
        $parts = preg_split( '/\$(\d{1,2})/' , $replacement , -1 , PREG_SPLIT_DELIM_CAPTURE );
        if( !is_array($parts) )
            return $replacement;

        foreach( $parts as $index=>$aPart )
        {
            if( $index%2 )
                $parts[$index] = '${'.($aPart+$offset).'}';
        }
        return implode( '' , $parts );
    }
406
 
407
    /**
408
    *
409
    *
410
    *   @access     public
411
    *   @author     Wolfram Kriesing <wolfram@kriesing.de>
412
    *   @version    02/04/14
413
    *   @param      string  the url to a translation tool
414
    *   @return
415
    */
416
/*    function addTranslatorLinks( $input , $url )
417
    {
418
        $linkBegin = '<a href="#" onClick="javascript:window.open(\''.$url.'?string=';
419
        $linkEnd =  '\',\'translate\',\'left=100,top=100,width=400,height=200\')" '.
420
                    'style="background-color:red; color:white; font-style:Courier; font-size:12px;">&nbsp;T&nbsp;</a>';
421
 
422
        foreach( $this->_translated['strings'] as $aString )             // search for each single string and try to translate it
423
        {
424
            $englishString = preg_quote($aString['string']);
425
 
426
            if( $aString['numSubPattern'] )         // if the string is a regExp, we need to update $lastSubpattern
427
            {
428
                $englishString = $aString['string'];// we should not preg_quote the string
429
                $lastSubpattern = '$'.( 2 + $aString['numSubPattern'] );    // set $lastSubpattern properly
430
            }
431
 
432
            $link = $linkBegin.urlencode($englishString).$linkEnd;
433
            $input = preg_replace( '/(\s*>\s*)('.$englishString.')(\s*<\/a>)/isU' , '$1$2$3'.$link , $input );
434
            $input = preg_replace( '/(<option.*>\s*)('.$englishString.')(.*<\/select>)/isU' , '$1$2$3'.$link , $input );
435
            $input = preg_replace(  '/(<input[^>]*type=.?(button|submit|reset)[^>]*value=.?\s*)'.
436
                                    '('.$englishString.')([^>]*>)/isU' , '$1$3$4'.$link , $input );
437
        }
438
        return $input;
439
    }
440
 
441
#        '>\s*'                          =>  '\s*<', // this mostly applies, that a text is inbetween '>' and '<'
442
#        '<\s*input .*value=["\']?\s*'   =>  '\s*["\']?.*>'  // this is for input button's values
443
*/
444
 
445
} // end of class
446
?>