Attempts to fix UTF-16 issues

This commit is contained in:
An Phan 2015-12-18 01:56:48 +08:00
parent edf02da199
commit bf01bf67ef
7 changed files with 105 additions and 6 deletions

13
app/Facades/Util.php Normal file
View file

@ -0,0 +1,13 @@
<?php
namespace App\Facades;
use Illuminate\Support\Facades\Facade;
/**
 * Facade whose accessor key is "Util".
 *
 * Static calls made on this class are forwarded by the base Facade to
 * whatever is bound under that key.
 */
class Util extends Facade
{
    /**
     * Name of the binding this facade resolves to.
     *
     * @return string
     */
    protected static function getFacadeAccessor()
    {
        $accessor = 'Util';

        return $accessor;
    }
}

View file

@ -3,6 +3,7 @@
namespace App\Models;
use Illuminate\Database\Eloquent\Model;
use App\Facades\Util;
/**
* @property int id The model ID
@ -26,6 +27,8 @@ class Artist extends Model
* This makes sure they are always sane.
*
* @param $value
*
* @return string
*/
public function getNameAttribute($value)
{
@ -42,6 +45,11 @@ class Artist extends Model
*/
public static function get($name)
{
// Remove the BOM from UTF-8/16/32, as it will mess up the database constraints.
if ($encoding = Util::detectUTFEncoding($name)) {
$name = iconv($encoding, 'UTF-8//IGNORE', $name);
}
$name = trim($name) ?: self::UNKNOWN_NAME;
return self::firstOrCreate(compact('name'), compact('name'));

View file

@ -0,0 +1,31 @@
<?php
namespace App\Providers;
use App\Services\Util;
use Illuminate\Support\ServiceProvider;
/**
 * Service provider that binds the Util service under the "Util" key.
 */
class UtilServiceProvider extends ServiceProvider
{
    /**
     * Bootstrap the application services.
     *
     * This provider has nothing to bootstrap.
     *
     * @return void
     */
    public function boot()
    {
        //
    }

    /**
     * Register the application services.
     *
     * Binds a single shared Util instance under the "Util" key.
     *
     * @return void
     */
    public function register()
    {
        // Bind through the application instance this provider was registered
        // with instead of the global app() helper, so the binding always lands
        // in the container that is actually loading the provider.
        $this->app->singleton('Util', function () {
            return new Util();
        });
    }
}

46
app/Services/Util.php Normal file
View file

@ -0,0 +1,46 @@
<?php
namespace App\Services;
/**
 * Helper that identifies the Unicode encoding of a string from its
 * leading byte-order mark (BOM).
 */
class Util
{
    /**
     * Define the global BOM constants used by detectUTFEncoding().
     *
     * Each constant is only defined if it does not exist yet, so creating
     * several instances is safe.
     */
    public function __construct()
    {
        defined('UTF8_BOM') or define('UTF8_BOM', chr(0xEF).chr(0xBB).chr(0xBF));
        defined('UTF16_LITTLE_ENDIAN_BOM') or define('UTF16_LITTLE_ENDIAN_BOM', chr(0xFF).chr(0xFE));
        defined('UTF16_BIG_ENDIAN_BOM') or define('UTF16_BIG_ENDIAN_BOM', chr(0xFE).chr(0xFF));
        defined('UTF32_LITTLE_ENDIAN_BOM') or define('UTF32_LITTLE_ENDIAN_BOM', chr(0xFF).chr(0xFE).chr(0x00).chr(0x00));
        defined('UTF32_BIG_ENDIAN_BOM') or define('UTF32_BIG_ENDIAN_BOM', chr(0x00).chr(0x00).chr(0xFE).chr(0xFF));
    }

    /**
     * Detect the UTF encoding of a string from its byte-order mark.
     *
     * @param string $str The raw bytes to inspect.
     *
     * @return string|false One of 'UTF-32BE', 'UTF-32LE', 'UTF-8', 'UTF-16BE'
     *                      or 'UTF-16LE', or false when the string does not
     *                      start with a known BOM.
     */
    public function detectUTFEncoding($str)
    {
        // Order matters: the UTF-32LE BOM (FF FE 00 00) starts with the
        // UTF-16LE BOM (FF FE), so the longest marks must be tested first.
        // Otherwise every UTF-32LE string is reported as UTF-16LE.
        $signatures = [
            [UTF32_BIG_ENDIAN_BOM, 'UTF-32BE'],
            [UTF32_LITTLE_ENDIAN_BOM, 'UTF-32LE'],
            [UTF8_BOM, 'UTF-8'],
            [UTF16_BIG_ENDIAN_BOM, 'UTF-16BE'],
            [UTF16_LITTLE_ENDIAN_BOM, 'UTF-16LE'],
        ];

        foreach ($signatures as $signature) {
            $bom = $signature[0];

            // Binary-safe prefix comparison; a string shorter than the BOM
            // can never match.
            if (strncmp($str, $bom, strlen($bom)) === 0) {
                return $signature[1];
            }
        }

        return false;
    }
}

View file

@ -149,6 +149,7 @@ return [
App\Providers\EventServiceProvider::class,
App\Providers\RouteServiceProvider::class,
App\Providers\MediaServiceProvider::class,
App\Providers\UtilServiceProvider::class,
],
@ -200,6 +201,7 @@ return [
'View' => Illuminate\Support\Facades\View::class,
'Media' => App\Facades\Media::class,
'Util' => App\Facades\Util::class,
],

View file

@ -1,7 +1,6 @@
<?php
use App\Models\Artist;
use App\Models\User;
use Illuminate\Foundation\Testing\DatabaseTransactions;
class ArtistTest extends TestCase
@ -34,12 +33,12 @@ class ArtistTest extends TestCase
$this->assertEquals(Artist::UNKNOWN_NAME, Artist::get('')->name);
}
public function testNameWithWeirdCharacters()
public function testUtf16Names()
{
// Don't really think this is even necessary if the user has set a proper utf8 encoding
// for the database.
$name = '<27><>Ой°Ы&囧rz';
$artist = factory(Artist::class)->create(['name' => $name]);
$name = file_get_contents(dirname(__FILE__) . '/stubs/utf16');
$artist = Artist::get($name);
$artist = Artist::get($name); // to make sure there's no constraint exception
$this->assertEquals($artist->id, Artist::get($name)->id);
}

BIN
tests/stubs/utf16 Normal file

Binary file not shown.