mirror of
https://github.com/Poniverse/Pony.fm.git
synced 2024-11-22 04:58:01 +01:00
#100: Duplication detection
This commit is contained in:
parent
d7a59b4131
commit
3526403661
1 changed files with 265 additions and 23 deletions
|
@ -3,6 +3,7 @@
|
|||
namespace Poniverse\Ponyfm\Console\Commands;
|
||||
|
||||
use Auth;
|
||||
use Carbon\Carbon;
|
||||
use Config;
|
||||
use DB;
|
||||
use File;
|
||||
|
@ -139,6 +140,10 @@ class ImportPonify extends Command
|
|||
|
||||
$getId3 = new getID3;
|
||||
|
||||
// Enable file hashing
|
||||
$getId3->option_md5_data = true;
|
||||
$getId3->option_md5_data_source = true;
|
||||
|
||||
// all tags read by getID3, including the cover art
|
||||
$allTags = $getId3->analyze($file->getPathname());
|
||||
|
||||
|
@ -148,20 +153,83 @@ class ImportPonify extends Command
|
|||
// normalized tags used by Pony.fm
|
||||
$parsedTags = [];
|
||||
|
||||
if ($file->getExtension() === 'mp3') {
|
||||
list($parsedTags, $rawTags) = $this->getId3Tags($allTags);
|
||||
} else {
|
||||
if ($file->getExtension() === 'm4a') {
|
||||
list($parsedTags, $rawTags) = $this->getAtomTags($allTags);
|
||||
list($parsedTags, $rawTags) = $this->parseTags($file);
|
||||
|
||||
//==========================================================================================================
|
||||
// Check to see if we have this track already, if so, compare hashes of the two files
|
||||
//==========================================================================================================
|
||||
|
||||
$artist = User::where('display_name', '=', $artist_name)->first();
|
||||
$artistId = null;
|
||||
|
||||
$this->comment("Checking for duplicates");
|
||||
|
||||
if ($artist) {
|
||||
$artistId = $artist->id;
|
||||
}
|
||||
|
||||
$existingTrack = Track::where('title', '=', $parsedTags['title'])
|
||||
->where('user_id', '=', $artistId)
|
||||
->first();
|
||||
|
||||
if ($existingTrack) {
|
||||
// We got one!!
|
||||
// Ok, let's not get too excited
|
||||
// First let's see if we have a matching file type
|
||||
|
||||
$importFormat = $this->getFormat($file->getExtension());
|
||||
if ($importFormat == null) {
|
||||
// No idea what this is, skip file
|
||||
$this->comment(sprintf("Not an audio file (%s), skipping...", $importFormat));
|
||||
continue;
|
||||
}
|
||||
|
||||
$existingFile = null;
|
||||
|
||||
foreach ($existingTrack->trackFiles as $trackFile) {
|
||||
if ($trackFile->format == $importFormat) {
|
||||
$existingFile = $trackFile;
|
||||
}
|
||||
}
|
||||
|
||||
if ($existingFile === null) {
|
||||
// Can't find a matching format
|
||||
// Check to see if we have a better quality file
|
||||
|
||||
} else {
|
||||
$this->comment("Found existing file");
|
||||
|
||||
// Found a matching format, are they the same?
|
||||
$getId3_existing = new getID3;
|
||||
$getId3_existing->option_md5_data = true;
|
||||
$getId3_existing->option_md5_data_source = true;
|
||||
$existingFileTags = $getId3->analyze($existingFile->getFile());
|
||||
|
||||
$importHash = array_key_exists('md5_data_source', $allTags) ? $allTags['md5_data_source'] : $allTags['md5_data'];
|
||||
$targetHash = array_key_exists('md5_data_source', $existingFileTags) ? $existingFileTags['md5_data_source'] : $existingFileTags['md5_data'];
|
||||
|
||||
$this->info("Archive hash: " . $importHash);
|
||||
$this->info("Pony.fm hash: " . $targetHash);
|
||||
|
||||
if ($importHash == $targetHash) {
|
||||
// Audio is identical, no need to reupload
|
||||
// We can update the metadata though
|
||||
// TODO: Update metadata
|
||||
$this->comment("Versions are the same. Skipping...\n");
|
||||
continue;
|
||||
} else {
|
||||
// Audio is different. Replace if it came from MLPMA
|
||||
// TODO: Replace file
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$this->comment("No duplicates");
|
||||
}
|
||||
|
||||
//==========================================================================================================
|
||||
// Create new user for the artist if one doesn't exist
|
||||
//==========================================================================================================
|
||||
|
||||
$artist = User::where('display_name', '=', $artist_name)->first();
|
||||
|
||||
if (!$artist) {
|
||||
$artist = new User;
|
||||
$artist->display_name = $artist_name;
|
||||
|
@ -220,12 +288,7 @@ class ImportPonify extends Command
|
|||
$this->comment('Transcoding the track!');
|
||||
Auth::loginUsingId($artist->id);
|
||||
|
||||
$getID3 = new getID3;
|
||||
$getID3->analyze($file->getPathname());
|
||||
|
||||
$mime = null;
|
||||
|
||||
if (isset($getID3->info['mime_type'])) $mime = $getID3->info['mime_type'];
|
||||
$mime = $allTags['mime_type'];
|
||||
|
||||
$trackFile = new UploadedFile($file->getPathname(), $file->getFilename(), $mime, null, null, true);
|
||||
|
||||
|
@ -245,13 +308,81 @@ class ImportPonify extends Command
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Computes the CRC32b checksum of a file and returns it as an integer.
 *
 * @param string $filepath path of the file to hash
 * @return int the checksum, decoded from its hex form as a big-endian unsigned 32-bit value
 */
protected function hashAudio($filepath) {
    // hash_file() reports the checksum as a hexadecimal string.
    $hexDigest = hash_file('crc32b', $filepath);

    // Convert the hex digits back into raw bytes, then read those bytes
    // as a single unsigned 32-bit big-endian number ("N").
    $rawBytes = pack('H*', $hexDigest);
    $decoded = unpack('N', $rawBytes);

    // unpack() numbers unnamed elements starting at 1.
    return $decoded[1];
}
|
||||
|
||||
/**
 * Looks up the name of the track format registered for a file extension.
 *
 * Walks Track::$Formats and returns the key of the first entry whose
 * 'extension' value loosely equals the given extension.
 *
 * @param string $extension file extension to look up
 * @return mixed|null the matching key of Track::$Formats, or null when nothing matches
 */
protected function getFormat($extension) {
    $matchedName = null;

    foreach (Track::$Formats as $formatName => $definition) {
        if ($definition['extension'] == $extension) {
            // The first match wins; later entries are not inspected.
            $matchedName = $formatName;
            break;
        }
    }

    return $matchedName;
}
|
||||
|
||||
/**
 * Reads the tags of an audio file and normalizes them for Pony.fm.
 *
 * The tag parser is chosen from the file's extension. The extension is
 * lower-cased first so that e.g. "SONG.MP3" is handled like "song.mp3",
 * and "m4a" is routed to the Atom parser, since a bare extension check
 * against codec names such as "aac" never matches .m4a files.
 *
 * @param object $file file object exposing getExtension() and getPathname()
 * @return array two-element array: [normalized tags, raw format-specific tags]
 */
public function parseTags($file)
{
    // NOTE(review): despite the name, this is the file extension, not a codec
    // identifier reported by getID3.
    $audioCodec = strtolower($file->getExtension());

    //==========================================================================================================
    // Extract the original tags.
    //==========================================================================================================
    $getId3 = new getID3;

    // all tags read by getID3, including the cover art
    $allTags = $getId3->analyze($file->getPathname());

    // $rawTags => tags specific to a file format (ID3 or Atom), pre-normalization
    // $parsedTags => normalized tags used by Pony.fm

    if ($audioCodec === 'mp3') {
        list($parsedTags, $rawTags) = $this->getId3Tags($allTags);
    } elseif ($audioCodec === 'm4a' || Str::startsWith($audioCodec, ['aac', 'alac'])) {
        list($parsedTags, $rawTags) = $this->getAtomTags($allTags);
    } elseif (in_array($audioCodec, ['vorbis', 'flac'])) {
        list($parsedTags, $rawTags) = $this->getVorbisTags($allTags);
    } elseif (Str::startsWith($audioCodec, ['pcm', 'adpcm'])) {
        // NOTE(review): PCM files are sent through the Atom parser here, as in
        // the original code - confirm this is intentional.
        list($parsedTags, $rawTags) = $this->getAtomTags($allTags);
    } else {
        // Assume the file is untagged if it's in an unknown format.
        $parsedTags = [
            'title' => null,
            'artist' => null,
            'band' => null,
            'genre' => null,
            'track_number' => null,
            'album' => null,
            'year' => null,
            'release_date' => null,
            'comments' => null,
            'lyrics' => null,
        ];
        $rawTags = [];
    }

    return [$parsedTags, $rawTags];
}
|
||||
|
||||
/**
|
||||
* @param array $rawTags
|
||||
* @return array
|
||||
*/
|
||||
protected function getId3Tags($rawTags)
|
||||
{
|
||||
if (array_key_exists('tags', $rawTags) && array_key_exists('id3v2', $rawTags['tags'])) {
|
||||
$tags = $rawTags['tags']['id3v2'];
|
||||
} elseif (array_key_exists('tags', $rawTags) && array_key_exists('id3v1', $rawTags['tags'])) {
|
||||
$tags = $rawTags['tags']['id3v1'];
|
||||
} else {
|
||||
$tags = [];
|
||||
}
|
||||
|
||||
|
||||
$comment = null;
|
||||
|
||||
if (isset($tags['comment'])) {
|
||||
|
@ -267,15 +398,22 @@ class ImportPonify extends Command
|
|||
$tags['comment'][0] = $comment;
|
||||
}
|
||||
|
||||
$trackNumber = 1;
|
||||
if (isset($tags['track_number'])) {
|
||||
$trackNumberComponents = explode('/', $tags['track_number'][0]);
|
||||
$trackNumber = $trackNumberComponents[0];
|
||||
}
|
||||
|
||||
return [
|
||||
[
|
||||
'title' => $tags['title'][0],
|
||||
'artist' => $tags['artist'][0],
|
||||
'title' => isset($tags['title']) ? $tags['title'][0] : null,
|
||||
'artist' => isset($tags['artist']) ? $tags['artist'][0] : null,
|
||||
'band' => isset($tags['band']) ? $tags['band'][0] : null,
|
||||
'genre' => isset($tags['genre']) ? $tags['genre'][0] : null,
|
||||
'track_number' => isset($tags['track_number']) ? $tags['track_number'][0] : null,
|
||||
'track_number' => $trackNumber,
|
||||
'album' => isset($tags['album']) ? $tags['album'][0] : null,
|
||||
'year' => isset($tags['year']) ? (int) $tags['year'][0] : null,
|
||||
'release_date' => isset($tags['release_date']) ? $this->parseDateString($tags['release_date'][0]) : null,
|
||||
'comments' => $comment,
|
||||
'lyrics' => isset($tags['unsynchronised_lyric']) ? $tags['unsynchronised_lyric'][0] : null,
|
||||
],
|
||||
|
@ -289,9 +427,57 @@ class ImportPonify extends Command
|
|||
*/
|
||||
protected function getAtomTags($rawTags)
|
||||
{
|
||||
if (array_key_exists('tags', $rawTags) && array_key_exists('quicktime', $rawTags['tags'])) {
|
||||
$tags = $rawTags['tags']['quicktime'];
|
||||
} else {
|
||||
$tags = [];
|
||||
}
|
||||
|
||||
$trackNumber = null;
|
||||
$trackNumber = 1;
|
||||
if (isset($tags['track_number'])) {
|
||||
$trackNumberComponents = explode('/', $tags['track_number'][0]);
|
||||
$trackNumber = $trackNumberComponents[0];
|
||||
}
|
||||
|
||||
if (isset($tags['release_date'])) {
|
||||
$releaseDate = $this->parseDateString($tags['release_date'][0]);
|
||||
} elseif (isset($tags['creation_date'])) {
|
||||
$releaseDate = $this->parseDateString($tags['creation_date'][0]);
|
||||
} else {
|
||||
$releaseDate = null;
|
||||
}
|
||||
|
||||
return [
|
||||
[
|
||||
'title' => isset($tags['title']) ? $tags['title'][0] : null,
|
||||
'artist' => isset($tags['artist']) ? $tags['artist'][0] : null,
|
||||
'band' => isset($tags['band']) ? $tags['band'][0] : null,
|
||||
'album_artist' => isset($tags['album_artist']) ? $tags['album_artist'][0] : null,
|
||||
'genre' => isset($tags['genre']) ? $tags['genre'][0] : null,
|
||||
'track_number' => $trackNumber,
|
||||
'album' => isset($tags['album']) ? $tags['album'][0] : null,
|
||||
'year' => isset($tags['year']) ? (int) $tags['year'][0] : null,
|
||||
'release_date' => $releaseDate,
|
||||
'comments' => isset($tags['comments']) ? $tags['comments'][0] : null,
|
||||
'lyrics' => isset($tags['lyrics']) ? $tags['lyrics'][0] : null,
|
||||
],
|
||||
$tags
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $rawTags
|
||||
* @return array
|
||||
*/
|
||||
protected function getVorbisTags($rawTags)
|
||||
{
|
||||
if (array_key_exists('tags', $rawTags) && array_key_exists('vorbiscomment', $rawTags['tags'])) {
|
||||
$tags = $rawTags['tags']['vorbiscomment'];
|
||||
} else {
|
||||
$tags = [];
|
||||
}
|
||||
|
||||
$trackNumber = 1;
|
||||
if (isset($tags['track_number'])) {
|
||||
$trackNumberComponents = explode('/', $tags['track_number'][0]);
|
||||
$trackNumber = $trackNumberComponents[0];
|
||||
|
@ -299,18 +485,74 @@ class ImportPonify extends Command
|
|||
|
||||
return [
|
||||
[
|
||||
'title' => $tags['title'][0],
|
||||
'artist' => $tags['artist'][0],
|
||||
'title' => isset($tags['title']) ? $tags['title'][0] : null,
|
||||
'artist' => isset($tags['artist']) ? $tags['artist'][0] : null,
|
||||
'band' => isset($tags['band']) ? $tags['band'][0] : null,
|
||||
'album_artist' => isset($tags['album_artist']) ? $tags['album_artist'][0] : null,
|
||||
'genre' => isset($tags['genre']) ? $tags['genre'][0] : null,
|
||||
'track_number' => $trackNumber,
|
||||
'album' => isset($tags['album']) ? $tags['album'][0] : null,
|
||||
'year' => isset($tags['year']) ? (int) $tags['year'][0] : null,
|
||||
'release_date' => isset($tags['date']) ? $this->parseDateString($tags['date'][0]) : null,
|
||||
'comments' => isset($tags['comments']) ? $tags['comments'][0] : null,
|
||||
'lyrics' => isset($tags['lyrics']) ? $tags['lyrics'][0] : null,
|
||||
],
|
||||
$tags
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Parses a potentially-partial date string into a proper date object.
 *
 * The tagging formats we deal with base their date format on ISO 8601, but
 * the timestamp may be incomplete. Missing month/day components are filled
 * in with "01" BEFORE parsing: fields that are absent from the format are
 * otherwise taken from the current date, and e.g. parsing "2015-02" on the
 * 31st would overflow into March before the day could be reset.
 *
 * The time-of-day of the result for partial dates is the current time, as
 * it is not part of the parsed format.
 *
 * @link https://code.google.com/p/mp4v2/wiki/iTunesMetadata
 * @link https://wiki.xiph.org/VorbisComment#Date_and_time
 * @link http://id3.org/id3v2.4.0-frames
 *
 * @param string $dateString
 * @return null|Carbon null when the string cannot be parsed
 */
protected function parseDateString(string $dateString)
{
    switch (Str::length($dateString)) {
        // YYYY
        case 4:
            $format = 'Y m d';
            $value = $dateString . ' 01 01';
            break;

        // YYYY-MM
        case 7:
            $format = 'Y m d';
            $value = str_replace("-", " ", $dateString) . ' 01';
            break;

        // YYYY-MM-DD
        case 10:
            $format = 'Y m d';
            $value = str_replace("-", " ", $dateString);
            break;

        default:
            // We might have an ISO-8601 string in our hooves.
            // If not, give up.
            $format = Carbon::ISO8601;
            $value = $dateString;
            break;
    }

    try {
        return Carbon::createFromFormat($format, $value);
    } catch (\InvalidArgumentException $e) {
        // The string did not match the expected format.
        return null;
    }
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue