#100: Duplication detection

This commit is contained in:
Josef Citrine 2017-02-02 01:35:53 +00:00
parent d7a59b4131
commit 3526403661

View file

@ -3,6 +3,7 @@
namespace Poniverse\Ponyfm\Console\Commands;
use Auth;
use Carbon\Carbon;
use Config;
use DB;
use File;
@ -139,6 +140,10 @@ class ImportPonify extends Command
$getId3 = new getID3;
// Enable file hashing
$getId3->option_md5_data = true;
$getId3->option_md5_data_source = true;
// all tags read by getID3, including the cover art
$allTags = $getId3->analyze($file->getPathname());
@ -148,20 +153,83 @@ class ImportPonify extends Command
// normalized tags used by Pony.fm
$parsedTags = [];
if ($file->getExtension() === 'mp3') {
list($parsedTags, $rawTags) = $this->getId3Tags($allTags);
} else {
if ($file->getExtension() === 'm4a') {
list($parsedTags, $rawTags) = $this->getAtomTags($allTags);
list($parsedTags, $rawTags) = $this->parseTags($file);
//==========================================================================================================
// Check to see if we have this track already, if so, compare hashes of the two files
//==========================================================================================================
$artist = User::where('display_name', '=', $artist_name)->first();
$artistId = null;

$this->comment("Checking for duplicates");

if ($artist) {
    $artistId = $artist->id;
}

// Only look for an existing track when the artist is already known: a null
// user_id cannot identify this artist's tracks, so there is nothing to match.
$existingTrack = null;
if ($artistId !== null) {
    $existingTrack = Track::where('title', '=', $parsedTags['title'])
        ->where('user_id', '=', $artistId)
        ->first();
}

if ($existingTrack) {
    // A track with the same title and artist exists.
    // Before comparing audio, make sure the imported file maps to a known format.
    $importExtension = $file->getExtension();
    $importFormat = $this->getFormat($importExtension);
    if ($importFormat == null) {
        // No idea what this is, skip file.
        // Report the extension: $importFormat is always null on this branch.
        $this->comment(sprintf("Not an audio file (%s), skipping...", $importExtension));
        continue;
    }

    // Find the stored file of the existing track that has the same format
    // (if several match, the last one wins, as before).
    $existingFile = null;
    foreach ($existingTrack->trackFiles as $trackFile) {
        if ($trackFile->format == $importFormat) {
            $existingFile = $trackFile;
        }
    }

    if ($existingFile === null) {
        // Can't find a matching format
        // Check to see if we have a better quality file
    } else {
        $this->comment("Found existing file");

        // Found a matching format, are they the same?
        // Analyze the stored file with its own getID3 instance, configured to hash the audio data.
        $getId3_existing = new getID3;
        $getId3_existing->option_md5_data = true;
        $getId3_existing->option_md5_data_source = true;
        $existingFileTags = $getId3_existing->analyze($existingFile->getFile());

        // Prefer the hash of the source audio when present, otherwise fall back to the
        // hash of the audio data; null means getID3 produced no hash at all.
        $importHash = $allTags['md5_data_source'] ?? $allTags['md5_data'] ?? null;
        $targetHash = $existingFileTags['md5_data_source'] ?? $existingFileTags['md5_data'] ?? null;

        $this->info("Archive hash: " . $importHash);
        $this->info("Pony.fm hash: " . $targetHash);

        // Two missing hashes must not count as "identical audio", otherwise a file
        // that could not be hashed would be silently skipped.
        if ($importHash !== null && $importHash === $targetHash) {
            // Audio is identical, no need to reupload
            // We can update the metadata though
            // TODO: Update metadata
            $this->comment("Versions are the same. Skipping...\n");
            continue;
        } else {
            // Audio is different. Replace if it came from MLPMA
            // TODO: Replace file
        }
    }
} else {
    $this->comment("No duplicates");
}
//==========================================================================================================
// Create new user for the artist if one doesn't exist
//==========================================================================================================
$artist = User::where('display_name', '=', $artist_name)->first();
if (!$artist) {
$artist = new User;
$artist->display_name = $artist_name;
@ -220,12 +288,7 @@ class ImportPonify extends Command
$this->comment('Transcoding the track!');
Auth::loginUsingId($artist->id);
$getID3 = new getID3;
$getID3->analyze($file->getPathname());
$mime = null;
if (isset($getID3->info['mime_type'])) $mime = $getID3->info['mime_type'];
$mime = $allTags['mime_type'];
$trackFile = new UploadedFile($file->getPathname(), $file->getFilename(), $mime, null, null, true);
@ -245,13 +308,81 @@ class ImportPonify extends Command
}
}
/**
 * Computes the crc32b checksum of a file and returns it as an integer.
 *
 * The hexadecimal digest is converted to its raw bytes and then decoded
 * as a single 32-bit big-endian unsigned value.
 *
 * @param string $filepath path of the file to hash
 * @return int
 */
protected function hashAudio($filepath)
{
    $hexDigest = hash_file('crc32b', $filepath);
    $rawBytes = pack('H*', $hexDigest);
    $decoded = unpack('N', $rawBytes);

    return $decoded[1];
}
/**
 * Finds the format registered in Track::$Formats for a file extension.
 *
 * @param string $extension file extension to look up
 * @return mixed|null key of the first entry whose 'extension' equals $extension, or null when none does
 */
protected function getFormat($extension)
{
    $matchedName = null;

    foreach (Track::$Formats as $formatName => $formatInfo) {
        if ($formatInfo['extension'] != $extension) {
            continue;
        }

        // First match wins.
        $matchedName = $formatName;
        break;
    }

    return $matchedName;
}
/**
 * Reads a file's tags with getID3 and normalizes them for Pony.fm.
 *
 * The tag parser is chosen from the file's extension. Besides the codec-style
 * names, the container extensions "m4a" (Atom tags) and "ogg" (Vorbis comments)
 * are recognised, and the comparison is case-insensitive. Any other extension
 * is treated as an untagged file.
 *
 * @param mixed $file file object exposing getExtension() and getPathname()
 * @return array two-element array: [$parsedTags, $rawTags]
 */
public function parseTags($file)
{
    // Lower-cased so that e.g. "MP3" and "mp3" select the same parser.
    $audioCodec = strtolower($file->getExtension());

    //==========================================================================================================
    // Extract the original tags.
    //==========================================================================================================
    $getId3 = new getID3;

    // all tags read by getID3, including the cover art
    $allTags = $getId3->analyze($file->getPathname());

    // $rawTags => tags specific to a file format (ID3 or Atom), pre-normalization but with cover art removed
    // $parsedTags => normalized tags used by Pony.fm
    if ($audioCodec === 'mp3') {
        list($parsedTags, $rawTags) = $this->getId3Tags($allTags);
    } elseif ($audioCodec === 'm4a' || Str::startsWith($audioCodec, ['aac', 'alac'])) {
        // "m4a" is the extension actually seen on AAC/ALAC files.
        list($parsedTags, $rawTags) = $this->getAtomTags($allTags);
    } elseif (in_array($audioCodec, ['vorbis', 'flac', 'ogg'], true)) {
        // "ogg" is the extension actually seen on Vorbis files.
        list($parsedTags, $rawTags) = $this->getVorbisTags($allTags);
    } elseif (Str::startsWith($audioCodec, ['pcm', 'adpcm'])) {
        list($parsedTags, $rawTags) = $this->getAtomTags($allTags);
    } else {
        // Assume the file is untagged if it's in an unknown format.
        $parsedTags = [
            'title' => null,
            'artist' => null,
            'band' => null,
            'genre' => null,
            'track_number' => null,
            'album' => null,
            'year' => null,
            'release_date' => null,
            'comments' => null,
            'lyrics' => null,
        ];
        $rawTags = [];
    }

    return [$parsedTags, $rawTags];
}
/**
* @param array $rawTags
* @return array
*/
protected function getId3Tags($rawTags)
{
if (array_key_exists('tags', $rawTags) && array_key_exists('id3v2', $rawTags['tags'])) {
$tags = $rawTags['tags']['id3v2'];
} elseif (array_key_exists('tags', $rawTags) && array_key_exists('id3v1', $rawTags['tags'])) {
$tags = $rawTags['tags']['id3v1'];
} else {
$tags = [];
}
$comment = null;
if (isset($tags['comment'])) {
@ -267,15 +398,22 @@ class ImportPonify extends Command
$tags['comment'][0] = $comment;
}
$trackNumber = 1;
if (isset($tags['track_number'])) {
$trackNumberComponents = explode('/', $tags['track_number'][0]);
$trackNumber = $trackNumberComponents[0];
}
return [
[
'title' => $tags['title'][0],
'artist' => $tags['artist'][0],
'title' => isset($tags['title']) ? $tags['title'][0] : null,
'artist' => isset($tags['artist']) ? $tags['artist'][0] : null,
'band' => isset($tags['band']) ? $tags['band'][0] : null,
'genre' => isset($tags['genre']) ? $tags['genre'][0] : null,
'track_number' => isset($tags['track_number']) ? $tags['track_number'][0] : null,
'track_number' => $trackNumber,
'album' => isset($tags['album']) ? $tags['album'][0] : null,
'year' => isset($tags['year']) ? (int)$tags['year'][0] : null,
'year' => isset($tags['year']) ? (int) $tags['year'][0] : null,
'release_date' => isset($tags['release_date']) ? $this->parseDateString($tags['release_date'][0]) : null,
'comments' => $comment,
'lyrics' => isset($tags['unsynchronised_lyric']) ? $tags['unsynchronised_lyric'][0] : null,
],
@ -289,9 +427,57 @@ class ImportPonify extends Command
*/
protected function getAtomTags($rawTags)
{
    // The Atom tags are read from the 'quicktime' group of getID3's output;
    // a missing group is treated as "no tags".
    $tags = $rawTags['tags']['quicktime'] ?? [];

    // Each tag is stored as a list of values; only the first value is used.
    // Returns null when the tag (or its first value) is absent.
    $firstValue = function ($key) use ($tags) {
        return $tags[$key][0] ?? null;
    };

    // Track numbers may be written as "3/12"; keep only the part before the slash.
    // Defaults to 1 when the tag is absent.
    $trackNumber = 1;
    $rawTrackNumber = $firstValue('track_number');
    if ($rawTrackNumber !== null) {
        $trackNumberComponents = explode('/', $rawTrackNumber);
        $trackNumber = $trackNumberComponents[0];
    }

    // Prefer the explicit release date and fall back to the creation date.
    $releaseDate = null;
    $rawReleaseDate = $firstValue('release_date') ?? $firstValue('creation_date');
    if ($rawReleaseDate !== null) {
        $releaseDate = $this->parseDateString($rawReleaseDate);
    }

    $year = $firstValue('year');

    // First element: normalized tags; second element: the raw Atom tags.
    return [
        [
            'title' => $firstValue('title'),
            'artist' => $firstValue('artist'),
            'band' => $firstValue('band'),
            'album_artist' => $firstValue('album_artist'),
            'genre' => $firstValue('genre'),
            'track_number' => $trackNumber,
            'album' => $firstValue('album'),
            'year' => $year !== null ? (int) $year : null,
            'release_date' => $releaseDate,
            'comments' => $firstValue('comments'),
            'lyrics' => $firstValue('lyrics'),
        ],
        $tags
    ];
}
/**
* @param array $rawTags
* @return array
*/
protected function getVorbisTags($rawTags)
{
if (array_key_exists('tags', $rawTags) && array_key_exists('vorbiscomment', $rawTags['tags'])) {
$tags = $rawTags['tags']['vorbiscomment'];
} else {
$tags = [];
}
$trackNumber = 1;
if (isset($tags['track_number'])) {
$trackNumberComponents = explode('/', $tags['track_number'][0]);
$trackNumber = $trackNumberComponents[0];
@ -299,18 +485,74 @@ class ImportPonify extends Command
return [
[
'title' => $tags['title'][0],
'artist' => $tags['artist'][0],
'title' => isset($tags['title']) ? $tags['title'][0] : null,
'artist' => isset($tags['artist']) ? $tags['artist'][0] : null,
'band' => isset($tags['band']) ? $tags['band'][0] : null,
'album_artist' => isset($tags['album_artist']) ? $tags['album_artist'][0] : null,
'genre' => isset($tags['genre']) ? $tags['genre'][0] : null,
'track_number' => $trackNumber,
'album' => isset($tags['album']) ? $tags['album'][0] : null,
'year' => isset($tags['year']) ? (int)$tags['year'][0] : null,
'year' => isset($tags['year']) ? (int) $tags['year'][0] : null,
'release_date' => isset($tags['date']) ? $this->parseDateString($tags['date'][0]) : null,
'comments' => isset($tags['comments']) ? $tags['comments'][0] : null,
'lyrics' => isset($tags['lyrics']) ? $tags['lyrics'][0] : null,
],
$tags
];
}
/**
 * Parses a potentially-partial date string into a proper date object.
 *
 * The tagging formats we deal with base their date format on ISO 8601, but
 * the timestamp may be incomplete.
 *
 * Partial dates (YYYY, YYYY-MM, YYYY-MM-DD) are parsed with a leading "!" in
 * the format, which resets every field not present in the string instead of
 * filling it from the current time. Missing month/day therefore become 1 and
 * the time becomes 00:00:00. Without the reset, a YYYY-MM string parsed on
 * the 29th-31st of a month could overflow into the following month.
 *
 * @link https://code.google.com/p/mp4v2/wiki/iTunesMetadata
 * @link https://wiki.xiph.org/VorbisComment#Date_and_time
 * @link http://id3.org/id3v2.4.0-frames
 *
 * @param string $dateString
 * @return null|Carbon null when the string cannot be parsed
 */
protected function parseDateString(string $dateString)
{
    switch (Str::length($dateString)) {
        // YYYY
        case 4:
            $format = '!Y';
            $input = $dateString;
            break;

        // YYYY-MM
        case 7:
            $format = '!Y m';
            $input = str_replace("-", " ", $dateString);
            break;

        // YYYY-MM-DD
        case 10:
            $format = '!Y m d';
            $input = str_replace("-", " ", $dateString);
            break;

        default:
            // We might have an ISO-8601 string in our hooves.
            // If not, give up.
            $format = Carbon::ISO8601;
            $input = $dateString;
    }

    try {
        return Carbon::createFromFormat($format, $input);
    } catch (\InvalidArgumentException $e) {
        return null;
    }
}
}