From 35264036616a0ab50a61708b855a4c8a74a24437 Mon Sep 17 00:00:00 2001
From: Josef Citrine
Date: Thu, 2 Feb 2017 01:35:53 +0000
Subject: [PATCH] #100: Duplication detection

---
 app/Console/Commands/ImportPonify.php | 319 ++++++++++++++++++++++++--
 1 file changed, 296 insertions(+), 23 deletions(-)

diff --git a/app/Console/Commands/ImportPonify.php b/app/Console/Commands/ImportPonify.php
index 656671ec..1928e98c 100644
--- a/app/Console/Commands/ImportPonify.php
+++ b/app/Console/Commands/ImportPonify.php
@@ -3,6 +3,7 @@ namespace Poniverse\Ponyfm\Console\Commands;
 
 
 use Auth;
+use Carbon\Carbon;
 use Config;
 use DB;
 use File;
@@ -139,6 +140,10 @@ class ImportPonify extends Command
 
             $getId3 = new getID3;
 
+            // Enable file hashing
+            $getId3->option_md5_data = true;
+            $getId3->option_md5_data_source = true;
+
             // all tags read by getID3, including the cover art
             $allTags = $getId3->analyze($file->getPathname());
 
@@ -148,20 +153,85 @@ class ImportPonify extends Command
             // normalized tags used by Pony.fm
             $parsedTags = [];
 
-            if ($file->getExtension() === 'mp3') {
-                list($parsedTags, $rawTags) = $this->getId3Tags($allTags);
-            } else {
-                if ($file->getExtension() === 'm4a') {
-                    list($parsedTags, $rawTags) = $this->getAtomTags($allTags);
+            list($parsedTags, $rawTags) = $this->parseTags($file, $allTags);
+
+            //==========================================================================================================
+            // Check to see if we have this track already, if so, compare hashes of the two files
+            //==========================================================================================================
+
+            $artist = User::where('display_name', '=', $artist_name)->first();
+            $artistId = null;
+
+            $this->comment("Checking for duplicates");
+
+            if ($artist) {
+                $artistId = $artist->id;
+            }
+
+            $existingTrack = Track::where('title', '=', $parsedTags['title'])
+                ->where('user_id', '=', $artistId)
+                ->first();
+
+            if ($existingTrack) {
+                // We got one!!
+                // Ok, let's not get too excited
+                // First let's see if we have a matching file type
+
+                $importFormat = $this->getFormat($file->getExtension());
+                if ($importFormat === null) {
+                    // No idea what this is, skip file
+                    $this->comment(sprintf("Not an audio file (%s), skipping...", $file->getExtension()));
+                    continue;
                 }
+
+                $existingFile = null;
+
+                foreach ($existingTrack->trackFiles as $trackFile) {
+                    if ($trackFile->format == $importFormat) {
+                        $existingFile = $trackFile;
+                    }
+                }
+
+                if ($existingFile === null) {
+                    // Can't find a matching format
+                    // Check to see if we have a better quality file
+
+                } else {
+                    $this->comment("Found existing file");
+
+                    // Found a matching format, are they the same?
+                    $getId3_existing = new getID3;
+                    $getId3_existing->option_md5_data = true;
+                    $getId3_existing->option_md5_data_source = true;
+                    $existingFileTags = $getId3_existing->analyze($existingFile->getFile());
+
+                    $importHash = array_key_exists('md5_data_source', $allTags) ? $allTags['md5_data_source'] : $allTags['md5_data'];
+                    $targetHash = array_key_exists('md5_data_source', $existingFileTags) ? $existingFileTags['md5_data_source'] : $existingFileTags['md5_data'];
+
+                    $this->info("Archive hash: " . $importHash);
+                    $this->info("Pony.fm hash: " . $targetHash);
+
+                    // Compare with ===: PHP's loose == treats two digests of the form
+                    // "0e<digits>" as equal numbers even when the strings differ.
+                    if ($importHash === $targetHash) {
+                        // Audio is identical, no need to reupload
+                        // We can update the metadata though
+                        // TODO: Update metadata
+                        $this->comment("Versions are the same. Skipping...\n");
+                        continue;
+                    } else {
+                        // Audio is different. Replace if it came from MLPMA
+                        // TODO: Replace file
+                    }
+                }
+            } else {
+                $this->comment("No duplicates");
             }
 
             //==========================================================================================================
             // Create new user for the artist if one doesn't exist
             //==========================================================================================================
 
-            $artist = User::where('display_name', '=', $artist_name)->first();
-
             if (!$artist) {
                 $artist = new User;
                 $artist->display_name = $artist_name;
@@ -220,12 +290,8 @@ class ImportPonify extends Command
             $this->comment('Transcoding the track!');
             Auth::loginUsingId($artist->id);
 
-            $getID3 = new getID3;
-            $getID3->analyze($file->getPathname());
-
-            $mime = null;
-
-            if (isset($getID3->info['mime_type'])) $mime = $getID3->info['mime_type'];
+            // 'mime_type' can be missing from the analysis; the code this replaces guarded for that too.
+            $mime = $allTags['mime_type'] ?? null;
 
             $trackFile = new UploadedFile($file->getPathname(), $file->getFilename(), $mime, null, null, true);
 
@@ -245,13 +311,107 @@ class ImportPonify extends Command
         }
     }
 
+    /**
+     * Computes the CRC32 checksum of a file.
+     *
+     * @param string $filepath path of the file to checksum
+     * @return int the crc32b digest decoded as an unsigned 32-bit integer
+     */
+    protected function hashAudio($filepath) {
+        $hash = hash_file('crc32b', $filepath);
+        $array = unpack('N', pack('H*', $hash));
+        return $array[1];
+    }
+
+    /**
+     * Looks up the Track::$Formats key registered for a file extension.
+     *
+     * @param string $extension extension to look for, e.g. the value of $file->getExtension()
+     * @return int|string|null the matching key, or null when no format uses the extension
+     */
+    protected function getFormat($extension) {
+        foreach(Track::$Formats as $name => $format) {
+            if ($format['extension'] === $extension) {
+                return $name;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Reads the tags embedded in an audio file and normalizes them.
+     *
+     * The tag parser is chosen from the file's extension; an extension that
+     * matches none of the known formats yields an all-null tag set.
+     *
+     * @param mixed $file object exposing getExtension() and getPathname()
+     * @param array|null $allTags result of an earlier getID3 analysis of $file;
+     *                            the file is analyzed here when this is omitted
+     * @return array two elements: the normalized tags, then the raw tags
+     */
+    public function parseTags($file, array $allTags = null)
+    {
+        // This is a file extension rather than a codec name, so container
+        // extensions are accepted alongside the codec names below.
+        $audioCodec = strtolower($file->getExtension());
+
+        //==========================================================================================================
+        // Extract the original tags.
+        //==========================================================================================================
+        if ($allTags === null) {
+            $getId3 = new getID3;
+
+            // all tags read by getID3, including the cover art
+            $allTags = $getId3->analyze($file->getPathname());
+        }
+
+        // $rawTags => tags specific to a file format (ID3 or Atom), pre-normalization but with cover art removed
+        // $parsedTags => normalized tags used by Pony.fm
+
+        if ($audioCodec === 'mp3') {
+            list($parsedTags, $rawTags) = $this->getId3Tags($allTags);
+        } elseif ($audioCodec === 'm4a' || Str::startsWith($audioCodec, ['aac', 'alac'])) {
+            list($parsedTags, $rawTags) = $this->getAtomTags($allTags);
+        } elseif (in_array($audioCodec, ['vorbis', 'flac', 'ogg'], true)) {
+            list($parsedTags, $rawTags) = $this->getVorbisTags($allTags);
+        } elseif (Str::startsWith($audioCodec, ['pcm', 'adpcm'])) {
+            list($parsedTags, $rawTags) = $this->getAtomTags($allTags);
+        } else {
+            // Assume the file is untagged if it's in an unknown format.
+            $parsedTags = [
+                'title' => null,
+                'artist' => null,
+                'band' => null,
+                'genre' => null,
+                'track_number' => null,
+                'album' => null,
+                'year' => null,
+                'release_date' => null,
+                'comments' => null,
+                'lyrics' => null,
+            ];
+            $rawTags = [];
+        }
+
+        return [$parsedTags, $rawTags];
+    }
+
     /**
      * @param array $rawTags
      * @return array
      */
     protected function getId3Tags($rawTags)
     {
-        $tags = $rawTags['tags']['id3v2'];
+        if (array_key_exists('tags', $rawTags) && array_key_exists('id3v2', $rawTags['tags'])) {
+            $tags = $rawTags['tags']['id3v2'];
+        } elseif (array_key_exists('tags', $rawTags) && array_key_exists('id3v1', $rawTags['tags'])) {
+            $tags = $rawTags['tags']['id3v1'];
+        } else {
+            $tags = [];
+        }
+
+
         $comment = null;
 
         if (isset($tags['comment'])) {
@@ -267,15 +427,22 @@ class ImportPonify extends Command
             $tags['comment'][0] = $comment;
         }
 
+        $trackNumber = 1;
+        if (isset($tags['track_number'])) {
+            $trackNumberComponents = explode('/', $tags['track_number'][0]);
+            $trackNumber = $trackNumberComponents[0];
+        }
+
         return [
             [
-                'title' => $tags['title'][0],
-                'artist' => $tags['artist'][0],
+                'title' => isset($tags['title']) ? $tags['title'][0] : null,
+                'artist' => isset($tags['artist']) ? $tags['artist'][0] : null,
                 'band' => isset($tags['band']) ? $tags['band'][0] : null,
                 'genre' => isset($tags['genre']) ? $tags['genre'][0] : null,
-                'track_number' => isset($tags['track_number']) ? $tags['track_number'][0] : null,
+                'track_number' => $trackNumber,
                 'album' => isset($tags['album']) ? $tags['album'][0] : null,
-                'year' => isset($tags['year']) ? (int)$tags['year'][0] : null,
+                'year' => isset($tags['year']) ? (int) $tags['year'][0] : null,
+                'release_date' => isset($tags['release_date']) ? $this->parseDateString($tags['release_date'][0]) : null,
                 'comments' => $comment,
                 'lyrics' => isset($tags['unsynchronised_lyric']) ? $tags['unsynchronised_lyric'][0] : null,
             ],
@@ -289,9 +456,59 @@ class ImportPonify extends Command
      */
     protected function getAtomTags($rawTags)
     {
-        $tags = $rawTags['tags']['quicktime'];
+        if (array_key_exists('tags', $rawTags) && array_key_exists('quicktime', $rawTags['tags'])) {
+            $tags = $rawTags['tags']['quicktime'];
+        } else {
+            $tags = [];
+        }
 
-        $trackNumber = null;
+        $trackNumber = 1;
+        if (isset($tags['track_number'])) {
+            $trackNumberComponents = explode('/', $tags['track_number'][0]);
+            $trackNumber = $trackNumberComponents[0];
+        }
+
+        if (isset($tags['release_date'])) {
+            $releaseDate = $this->parseDateString($tags['release_date'][0]);
+        } elseif (isset($tags['creation_date'])) {
+            $releaseDate = $this->parseDateString($tags['creation_date'][0]);
+        } else {
+            $releaseDate = null;
+        }
+
+        return [
+            [
+                'title' => isset($tags['title']) ? $tags['title'][0] : null,
+                'artist' => isset($tags['artist']) ? $tags['artist'][0] : null,
+                'band' => isset($tags['band']) ? $tags['band'][0] : null,
+                'album_artist' => isset($tags['album_artist']) ? $tags['album_artist'][0] : null,
+                'genre' => isset($tags['genre']) ? $tags['genre'][0] : null,
+                'track_number' => $trackNumber,
+                'album' => isset($tags['album']) ? $tags['album'][0] : null,
+                'year' => isset($tags['year']) ? (int) $tags['year'][0] : null,
+                'release_date' => $releaseDate,
+                'comments' => isset($tags['comments']) ? $tags['comments'][0] : null,
+                'lyrics' => isset($tags['lyrics']) ? $tags['lyrics'][0] : null,
+            ],
+            $tags
+        ];
+    }
+
+    /**
+     * Normalizes the Vorbis comment tags found in a getID3 analysis.
+     *
+     * @param array $rawTags
+     * @return array two elements: the normalized tags, then the Vorbis comment tags
+     */
+    protected function getVorbisTags($rawTags)
+    {
+        if (array_key_exists('tags', $rawTags) && array_key_exists('vorbiscomment', $rawTags['tags'])) {
+            $tags = $rawTags['tags']['vorbiscomment'];
+        } else {
+            $tags = [];
+        }
+
+        $trackNumber = 1;
         if (isset($tags['track_number'])) {
             $trackNumberComponents = explode('/', $tags['track_number'][0]);
             $trackNumber = $trackNumberComponents[0];
@@ -299,18 +516,74 @@ class ImportPonify extends Command
         }
         return [
             [
-                'title' => $tags['title'][0],
-                'artist' => $tags['artist'][0],
+                'title' => isset($tags['title']) ? $tags['title'][0] : null,
+                'artist' => isset($tags['artist']) ? $tags['artist'][0] : null,
                 'band' => isset($tags['band']) ? $tags['band'][0] : null,
                 'album_artist' => isset($tags['album_artist']) ? $tags['album_artist'][0] : null,
                 'genre' => isset($tags['genre']) ? $tags['genre'][0] : null,
                 'track_number' => $trackNumber,
                 'album' => isset($tags['album']) ? $tags['album'][0] : null,
-                'year' => isset($tags['year']) ? (int)$tags['year'][0] : null,
+                'year' => isset($tags['year']) ? (int) $tags['year'][0] : null,
+                'release_date' => isset($tags['date']) ? $this->parseDateString($tags['date'][0]) : null,
                 'comments' => isset($tags['comments']) ? $tags['comments'][0] : null,
                 'lyrics' => isset($tags['lyrics']) ? $tags['lyrics'][0] : null,
             ],
             $tags
         ];
     }
+
+    /**
+     * Parses a potentially-partial date string into a proper date object.
+     *
+     * The tagging formats we deal with base their date format on ISO 8601, but
+     * the timestamp may be incomplete.
+     *
+     * Partial dates are parsed with the "!" format prefix so that missing
+     * fields fall back to the first month/day at midnight instead of being
+     * filled in from the current system time.
+     *
+     * @link https://code.google.com/p/mp4v2/wiki/iTunesMetadata
+     * @link https://wiki.xiph.org/VorbisComment#Date_and_time
+     * @link http://id3.org/id3v2.4.0-frames
+     *
+     * @param string $dateString
+     * @return null|Carbon null when the string cannot be parsed
+     */
+    protected function parseDateString(string $dateString)
+    {
+        switch (Str::length($dateString)) {
+            // YYYY
+            case 4:
+                try {
+                    return Carbon::createFromFormat('!Y', $dateString);
+                } catch (\InvalidArgumentException $e) {
+                    return null;
+                }
+
+            // YYYY-MM
+            case 7:
+                try {
+                    return Carbon::createFromFormat('!Y m', str_replace("-", " ", $dateString));
+                } catch (\InvalidArgumentException $e) {
+                    return null;
+                }
+
+            // YYYY-MM-DD
+            case 10:
+                try {
+                    return Carbon::createFromFormat('!Y m d', str_replace("-", " ", $dateString));
+                } catch (\InvalidArgumentException $e) {
+                    return null;
+                }
+
+            default:
+                // We might have an ISO-8601 string in our hooves.
+                // If not, give up.
+                try {
+                    return Carbon::createFromFormat(Carbon::ISO8601, $dateString);
+                } catch (\InvalidArgumentException $e) {
+                    return null;
+                }
+        }
+    }
 }