From 224f4580ae8d388eeaf7defdfbfbf2d9610fd34c Mon Sep 17 00:00:00 2001 From: Xevion Date: Wed, 21 Aug 2019 18:43:04 -0500 Subject: [PATCH] learning.MD --- .vscode/settings.json | 2 +- LEARNING.md | 30 ++++++++++++ README.md | 8 +++- package/__pycache__/xmp.cpython-37.pyc | Bin 1252 -> 1124 bytes package/app.py | 64 +++++++++++++++---------- 5 files changed, 78 insertions(+), 26 deletions(-) create mode 100644 LEARNING.md diff --git a/.vscode/settings.json b/.vscode/settings.json index ebbb9f4..274aa4e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,3 @@ { - "python.pythonPath": "X:\\Users\\Xevion\\Anaconda3\\python.exe" + "python.pythonPath": "F:\\Installations\\WPy64-3740\\python-3.7.4.amd64\\python.exe" } \ No newline at end of file diff --git a/LEARNING.md b/LEARNING.md new file mode 100644 index 0000000..6e9a0cb --- /dev/null +++ b/LEARNING.md @@ -0,0 +1,30 @@ +# photo-tagging + +## First Objectives + +This project really was an experience, I'll not ever deny that. + +My first goal was to basically be able to read an image, upload to Google and receive back labels. +This was accomplished very quickly, and the next step was learning how to parse the weird `.XMP` files I had found in the folder. +I did not understand them and why they were detached from the `.NEF`, and only later did I learn that only `RAW` format files use this XMP file, which I only recently learned was called a `Sidecar file` in filesystems. + +I started off with trying to parse, read, and write tags to and from an XMP file quickly. I had much trouble with this, as I could not find a working (Windows) Python module for `XMP` parsing fast enough, as the only one available required `exempi`, which was not officially supported for Windows at the time. To this day, I still have not figured out how I could have gone about it. Please message me if you know how, I'd still love to see if Exempi had what I was looking for. 
+ +I even tried an `XML` parser, but after learning that it was in fact an `XML` parser, I dropped it, oblivious to the fact that `XMP` is essentially `XML`. + +After a long while of unsuccessful searches on the subject, I gave up and looked into how I could do it myself - with minimal effort... + + *I turned to Regex.* + +Surprisingly, Regex worked pretty well, and added tags without problem 99% of the time. However, I would only learn a long while after that there were serious caveats to what I was doing here. + +After "successfully" figuring out XMP parsing, I moved on to making a proper package and drew up a plan for how I could parse files. This one took a while since I'd never done image resizing or anything with input/output of files on this kind of scale (small scale) before. + +After a couple of hours, I had a working prototype which took files, compressed, uploaded to Google for labelling, and received a set of labels while keeping processing/uploading times at a minimum. It was rather speedy for what I could expect, at around 0.5 images a second. + +The real punch in the gut comes now - Time to implement the actual tagging operation. I learned here that only `.NEF` files carried the `.XMP` sidecar file, and had to split up my tagging operation into IPTC tagging and XMP-based tagging, and I had to think about the complex file environment I would need to parse - What if two files had the same name, different extension, and one was a `.NEF` with a `.XMP` sidecar file? Does the `.NEF` get priority? Do we quit? Do we give an alert? What happens here? + +I also had to think about turning this application into a command-line utility for quickly tagging all the photos on my machine - Would it overwrite files in place? What happens if my utility goes haywire? Would I be storing backups of the `.XMP` files? What happens when it's a heavily modified `.XMP` file, those can get very large! 
+ +Before I could even think about all that, I had to verify that I could work with `.XMP` files in the first place, as my current setup using Regex was failing when it came down to adding tags to something that has never been tagged before. Worse yet, I had and still have no idea what most of the stuff present in a `.XMP` file means, so it was clear that I had to resort to something with true XMP read and write capabilities. + diff --git a/README.md b/README.md index f3cb28e..05bf07c 100644 --- a/README.md +++ b/README.md @@ -16,4 +16,10 @@ Automatic compression of photos for minimal data usage on both your and Google's Support for .NEF RAW file compression with .XMP metadata files. -- With added support for all non .NEF files with basic \ No newline at end of file +- With added support for all non .NEF files with basic + +## Learning + +I've typed up a good file with everything I've learned about XMP files, Adobe, and just how complex this problem really was for me. + +[./LEARNING.md](./LEARNING.md) \ No newline at end of file diff --git a/package/__pycache__/xmp.cpython-37.pyc b/package/__pycache__/xmp.cpython-37.pyc index e0639aae9dc1c3200140ba32cf342bfec2a16bb2..99686cf84224a839e02bcbaba3a8d2c447278e29 100644 GIT binary patch delta 256 zcmaFD`GkYliIVrL*O1{qhwuz(?jaUn=8lV25!o}a6mCgUxR)V!49 z^30M9Ae$vEGbeQ=LlMj5^-OjWw>WHa5_6MM675((MgTEG5gU-$nB>W1Py`Z#5UfDf zFHW2E)V$P+f+9OPAh(zaNU$(e$&?pmmZXLxrWa4f&G36KEVlJ*ME)oLrqqtHlO5%a8E=esa v5&;SniGv7k5TO7f5H 1: - print('More than 1 possible binding file for \'{}\'...'.format(file)) - print('\n'.join(['>>> {}'.format(possible) for possible in possibles])) - # Zero possible files, user error, likely - elif len(possibles) <= 0: - print('Zero possible files for \'{}\'. 
skipping...'.format(file)) - # Process individual file - else: - print('Processing file {}, \'{}\''.format(index + 1, possibles[0]), end=' | ') - process_file(file_name=possibles[0], xmp=file) - time.sleep(0.3) + name, ext = os.path.splitext(file) + ext = ext.upper() + # Raw files contain their metadata in an XMP file usually + if ext in ['.NEF', '.CR2']: + # Get all possible files + identicals = [possible for possible in files + if possible.startswith(os.path.splitext(file)[0]) + and not possible.endswith(os.path.splitext(file)[1]) + and not possible.upper().endswith('.XMP')] + + # Alert the user that there are duplicates in the directory and ask whether or not to continue + if len(identicals) > 0: + print('Identical files were found in the directory, continue?') + print(',\n\t'.join(identicals)) + + xmps = [possible for possible in files + if possible.startswith(os.path.splitext(file)[0]) + and possible.upper().endswith('.XMP')] + + # Skip and warn if more than 1 possible files, user error + if len(xmps) > 1: + print('More than 1 possible XMP metadata file for \'{}\'...'.format(file)) + print(',\n'.join(['\t{}'.format(possible) for possible in xmps])) + # Zero possible files, user error, likely + elif len(xmps) <= 0: + print('No matching XMP metadata file for \'{}\'. skipping...'.format(file)) + # Process individual file + else: + print('Processing file {}, \'{}\''.format(index + 1, xmps[0]), end=' | ') + process_file(file_name=file, xmp_name=xmps[0]) + elif ext in ['.JPEG', '.JPG', '.PNG']: + print('Processing file {}, \'{}\''.format(index + 1, file), end=' | ') + process_file(file_name=file) + except: os.rmdir(temp_path) raise