From 224f4580ae8d388eeaf7defdfbfbf2d9610fd34c Mon Sep 17 00:00:00 2001
From: Xevion <xevioni@yandex.com>
Date: Wed, 21 Aug 2019 18:43:04 -0500
Subject: [PATCH] learning.MD

---
 .vscode/settings.json                  |   2 +-
 LEARNING.md                            |  30 ++++++++++++
 README.md                              |   8 +++-
 package/__pycache__/xmp.cpython-37.pyc | Bin 1252 -> 1124 bytes
 package/app.py                         |  64 +++++++++++++++----------
 5 files changed, 78 insertions(+), 26 deletions(-)
 create mode 100644 LEARNING.md

diff --git a/.vscode/settings.json b/.vscode/settings.json
index ebbb9f4..274aa4e 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,3 +1,3 @@
 {
-    "python.pythonPath": "X:\\Users\\Xevion\\Anaconda3\\python.exe"
+    "python.pythonPath": "F:\\Installations\\WPy64-3740\\python-3.7.4.amd64\\python.exe"
 }
\ No newline at end of file
diff --git a/LEARNING.md b/LEARNING.md
new file mode 100644
index 0000000..6e9a0cb
--- /dev/null
+++ b/LEARNING.md
@@ -0,0 +1,30 @@
+# photo-tagging
+
+## First Objectives
+
+This project really was an experience, I'll not ever deny that.
+
+My first goal was to basically be able to read an image, upload it to Google and receive back labels.
+This was accomplished very quickly, and the next step was learning how to parse the weird `.XMP` files I had found in the folder.
+I did not understand them or why they were detached from the `.NEF`, and only later did I learn that only `RAW` format files use this XMP file, which I only recently learned is called a `Sidecar file` in filesystems.
+
+I started off with trying to parse, read, and write tags to and from an XMP file quickly. I had much trouble with this, as I could not find a working (Windows) Python module for `XMP` parsing fast enough, as the only one available required `exempi`, which was not officially supported for Windows at the time. To this date, I still have not figured out how I could have gone about it. Please message me if you know how, I'd still love to see if Exempi had what I was looking for.
+
+I even tried an `XML` parser, but after learning that it was in fact an `XML` parser, I dropped it, oblivious to the fact that `XMP` is essentially `XML`.
+
+After a long while of unsuccessful searches on the subject, I gave up and looked into how I could do it myself - with minimal effort...
+
+ *I turned to Regex.*
+
+Surprisingly, Regex worked pretty well, and added tags without problem 99% of the time. However, I would only learn a long while after that there were serious caveats to what I was doing here.
+
+After "successfully" figuring out XMP parsing, I moved on to making a proper package and drew up a plan for how I could parse files. This one took a while since I'd never done image resizing or anything with input/output of files on this kind of scale (small scale) before.
+
+After a couple of hours, I had a working prototype which took files, compressed, uploaded to Google for labelling, and received a set of labels while keeping processing/uploading times at a minimum. It was rather speedy for what I could expect, at around 0.5 images a second.
+
+The real punch in the gut comes now - Time to implement the actual tagging operation. I learned here that only `.NEF` files carried the `.XMP` sidecar file, and had to split up my tagging operation into IPTC tagging and XMP based tagging, and I had to think about the complex file environment I would need to parse - What if two files had the same name, different extension, and one was a `.NEF` with a `.XMP` sidecar file? Does the `.NEF` get priority? Do we quit? Do we give an alert? What happens here?
+
+I also had to think about turning this application into a commandline utility for quickly tagging all the photos on my machine - Would it overwrite files in place? What happens if my utility goes haywire? Would I be storing backups of the `.XMP` files? What happens when it's a heavily modified `.XMP` file, those can get very large!
+
+Before I could even think about all that, I had to verify that I could work with `.XMP` files in the first place, as my current setup using Regex was failing when it came down to adding tags to something that has never been tagged before. Worse yet, I had and still have no idea what most of the stuff present in a `.XMP` file means, so it was clear that I had to resort to something with true XMP read and write capabilities.
+
diff --git a/README.md b/README.md
index f3cb28e..05bf07c 100644
--- a/README.md
+++ b/README.md
@@ -16,4 +16,10 @@ Automatic compression of photos for minimal data usage on both your and Google's
 
 Support for .NEF RAW file compression with .XMP metadata files.
 
-- With added support for all non .NEF files with basic 
\ No newline at end of file
+- With added support for all non .NEF files with basic 
+
+## Learning
+
+I've typed up a good file with everything I've learned about XMP files, Adobe, and just how complex this problem really was for me.
+
+[./LEARNING.md](./LEARNING.md)
\ No newline at end of file
diff --git a/package/__pycache__/xmp.cpython-37.pyc b/package/__pycache__/xmp.cpython-37.pyc
index e0639aae9dc1c3200140ba32cf342bfec2a16bb2..99686cf84224a839e02bcbaba3a8d2c447278e29 100644
GIT binary patch
delta 256
zcmaFD`GkYliI<m)0SH!n@`=4Nkynz@V4}LLToiK(a}-MoOB8DgYYJNnLlj#IV=#jz
z`%92gzs;<So=hw&8H%_juD4V8#c7kCnwMHpP-G_slr3fg5-bc=(&a^&C8-g<0h8;P
zkJN+gC;}7QK-Mh|m?S4i2S^(OBOenF6AL2?BOjv-BZwuz$ik$+q{CDs3zXGlzr~ba
xe2clbvbcyJ$S)EE5h5Uh4Mc#{K+H$C$tE{HrBoy*)ehvYVh$i70k(#P834r4Fev~4

delta 384
zcmaFD@r0AtiI<m)0SLDA`oun)$ScWcF;QJMJB7K0A&NPLC5k15HHtNbEs8CLJ%s}(
z#-73$%%I8n5~Rg1nGr+*0f=S>VrL*O1{qhwuz(?jaUn=8lV25!o}a6mCgUxR)V!49
z^30M9Ae$vEGbeQ=LlMj5^-OjWw>WHa5_6MM675((MgTEG5gU-$nB>W1Py`Z#5UfDf
zFHW2E)V$P+f+9OPAh(zaNU$(e$&?pmmZXLxrWa4<Wj<1ms^=CbT$U50M+!(VF!C|;
zFtISQF!C|VF!C_7FiJ48Fe@<YFc&ERWi>f&G36KEVlJ*ME)oLrqqtHlO5%a8E=esa
v5&;SniGv7k5TO7f5H<o?$_zyyw*vi@o1apelWGTYB{*m#m^gq|0wEIs5`Ide

diff --git a/package/app.py b/package/app.py
index 79550e4..efce2e0 100644
--- a/package/app.py
+++ b/package/app.py
@@ -15,7 +15,7 @@ output_path = os.path.join(sys.path[0], 'package', 'processing', 'output')
 # 3) Read XMP, then write new tags to it
 # 4) Delete temporary file, move NEF/JPEG and XMP
 
-def process_file(file_name, xmp):
+def process_file(file_name, xmp_name=None):
     global client
 
     # Remove the temporary file
@@ -64,17 +64,16 @@ def process_file(file_name, xmp):
         response = client.label_detection(image=image)
         labels = [label.description for label in response.label_annotations]
         print('\tLabels: {}'.format(', '.join(labels)))
-        if ext == '.NEF':
+        if xmp_name:
             print('\tWriting {} tags to output XMP...'.format(len(labels)))
-            xmp.writeXMP(os.path.join(input_path, xmp), os.path.join(output_path, xmp), labels)
+            xmp.writeXMP(os.path.join(input_path, xmp_name), os.path.join(output_path, xmp_name), labels)
         else:
             print('\tWriting {} tags to output {}'.format(len(labels), ext[1:].upper()))
-            info = iptcinfo3.IPTCInfo(os.path.join(input_path, xmp))
+            info = iptcinfo3.IPTCInfo(os.path.join(input_path, file_name))
             info['keywords'].extend(labels)
             info.save()
-            # Remove the strange ghost file
-            os.remove(os.path.join(input_path, xmp) + '~')
             print('\tMoving associated original image file...')
+
         # Copy dry-run
         shutil.copy2(os.path.join(input_path, file_name), os.path.join(output_path, file_name))
         # os.rename(os.path.join(input_path, file_name), os.path.join(output_path, file_name))
@@ -103,7 +102,7 @@ def run():
 
     # Find files we want to process based on if they have a corresponding .XMP
     files = os.listdir(input_path)
-    select = [file for file in files if os.path.splitext(file)[1] == '.xmp']
+    select = [file for file in files if os.path.splitext(file)[1] != '.xmp']
 
     # Create the 'temp' directory
     print(f'Initializing file processing for {len(select)} files...')
@@ -112,23 +111,40 @@ def run():
     try:
         # Process files
         for index, file in progressbar.progressbar(list(enumerate(select)), redirect_stdout=True, term_width=110):
-            # Get all possible files
-            possibles = [possible for possible in files if
-            possible.startswith(os.path.splitext(file)[0])
-            and not possible.endswith(os.path.splitext(file)[1])]
-            
-            # Skip and warn if more than 1 possible files, user error
-            if len(possibles) > 1:
-                print('More than 1 possible binding file for \'{}\'...'.format(file))
-                print('\n'.join(['>>> {}'.format(possible) for possible in possibles]))
-            # Zero possible files, user error, likely
-            elif len(possibles) <= 0:
-                print('Zero possible files for \'{}\'. skipping...'.format(file))
-            # Process individual file
-            else:
-                print('Processing file {}, \'{}\''.format(index + 1, possibles[0]), end=' | ')
-                process_file(file_name=possibles[0], xmp=file)
-                time.sleep(0.3)
+            name, ext = os.path.splitext(file)
+            ext = ext.upper()
+            # Raw files contain their metadata in an XMP file usually
+            if ext in ['.NEF', '.CR2']:
+                # Get all possible files
+                identicals = [possible for possible in files
+                            if possible.startswith(os.path.splitext(file)[0])
+                            and not possible.endswith(os.path.splitext(file)[1])
+                            and not possible.upper().endswith('.XMP')]
+
+                # Alert the user that there are duplicates in the directory and ask whether or not to continue
+                if len(identicals) > 0:
+                    print('Identical files were found in the directory, continue?')
+                    print(',\n\t'.join(identicals))
+
+                xmps = [possible for possible in files
+                        if possible.startswith(os.path.splitext(file)[0])
+                        and possible.upper().endswith('.XMP')]
+
+                # Skip and warn if more than 1 possible files, user error
+                if len(xmps) > 1:
+                    print('More than 1 possible XMP metadata file for \'{}\'...'.format(file))
+                    print(',\n'.join(['\t{}'.format(possible) for possible in xmps]))
+                # Zero possible files, user error, likely
+                elif len(xmps) <= 0:
+                    print('No matching XMP metadata file for \'{}\'. skipping...'.format(file))
+                # Process individual file
+                else:
+                    print('Processing file {}, \'{}\''.format(index + 1, xmps[0]), end=' | ')
+                    process_file(file_name=file, xmp_name=xmps[0])
+            elif ext in ['.JPEG', '.JPG', '.PNG']:
+                print('Processing file {}, \'{}\''.format(index + 1, file), end=' | ')
+                process_file(file_name=file)
+
     except:
         os.rmdir(temp_path)
         raise