Initial push, will update soon.

4b36de8c · Raymond · 378d406a · 4b36de8c · 4b36de8c · 4b36de8c
Commit 4b36de8c authored Oct 29, 2020 by Raymond
--- a/src/Python/.gitignore
+++ b/src/Python/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+OLD/
+.ipynb
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
\ No newline at end of file
--- a/src/Python/Cluster.py
+++ b/src/Python/Cluster.py
+import numpy as np
def Regularize(DistanceMat):
    """Symmetrize a pairwise distance matrix and flag degenerate inputs.

    Every off-diagonal entry is replaced by ``max(D[i, j], D[j, i])`` and the
    diagonal is set to zero. The input is not modified; a new array is
    returned.

    Args:
        DistanceMat: square 2-D array-like of pairwise distances.

    Returns:
        A tuple ``(D, degenerate)``. ``D`` is the regularized matrix.
        ``degenerate`` is 1 when the smallest distance between two distinct
        items is exactly zero, otherwise 0 (also 0 when there are fewer than
        two items, since there is no pair to compare).

    Raises:
        ValueError: if DistanceMat is not a square 2-D matrix.
    """
    # np.array copies, so the caller's matrix is left untouched.
    D = np.array(DistanceMat)
    if D.ndim != 2 or D.shape[0] != D.shape[1]:
        raise ValueError('DistanceMat must be a square 2-D matrix')

    # Element-wise max with the transpose is the symmetric upper envelope.
    D = np.maximum(D, D.T)
    np.fill_diagonal(D, 0)

    # The diagonal is zero by construction, so it must be excluded from the
    # degeneracy test; otherwise the flag would be raised for every input.
    off_diagonal = D[~np.eye(D.shape[0], dtype=bool)]
    degenerate = 1 if off_diagonal.size and np.min(off_diagonal) == 0 else 0
    return D, degenerate
\ No newline at end of file
--- a/src/Python/NCD.py
+++ b/src/Python/NCD.py
+from ctypes import *
+import os
+from matplotlib import pyplot as py
+import numpy as np
+from PIL import Image
def imList(im):
    """Return the image(s) in *im* as a new list of images.

    Args:
        im: one of
            * a list of images - shallow-copied, so the caller's list is not
              modified by later in-place edits of the result;
            * a non-empty tuple of 2-D numpy arrays (sizes may differ) -
              converted to a list;
            * an array with more than two dimensions - split along its first
              axis into one entry per image;
            * anything else - treated as a single image and wrapped in a
              one-element list.

    Returns:
        A list of images.
    """
    if isinstance(im, list):
        return im.copy()

    # A tuple of 2-D arrays is a sequence of images. It is handled before
    # np.shape() because differently sized members cannot be stacked into one
    # array, so asking for a common shape is not meaningful there.
    if isinstance(im, tuple) and im and all(
        isinstance(member, np.ndarray) and member.ndim == 2 for member in im
    ):
        return list(im)

    if len(np.shape(im)) > 2:
        # Stack of images: one list entry per slice along the first axis.
        return list(im)

    return [im]
def flifPress(im):
    """Return the number of bytes FLIF needs to encode *im*.

    The image(s) are zero-padded to a common size, handed to the FLIF encoder
    as the frames of one file, encoded into memory, and the size of that
    buffer is returned. The encoded bytes themselves are discarded.

    Args:
        im: a single 2-D grayscale image, a stack of images, or a list of
            images (anything accepted by imList). Pixel data is converted to
            8-bit before encoding.

    Returns:
        Size of the encoded FLIF stream in bytes (int).

    Raises:
        ValueError: if no image is supplied or an image is not 2-D.
        RuntimeError: if the FLIF library returns a NULL encoder or image.
        OSError: if ./lib/libflif.dll cannot be loaded.
    """
    X = imList(im)
    if not X:
        raise ValueError('flifPress requires at least one image')
    shapes = [np.shape(matx) for matx in X]
    if any(len(shape) != 2 for shape in shapes):
        raise ValueError('flifPress requires 2-D grayscale images')

    ####     Zero Padding:
    # Take the maximum per axis. max() over the shape tuples themselves is
    # lexicographic and can pick a shape that is smaller than another image
    # along the second axis, which yields a negative pad width.
    FW2 = max(shape[0] for shape in shapes)
    FH2 = max(shape[1] for shape in shapes)
    # Every image is padded to (FW2 + 1, FH2 + 1): the difference to the
    # largest size plus one extra row/column of zeros.
    # The encoder reads raw bytes, so force contiguous 8-bit storage.
    frames = [
        np.ascontiguousarray(
            np.pad(matx, ((0, FW2 - curW + 1), (0, FH2 - curH + 1))),
            dtype=np.uint8,
        )
        for matx, (curW, curH) in zip(X, shapes)
    ]

    ######## Start up FLIF #################
    LIB = CDLL("./lib/libflif.dll")  # import libflif library

    # Declare prototypes so ctypes passes full-width pointers instead of
    # defaulting to C int.
    # NOTE(review): signatures follow the public FLIF C headers
    # (flif_common.h / flif_enc.h) - confirm against the bundled DLL version.
    create_enc = LIB.flif_create_encoder
    create_enc.restype = c_void_p
    create_enc.argtypes = []
    import_image_GRAY = LIB.flif_import_image_GRAY
    import_image_GRAY.restype = c_void_p
    import_image_GRAY.argtypes = [c_uint32, c_uint32, c_void_p, c_uint32]
    encoder_add_image_move = LIB.flif_encoder_add_image_move
    encoder_add_image_move.restype = None
    encoder_add_image_move.argtypes = [c_void_p, c_void_p]
    encode_mem = LIB.flif_encoder_encode_memory
    encode_mem.restype = c_int32
    encode_mem.argtypes = [c_void_p, POINTER(c_void_p), POINTER(c_size_t)]
    destroy_image = LIB.flif_destroy_image
    destroy_image.restype = None
    destroy_image.argtypes = [c_void_p]
    destroy_encoder = LIB.flif_destroy_encoder
    destroy_encoder.restype = None
    destroy_encoder.argtypes = [c_void_p]
    free_memory = LIB.flif_free_memory
    free_memory.restype = None
    free_memory.argtypes = [c_void_p]

    enc = create_enc()
    if not enc:
        raise RuntimeError('flif_create_encoder returned NULL')

    fim = []                # FLIF image handles that must be destroyed
    pdest = c_void_p()      # receives the address of the encoded buffer
    prsz = c_size_t(0)      # receives the encoded size in bytes
    try:
        for frame in frames:
            rows, cols = frame.shape
            # Row-major 8-bit data: width is the column count, height the
            # row count, and one row occupies `cols` bytes.
            handle = import_image_GRAY(
                cols, rows, frame.ctypes.data_as(c_void_p), cols
            )
            if not handle:
                raise RuntimeError('flif_import_image_GRAY returned NULL')
            fim.append(handle)
            encoder_add_image_move(enc, handle)

        #####For Output##########
        encode_mem(enc, byref(pdest), byref(prsz))
        return prsz.value
    finally:
        #########Clean Up#######################
        # Runs on success and on error so native handles never leak.
        for handle in fim:
            destroy_image(handle)
        destroy_encoder(enc)
        if pdest.value is not None:
            free_memory(pdest)
def imNCDM(X):
    """Compute the multiset compression distance of a list of images.

    The value returned is ``(G(X) - min_i G(x_i)) / max_i G(X without x_i)``
    where ``G`` is the FLIF-compressed size in bytes given by flifPress.

    Args:
        X: list of 2-D images.

    Returns:
        0 when X holds fewer than two images, otherwise the ratio above.

    Raises:
        TypeError: if X is not a list.
    """
    if not isinstance(X, list):
        raise TypeError('input X requires a list of images')
    if len(X) <= 1:
        return 0

    GX = flifPress(X)  # Total bytes for the entire image list.
    cxi = np.asarray([flifPress(x) for x in X])  # Bytes for each image alone.
    gx = np.min(cxi)

    if len(X) == 2:
        # Leaving one image out of a pair leaves a single image, and those
        # sizes are already in cxi - no need to compress them a second time.
        gExclude = cxi
    else:
        gExclude = []
        for ni in range(len(X)):
            xExclude = X.copy()
            del xExclude[ni]  # Remove the particular image at this index
            gExclude.append(flifPress(xExclude))
        gExclude = np.asarray(gExclude)

    return (GX - gx) / np.max(gExclude)
\ No newline at end of file
--- a/src/Python/README.md
+++ b/src/Python/README.md
+# flifPress
+https://github.com/FLIF-hub/FLIF - Free Lossless Image Format (FLIF) library
+## Set Up
+Set up your virtual environment by using virtualenv
+In your virtual env, install the requirements with
+##### pip install -r requirements.txt
+Once the necessary libraries are set up, you should be able to run functions from apps.py and NCD.py.
+Note: if you are running from the command line, you will not be able to see the images (I suggest running from Jupyter). However, the calculations will still work.
+## Functions
+This repository contains the following main files:  
+apps.py -- application code containing auxiliary functions that build on NCD.py and Cluster.py  
+NCD.py -- contains functions pertaining to the FLIF library as well as other auxiliary functions (imList, flifPress, imNCDM). This file utilizes the FLIF library.  
+Cluster.py -- contains Regularize, which symmetrizes a distance matrix.
+### Usage of apps.py/ (See jupyter notebook for example usage)
+getmnist(image_path, label_path) - class that loads image/label data from idx ubyte files. If no image/label paths are passed as parameters, it defaults to the handwritten digits files in the working directory's ./lib folder.
+getmnist().show(index) - If you know the particular image's index you want to pull from the file, you may use this function to pull the data pertaining to that image. This function will also show you that particular image.  
+getmnist().training(nsamples) - Creates a randomized but uniformly distributed training set.   
+getmnist().getmnist(target, cardinality) - Creates a training set based off target and cardinality
+getmnist().mnistdistance(Training) - Returns the distance matrix for the given training set.  
+### Running from command line
+Open up your Python terminal and import everything from apps:
+##### from apps import *
+Once this is done, you will be able to run any of the apps or NCD functions from the command line. Please note, you will not be able to view images unless you have already set up a default viewer for your console.
--- a/src/Python/apps.py
+++ b/src/Python/apps.py
+import numpy as np
+from matplotlib import pyplot as plt
+import idx2numpy
+import random as rand
+from NCD import *
+from PIL import Image
+import cv2 as cv2
+import pandas as pd
+from Cluster import *
class getmnist:
    """Load MNIST-style idx files and build balanced, binarized samples.

    Attributes:
        images: array of images read from the image idx file.
        labels: array of labels read from the label idx file.
    """

    # Default data set: the handwritten digits files shipped in ./lib.
    _DEFAULT_IMAGE_PATH = './lib/train-images.idx3-ubyte'
    _DEFAULT_LABEL_PATH = './lib/train-labels.idx1-ubyte'

    def __init__(self, image_path = None, label_path = None):
        """Read the image and label files.

        Args:
            image_path: path of the idx image file.
            label_path: path of the idx label file.
                When both are omitted the handwritten digits files in ./lib
                are used.

        Raises:
            ValueError: if only one of the two paths is supplied.
        """
        if not image_path and not label_path:
            image_path = self._DEFAULT_IMAGE_PATH
            label_path = self._DEFAULT_LABEL_PATH
        elif not image_path or not label_path:
            # Fail here with a clear message instead of passing None to the
            # idx reader.
            raise ValueError('image_path and label_path must be given together')
        # These are the raw images; they are binarized only when sampled
        # (see show / training) before being passed on to flifPress.
        self.images = idx2numpy.convert_from_file(image_path)
        self.labels = idx2numpy.convert_from_file(label_path)

    @staticmethod
    def _binarize(image):
        """Binarize *image* with cv2.threshold (threshold 0, max value 255)."""
        _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY)
        return binary

    def show(self, index):
        """Plot the binarized image stored at *index* and return it.

        Args:
            index: integer position of the image in the data set.

        Returns:
            The binarized image.

        Raises:
            TypeError: if index is not an integer.
        """
        if not isinstance(index, (int, np.integer)):
            raise TypeError('index requires int')
        image = self._binarize(self.images[index])
        label = self.labels[index]
        plt.figure()  # open a new figure for this image
        plt.imshow(image, cmap = 'gray')
        plt.title('Label: ' + str(label), fontsize = 20, fontweight = 'bold')
        return image

    def training(self, nsamples):
        """Draw a random sample with *nsamples* distinct images per label.

        Args:
            nsamples: number of images to draw for every label present in
                the data set.

        Returns:
            numpy object array of dicts with keys 'label' (the true label),
            'im' (the binarized image) and 'i' (its index in the data set).
            Empty when nsamples is not positive.

        Raises:
            TypeError: if nsamples is not an integer.
            ValueError: if some label has fewer than nsamples images.
        """
        if not isinstance(nsamples, (int, np.integer)):
            raise TypeError('nsamples requires int')
        if nsamples <= 0:
            return np.asarray([])

        labels = np.asarray(self.labels)
        classes, counts = np.unique(labels, return_counts = True)
        if np.any(counts < nsamples):
            # Without this check the sampling below could never complete.
            scarce = classes[counts < nsamples].tolist()
            raise ValueError(
                'nsamples={} exceeds the images available for label(s) {}'.format(
                    nsamples, scarce))

        remaining = {label: nsamples for label in classes.tolist()}
        wanted = nsamples * len(remaining)
        Training = []
        # Visiting a random permutation of the indices samples uniformly and
        # guarantees that no image is picked twice.
        for i in rand.sample(range(len(labels)), len(labels)):
            key = labels[i].item()
            if remaining[key] == 0:
                # Maintain uniform distribution of true labels
                continue
            remaining[key] -= 1
            Training.append({
                'label': labels[i],
                'im': self._binarize(self.images[i]),
                'i': i,
            })
            if len(Training) == wanted:
                break
        return np.asarray(Training)

    def getmnist(self, targ, cardinality):
        """Draw a balanced sample and keep only the requested labels.

        Args:
            targ: list or numpy array of labels to keep.
            cardinality: number of images per label passed to training().

        Returns:
            A tuple ``(samples, idxGT)``: the retained sample dicts sorted by
            label, and an array holding each retained label plus one.

        Raises:
            TypeError: if targ is neither a list nor a numpy array.
        """
        if not isinstance(targ, (np.ndarray, list)):
            raise TypeError("Input Targ requires list or numpy array")
        target = np.asarray(targ)
        ordered = sorted(self.training(cardinality), key = lambda s: s['label'])
        reTrain = [sample for sample in ordered if sample['label'] in target]
        idxGT = [sample['label'] + 1 for sample in reTrain]
        return np.asarray(reTrain), np.asarray(idxGT)

    def mnistdistance(self, Training):
        """Return the symmetric distance matrix of a sample.

        Args:
            Training: sequence of sample dicts as produced by training() /
                getmnist(); only the 'im' entry is used.

        Returns:
            Square matrix of pairwise imNCDM distances after Regularize.
        """
        size = len(Training)
        d = np.zeros((size, size))
        for i in range(size):
            for j in range(size):
                if i == j:
                    continue
                # Both orderings are computed; Regularize then merges the
                # (i, j) and (j, i) entries into one symmetric value.
                d[i, j] = imNCDM([Training[i]['im'], Training[j]['im']])
        return Regularize(d)[0]
if __name__ == "__main__":
    # Demo run: draw a balanced sample, keep the digits 0 and 1, print them.
    target, cardinality = [0, 1], 10
    TrainSet, idxGT = getmnist().getmnist(target, cardinality)
    print(TrainSet)
\ No newline at end of file
--- a/src/Python/goNISTk.py
+++ b/src/Python/goNISTk.py
+from NCD import *
+from Cluster import *
+from apps import getmnist
+import numpy as np
# Labels to keep and number of images drawn per label in every trial.
target = [0,1]
cardinality = 10
# The constructor reads both idx files from disk, so build the loader once
# and reuse it instead of re-reading the data set twice per iteration.
mnist = getmnist()
for n in range(100):
    # Fresh random sample each trial, then its pairwise distance matrix.
    xDigits = mnist.getmnist(target, cardinality)
    dDigits = mnist.mnistdistance(xDigits[0])
\ No newline at end of file
--- a/src/Python/requirements.txt
+++ b/src/Python/requirements.txt
+numpy==1.18.5
+matplotlib==3.3.0
+Pillow==7.2.0
+idx2numpy==1.2.2
+opencv-python==4.4.0.42
+pandas==1.1.1
+notebook==6.1.1
\ No newline at end of file