view Preprocessing/preomr.py @ 2:46fb79167a61 tip

Main Code
author Victor Padilla <victor.padilla.mc@gmail.com>
date Mon, 04 May 2015 22:56:18 +0200
parents
children
line wrap: on
line source
import sys
import cv2
import numpy as np
import math
import copy

from gamera.core import *
from gamera.toolkits.musicstaves import musicstaves_rl_roach_tatem
from gamera.toolkits.musicstaves import musicstaves_rl_fujinaga
from gamera.toolkits.musicstaves import stafffinder_miyao
from gamera.toolkits.musicstaves import stafffinder_dalitz
from gamera.toolkits.musicstaves import stafffinder_projections
from gamera.plugins import numpy_io
init_gamera()

#import ossiafinder_dalitz

def intersect(r1,r2):
    """Compute the overlap of two axis-aligned rectangles.

    :param r1: dict with 'x', 'y', 'width' and 'height' entries
    :param r2: dict with the same entries as r1
    :returns: dict with 'x', 'y', 'width', 'height' and 'area' of the
        overlap.  If the rectangles do not overlap, 'width' and/or
        'height' come out zero or negative and 'area' is simply their
        product, so callers must check that both sizes are positive.
    """
    left = max(r1['x'], r2['x'])
    top = max(r1['y'], r2['y'])
    right = min(r1['x'] + r1['width'], r2['x'] + r2['width'])
    bottom = min(r1['y'] + r1['height'], r2['y'] + r2['height'])

    overlap_width = right - left
    overlap_height = bottom - top
    return {
        "x": left,
        "y": top,
        "width": overlap_width,
        "height": overlap_height,
        "area": overlap_width * overlap_height,
    }

def ydist(r1,r2):
    """Vertical gap between two rectangles that do not intersect.

    :param r1: dict with 'y' and 'height' entries
    :param r2: dict with 'y' and 'height' entries
    :returns: the smaller of the two top-edge-to-bottom-edge distances
    """
    r1_top, r1_bottom = r1['y'], r1['y'] + r1['height']
    r2_top, r2_bottom = r2['y'], r2['y'] + r2['height']

    r1_below_r2 = abs(r1_top - r2_bottom)
    r2_below_r1 = abs(r2_top - r1_bottom)
    return min(r1_below_r2, r2_below_r1)
    

def show(img, factor=0.5):
    """Write a (possibly rescaled) copy of an image to 'show.png'.

    Nothing is displayed on screen; the image is only saved to the
    current working directory for inspection.

    :param img: image array accepted by cv2.resize / cv2.imwrite
    :param factor: scale factor (default 0.5, half size); a factor of
        exactly 1.0 skips the resize
    """
    scaled = img if factor == 1.0 else cv2.resize(img, (0,0), fx=factor, fy=factor)
    cv2.imwrite('show.png', scaled)
#    while(1):
#        k = cv2.waitKey(0)
#        if k==27:    # Esc key to quit
#            cv2.destroyAllWindows()
#            exit()
#        if k==32:    # Space to stop
#            cv2.destroyAllWindows()
#            break


def max_staff_height(blob):
    """Height of the tallest staff attached to a blob.

    The height of a staff is measured at the left end of its lines: the
    first y value of its last line minus the first y value of its first
    line.

    :param blob: dict whose 'staves' entry is a list of staves, each a
        sequence of line objects with a y_list attribute
    :returns: the largest staff height, or 0 if there are no staves (or
        none with a positive height)
    """
    heights = [staff[-1].y_list[0] - staff[0].y_list[0]
               for staff in blob['staves']]
    # Seed with 0 so the result is never negative and empty input works
    return max([0] + heights)


def deskew(img):
    """Deskew a page image using lines detected by OpenCV's HoughLines.

    The skew is estimated as the median deviation from horizontal of all
    detected lines, and the image is rotated about its centre by that
    angle.

    :param img: 3-channel BGR image (it is converted with COLOR_BGR2GRAY)
    :returns: the rotated image, same size as the input; the input image
        itself is returned unchanged when no lines are detected
    """
    print("Deskewing.")
    imgHeight, imgWidth = img.shape[:2]
    img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    img_edges = cv2.Canny(img_gray,50,150,apertureSize = 3)
    houghThresh = int(imgWidth*0.15)
    lines = cv2.HoughLines(img_edges,1,np.pi/(180*3),houghThresh)

    # HoughLines yields None when nothing reaches the threshold; without
    # any line there is no skew estimate, so leave the page untouched.
    if lines is None or len(lines) == 0:
        return(img)

    # The result is laid out as (1, N, 2) in OpenCV 2.4 and (N, 1, 2) in
    # later versions; flattening to (N, 2) rows of (rho, theta) reads
    # every detected line in either case.
    thetas = np.reshape(lines, (-1, 2))[:, 1].astype(np.float64)

    # theta is the angle of the line's normal, so a horizontal line has
    # theta == pi/2; the difference is the line's skew in radians.
    middle = np.median(thetas - (np.pi / 2))
    middle_deg = middle * (180/np.pi)

    rotation = cv2.getRotationMatrix2D((imgWidth//2,imgHeight//2),middle_deg,1.0)

    # rotate while inverted. the background is filled with zeros
    # (black), this inversion means that ends up white
    deskewed = cv2.bitwise_not(cv2.warpAffine(cv2.bitwise_not(img),
                                              rotation,
                                              (imgWidth,imgHeight))
                           )
    return(deskewed)

class PreOMR(object):
    stavelineWidthThresh = 0.5
    
    def __init__(self, infile, deskew=False):
        """Load a page image and derive its grayscale and binary versions.

        :param infile: path of the image file, read with cv2.imread
        :param deskew: when true, straighten the page with the
            module-level deskew() function before anything else
        :raises IOError: if the image file cannot be read
        """
        self.debug = True
        self.infile = infile
        self.img = cv2.imread(self.infile)
        if self.img is None:
            # cv2.imread reports failure by returning None, not by raising
            raise IOError("Could not read image file: %s" % (infile,))
        if deskew:
            # The boolean parameter shadows the module-level deskew()
            # function inside this method, so calling deskew(...) here
            # would call the flag itself.  Fetch the function from the
            # module namespace instead.
            deskew_image = globals()['deskew']
            self.img = deskew_image(self.img)
        self.original = self.img
        self.imgHeight, self.imgWidth, self.imgDepth = self.img.shape
        self.img_gray = cv2.cvtColor(self.img,cv2.COLOR_BGR2GRAY)
        if self.debug:
            # separate copy that the find_* methods draw diagnostics on
            self.debug_img = self.img.copy()
        # Otsu picks the threshold itself, so the 0 passed here is ignored
        ret2,self.img_binary = cv2.threshold(self.img_gray,
                                             0,255,
                                             cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    
    def staffline_removal(self):
        """Remove the stafflines from the page with Gamera.

        Uses the MusicStaves_rl_fujinaga remover, keeping symbols that
        cross the stafflines as configured by crossing_symbols='bars'.

        Side effects: writes 'tmp.png' (the current page) and 'tmpb.png'
        (the staffless result) into the current working directory.

        :returns: the staffless page, loaded as a grayscale image
        """
        gamera_img = numpy_io.from_numpy(self.img)

        ms = musicstaves_rl_fujinaga.MusicStaves_rl_fujinaga(gamera_img)
        cv2.imwrite('tmp.png', self.img)
        ms.remove_staves(crossing_symbols = 'bars')
        ms.image.save_PNG("tmpb.png")
        # Read back the staffless output ('tmpb.png'); 'tmp.png' holds the
        # unmodified page, so loading that would return the stafflines too.
        staffless = cv2.imread("tmpb.png", cv2.CV_LOAD_IMAGE_GRAYSCALE)
        return(staffless)

    def find_staves(self, img):
        """Detect the staves of an image with Gamera's Miyao staff finder.

        :param img: numpy image, converted with numpy_io.from_numpy
        :returns: whatever the finder's get_skeleton() yields; the rest of
            this file treats it as a list of staves, each a sequence of
            line objects with left_x and y_list attributes
        """
        finder = stafffinder_miyao.StaffFinder_miyao(numpy_io.from_numpy(img))
        finder.find_staves()
        return(finder.get_skeleton())

    def find_blobs(self, img_binary):
        """Find the blobs (external contours) of a binary image.

        The image is inverted first, so the blobs are the zero-valued
        (dark) regions of img_binary.

        :param img_binary: single-channel binary image
        :returns: list of dicts, one per blob, with the entries
            'area' (contour area), 'x' / 'y' (centroid, or 0 / 0 when the
            contour has no area), 'contour', 'rect' (bounding box as a
            dict with 'x', 'y', 'width', 'height'), 'boundingRect' (the
            same box as the tuple returned by cv2.boundingRect), 'hull',
            'hull_area', 'system' (False) and 'parent' (None)
        """
        img_inverted = cv2.bitwise_not(img_binary)
        # findContours returns (contours, hierarchy) in OpenCV 2.4 and 4.x
        # but (image, contours, hierarchy) in 3.x; the contour list is the
        # second-to-last item in every version.
        contours = cv2.findContours(img_inverted,
                                    cv2.RETR_EXTERNAL,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]

        blobs = []
        for c in contours:
            m = cv2.moments(c)
            if m['m00'] == 0:
                # m00 is the contour's area; it is zero for degenerate
                # contours, which therefore have no centroid.
                centre_x = 0
                centre_y = 0
            else:
                centre_x = m['m10'] / m['m00']
                centre_y = m['m01'] / m['m00']

            rect = cv2.boundingRect(c)
            hull = cv2.convexHull(c)

            blobs.append({
                'area': cv2.contourArea(c),
                'x': centre_x,
                'y': centre_y,
                'contour': c,
                'rect': {'x': rect[0],
                         'y': rect[1],
                         'width': rect[2],
                         'height': rect[3]
                        },
                'boundingRect': rect,
                'hull': hull,
                'hull_area': abs(cv2.contourArea(hull)),
                'system': False,
                'parent': None,
            })

        return blobs

    def find_bars(self, system):
        """Locate barlines in a system and store them in system['barlines'].

        For every staff of the system a per-column projection of the
        staffless page is built, columns that are almost entirely dark
        across the staff are grouped into barline candidates, and a
        candidate is accepted when the columns just left and right of it
        are nearly empty.  Diagnostic lines are drawn onto system['image']
        (and onto self.debug_img while self.debug is set).

        :param system: system dict; reads its 'image', 'staves', 'width'
            and 'location' entries and (re)binds 'barlines'
        :returns: None
        """
        # Requires self.blobs to exist (it is assigned in find_systems)
        staffless = self.staffline_removal()
        blobs = self.blobs

        # NOTE(review): the string below follows other statements, so it is
        # a plain expression, not this method's docstring.
        """Finds the barlines in the system, given a binary image, a hash of
        info about the system, and blobs detected in the image.
        
        """
        img = system['image']

        for staff in system['staves']:
            # x range that is covered by every line of this staff
            min_x = 0
            max_x = self.imgWidth

            for line in staff:
                min_x = max(min_x, line.left_x)
                max_x = min(max_x, line.left_x + len(line.y_list))

                if self.debug:
                    # trace the detected staffline on the debug image
                    for (i,y) in enumerate(line.y_list):
                        x = line.left_x + i
                        cv2.line(self.debug_img,(x,y),(x,y),(0,255,0),3)

#            cv2.line(img,(0,int(start)),(imgWidth,int(start)),(0,255,255),3)
#            cv2.line(img,(0,int(stop)),(imgWidth,int(stop)),(0,255,255),3)
#            cv2.line(img,(0,int(first_staveline)),(imgWidth,int(first_staveline)),(255,255,0),3)
#            cv2.line(img,(0,int(last_staveline)),(imgWidth,int(last_staveline)),(255,255,0),3)

            # assuming single staff for now..
            # NOTE(review): 'barlines' is rebound on every pass of the staff
            # loop, so only the last staff's barlines remain in the system.
            barlines = [0]
            system['barlines'] = barlines

            # one (top, mid, bot) tuple per column, indexed by x - min_x
            x_projection = []

            for x in range(min_x, max_x):
                first_staveline = staff[0].y_list[x - staff[0].left_x]
                last_staveline = staff[-1].y_list[x - staff[-1].left_x]

                #print("Stavelines: first %d last %d" % (first_staveline, last_staveline))
                stave_height = last_staveline - first_staveline

                # mean distance between stavelines
                avg_inter = float(stave_height) / float(len(staff)-1)
                #print("avg_inter: %f" % (avg_inter,))

                # where to look a bit above and below the stave for whitespace
                # above a barline
                gap = avg_inter / 2.0
                # NOTE(review): start and stop are floats and are used as
                # slice bounds below - confirm the numpy in use accepts that.
                start = first_staveline - gap
                stop = last_staveline + gap

                # above stave, stave and below stave
                # each value is the fraction of zero-valued pixels in that
                # part of the column (countNonZero counts the others)
                top = float(gap - 
                            cv2.countNonZero(staffless[start:first_staveline, 
                                                       x:x+1])) / float(gap)
                mid = float(stave_height - 
                            cv2.countNonZero(staffless[first_staveline:last_staveline,
                                                       x:x+1])
                           ) / float(stave_height)
                bot = float(gap - 
                            cv2.countNonZero(staffless[last_staveline:stop, x:x+1])
                           ) / float(gap)
                x_projection.append((top,mid,bot))

            # NOTE(review): avg_inter, stave_height, first_staveline and
            # last_staveline keep the values of the last column scanned above
            # (and are undefined if that loop never ran).
            # -1 means "not currently inside a barline candidate"
            barline_start = -1
            gap_dist = avg_inter/4
            gap_min = (avg_inter/float(stave_height)) * 0.3
            gap_tolerance = int(avg_inter/10)

            # columns within this distance of either end are not examined
            margin = int(avg_inter*2)

            for x in range(min_x+margin, max_x-margin):
                (top,mid,bot) = x_projection[x - min_x]
                #if self.debug:
                    #cv2.line(system['image'],(x,first_staveline),(x,int(first_staveline+((last_staveline-first_staveline)*mid))),(255,255,0),1)

                # found start of barline candidate
                if top < 0.6 and bot < 0.6 and mid > 0.95:
                    if barline_start < 0:
                        barline_start = x
                else:
                    # NOTE(review): the sentinel is -1 but this tests '> 0',
                    # so a candidate starting at x == 0 would be ignored.
                    if barline_start > 0: 
                        # check there is nothing either side of 'barline'
                        barline_stop = x-1
                        barline_mid = barline_stop - ((barline_stop - barline_start)/2)
                        #print("barline start %d stop %d mid %d" % (barline_start, barline_stop, barline_mid))
                        left = int(max(0,barline_start-gap_dist))
                        right = int(min(system['width']-1,(x-1)+gap_dist))

                        # mean 'mid' projection in a small window around the
                        # probe column on each side of the candidate
                        total = 0
                        for i in range(left-gap_tolerance, left+gap_tolerance+1):
                            total = total + x_projection[i-min_x][1]
                        left_avg = total / ((gap_tolerance*2)+1)

                        total = 0
                        for i in range(right-gap_tolerance, right+gap_tolerance+1):
                            total = total + x_projection[i-min_x][1]
                        right_avg = total / ((gap_tolerance*2)+1)

                        # mark the two probe columns on the system image
                        cv2.line(img,(left,first_staveline),(left,last_staveline),(255,0,255),1)
                        cv2.line(img,(right,first_staveline),(right,last_staveline),(255,0,255),1)

                        if (left_avg <= gap_min and right_avg <= gap_min):
                            #print("success: left_avg %f right_avg %f" % (left_avg, right_avg))
                            # accepted: record the barline at the candidate's middle
                            cv2.line(img,(barline_mid,first_staveline),(barline_mid,last_staveline),(255,0,0),3)
                            barlines.append(barline_mid)
                        else:
                            #print("fail: left_avg %f right_avg %f" % (left_avg, right_avg))
                            # rejected: drawn in a different colour, not recorded
                            cv2.line(img,(barline_mid,first_staveline),(barline_mid,last_staveline),(0,255,0),3)
                        #show(img)
                        barline_start = -1
            # only used by the commented-out show() call below
            (x1, y1, x2, y2) = system['location']
            #show(system['image'][y1:y2, x1:x2])

    def extract_bars(self, system, blobs):
        """Cut a system into bars at its identified barlines.

        Each bar spans from one barline to the next; the last bar runs to
        system['width'].  A bar's image contains the system's own contour
        plus every child blob of the system whose bounding box lies
        entirely between the two barlines; everything else is white.

        :param system: system dict; reads its 'image', 'barlines',
            'width', 'location' and 'contour' entries
        :param blobs: all blobs on the page; those whose 'parent' is this
            very system object are considered
        :returns: list of dicts, one per bar, with 'image' (the bar crop),
            'page' (the full-size masked page) and 'location'
            ([x1, y1, x2, y2])
        """
        img = system['image']
        barlines = system['barlines']
        y1 = system['location'][1]
        y2 = system['location'][3]

        # every bar ends where the next one starts; the last ends at the
        # right edge of the system image
        barstops = list(barlines[1:]) + [system['width']]

        # the inverted page is the same for every bar
        inv = cv2.bitwise_not(img)

        result = []
        for barstart, barstop in zip(barlines, barstops):
            contours = [system['contour']]
            for blob in blobs:
                # identity, not equality: '==' would deep-compare the blob
                # dicts, including the numpy arrays stored in them
                if blob['parent'] is not system:
                    continue
                rect = blob['rect']
                if rect['x'] >= barstart and rect['x'] + rect['width'] <= barstop:
                    contours.append(blob['contour'])

            mask = np.zeros((self.imgHeight,self.imgWidth,1), np.uint8)
            cv2.drawContours(mask, contours, -1, 255, -1)

            # mask while inverted so the masked-out area ends up white
            dest = cv2.bitwise_and(inv,inv,mask = mask)
            dest = cv2.bitwise_not(dest)
            bar = {'image': dest[y1:y2, barstart:barstop],
                   'page': dest,
                   'location': [barstart,y1,barstop,y2]
            }
            result.append(bar)
        return(result)



    def find_staveblobs(self, cutstaves=False,img=None):
        """Split the blobs of a page into systems and everything else.

        A blob is a system when it is wider than stavelineWidthThresh
        times the page width and, for at least one detected staff, the
        left end of every staffline lies vertically within the blob's
        bounding rectangle.  While self.debug is set, stafflines and blob
        outlines are drawn onto self.debug_img.

        :param cutstaves: not used by this method
        :param img: optional BGR image to analyse instead of self.img; it
            is binarised here with Otsu's threshold
        :returns: tuple (staveblobs, otherblobs) of blob dicts; every
            blob gains the entries 'staves', 'system' and 'large'
        """
        # 'img == None' on a numpy array is an elementwise comparison, so
        # the None test has to be an identity test
        if img is None:
            img = self.img
            img_binary = self.img_binary
        else:
            img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
            ret2,img_binary = cv2.threshold(img_gray,
                                            0,255,
                                            cv2.THRESH_BINARY+cv2.THRESH_OTSU)

        staves = self.find_staves(img)
        blobs = self.find_blobs(img_binary)

        staveblobs = []
        otherblobs = []

        if self.debug:
            for staff in staves:
                for line in staff:
                    y = line.y_list[0]
                    cv2.line(self.debug_img,(0,y),(self.imgWidth,y),(0,255,0),3)

        min_width = self.imgWidth * self.stavelineWidthThresh
        for blob in blobs:
            rect = blob['rect']
            rect_top = rect['y']
            rect_bottom = rect['y'] + rect['height']
            blob['staves'] = []
            blob['system'] = False
            # large enough to contain a stave?
            blob['large'] = rect['width'] > min_width
            if blob['large']:
                if self.debug:
                    cv2.drawContours(self.debug_img,[blob['contour']],-1, (0, 255,255),2)
                for staff in staves:
                    # all stafflines have to be in blob (judged by the y
                    # value at the left end of each line)
                    inside = all(rect_top <= staveline.y_list[0] <= rect_bottom
                                 for staveline in staff)
                    if inside:
                        blob['system'] = True
                        blob['staves'].append(staff)
            if blob['system']:
                staveblobs.append(blob)
                print("found system with %d staves" % len(blob['staves']))
                if self.debug:
                    cv2.drawContours(self.debug_img,[blob['contour']],-1, (0, 0,255), 2)
            else:
                otherblobs.append(blob)

        return(staveblobs, otherblobs)

    def find_systems(self):
        """Group the page's blobs into systems and build an image of each.

        Every non-system blob gets a 'parent' system: the system whose
        bounding box it overlaps most, or, failing any overlap, the
        system closest on the y-axis.  Each system then gains the entries
        'image' (full-size page showing only the system and its children
        on white), 'location' ((x1, y1, x2, y2) bounding the system and
        its children), 'height' and 'width' (dimensions of 'image', i.e.
        of the whole page) and 'stave_min_x' (leftmost start of any of
        its stafflines).

        Also stores all blobs in self.blobs and, while self.debug is set,
        writes 'debug.png'.

        :returns: tuple (systems, blobs)
        """
        (staveblobs, otherblobs) = self.find_staveblobs()
        blobs = staveblobs + otherblobs
        self.blobs = blobs
        systems = staveblobs

        # attach disconnected bits in bounding box
        for blob in blobs:
            if blob['system']:
                continue
            blob['parent'] = None
            for system in systems:
                rect = intersect(system['rect'], blob['rect'])
                if rect['height'] > 0 and rect['width'] > 0:
                    # Biggest intersection wins
                    if (blob['parent'] is None
                            or rect['area'] > blob['intersection']['area']):
                        blob['parent'] = system
                        blob['intersection'] = rect

            # Just assign to closest bounding rectangle on y-axis
            if blob['parent'] is None:
                mindist = None
                for system in systems:
                    dist = ydist(system['rect'], blob['rect'])
                    if mindist is None or mindist > dist:
                        blob['parent'] = system
                        mindist = dist
                if blob['parent'] is None:
                    # only reachable when the page has no systems at all
                    print("wtf")

        # the inverted page and the dilation kernel are the same for
        # every system
        inv = cv2.bitwise_not(self.img)
        kernel = np.ones((4,4),np.uint8)

        # create new image for systems
        for system in systems:
            contours = [system['contour']]
            x1 = system['rect']['x']
            y1 = system['rect']['y']
            x2 = system['rect']['x'] + system['rect']['width']
            y2 = system['rect']['y'] + system['rect']['height']

            for blob in blobs:
                # identity, not equality: '==' would deep-compare the blob
                # dicts, including the numpy arrays stored in them
                if blob['parent'] is system:
                    contours.append(blob['contour'])
                    # include blob in image size/location
                    x1 = min(x1, blob['rect']['x'])
                    y1 = min(y1, blob['rect']['y'])
                    x2 = max(x2, blob['rect']['x'] + blob['rect']['width'])
                    y2 = max(y2, blob['rect']['y'] + blob['rect']['height'])

            mask = np.zeros((self.imgHeight,self.imgWidth,1), np.uint8)
            cv2.drawContours(mask, contours, -1, 255, -1)
            # grow the mask a little beyond the contours themselves
            mask = cv2.dilate(mask,kernel,iterations=3)

            # mask while inverted so the masked-out area ends up white
            dest = cv2.bitwise_and(inv,inv,mask = mask)
            dest = cv2.bitwise_not(dest)

            (h,w,d) = dest.shape
            system['image'] = dest
            system['location'] = (x1, y1, x2, y2)
            system['height'] = h
            system['width'] = w

            min_x = self.imgWidth
            for staff in system['staves']:
                for line in staff:
                    min_x = min(min_x, line.left_x)
            system['stave_min_x'] = min_x

            #self.find_bars(system)
            #system['bar_images'] = self.extract_bars(system, blobs)
        if self.debug:
            cv2.imwrite('debug.png', self.debug_img)
        return(systems,blobs)

    def blob_image(self,img,blob):
        """Crop an image to the bounding rectangle of a blob.

        :param img: array indexed as img[row, column]
        :param blob: dict whose 'rect' entry has 'x', 'y', 'width' and
            'height'
        :returns: the slice of img covered by the rectangle (produced by
            plain slicing, so no explicit copy is made)
        """
        rect = blob['rect']
        left, top = rect['x'], rect['y']
        rows = slice(top, top + rect['height'])
        columns = slice(left, left + rect['width'])
        return(img[rows, columns])

#    def join_broken_staves(self):
#        img = self.img
#        (staveblobs, otherblobs) = self.find_staveblobs()
#        for i in range(0, len(staveblobs)-1):
#            for j in range(i, len(staveblobs)):
#                a = staveblobs[i]
#                b = staveblobs[j]
#                atop = a['rect']['x']
#                abot = a['rect']['x'] + a['rect']['height']
#                btop = b['rect']['x']
#                bbot = b['rect']['x'] + b['rect']['height']
#                if atop > btop and a
        

    def _masked_blob_crop(self, blob):
        """Crop self.img to a blob's bounding box, whiting out everything
        outside the blob's contour."""
        mask = np.zeros((self.imgHeight,self.imgWidth,1), np.uint8)
        cv2.drawContours(mask, [blob['contour']], -1, (255,255,255), -1)
        # mask while inverted so the masked-out area ends up white
        inv = cv2.bitwise_not(self.img)
        dest = cv2.bitwise_and(inv,inv,mask = mask)
        dest = cv2.bitwise_not(dest)
        return self.blob_image(dest, blob)

    def remove_ossia(self):
        """Blank out ossia staves and store the result in self.img.

        Ossias are collected in two passes:

        1. systems whose tallest staff is less than 75% of the tallest
           staff on the page;
        2. non-system blobs (at least 1/50 of the page wide) in which the
           Dalitz staff finder detects a staff of four or more lines.
           This pass runs on a copy of the page where a white line has
           been drawn just above each system, to detach whatever touches
           the system from above.

        The contours of all ossias are then painted white in self.img.

        Side effects: writes 'test.png', and while self.debug is set also
        'removed_<n>.png' for each ossia and 'debug.png'.

        :returns: None
        """
        img = self.img

        ossia_mask = np.ones(self.img.shape[:2], dtype="uint8") * 255

        (staveblobs, otherblobs) = self.find_staveblobs()
        staff_heights = [max_staff_height(s) for s in staveblobs]
        # a page without systems has no reference height and no ossias
        # from this first pass
        staff_height = max(staff_heights) if staff_heights else 0
        height_thresh = staff_height * 0.75

        ossias = [s for s in staveblobs
                  if max_staff_height(s) < height_thresh]

        print("blobs %d/%d" % (len(staveblobs), len(otherblobs)))

        working_img = img.copy()

        for blob in staveblobs:
            # topmost y reached by the first line of any staff in the blob
            miny = self.imgHeight
            for staff in blob['staves']:
                staffline = staff[0]
                miny = min(min(staffline.y_list),miny)
            # white line just above the system
            cv2.line(working_img, (0,miny-4), (self.imgWidth,miny-4), (255,255,255), 4)

        cv2.imwrite('test.png', working_img)

        (staveblobs, otherblobs) = self.find_staveblobs(img=working_img)
        print("blobs %d/%d" % (len(staveblobs), len(otherblobs)))

        # floor division keeps the integer threshold on every Python version
        min_width = self.imgWidth // 50
        for blob in otherblobs:
            if blob['rect']['width'] < min_width:
                continue

            cropped = self._masked_blob_crop(blob)

            gi = numpy_io.from_numpy(cropped)
            sf = stafffinder_dalitz.StaffFinder_dalitz(gi)
            sf.find_staves()
            staves = sf.get_skeleton()

            if (len(staves) > 0):
                maxlines = max(map(len, staves))
            else:
                maxlines = 0
            if maxlines >= 4:
                print("aha ossia with %d lines" % (maxlines,))
                ossias.append(blob)

        for i, ossia in enumerate(ossias):
            if self.debug:
                # crop each ossia itself, so every debug file shows the
                # ossia it is numbered after
                fn = 'removed_%d.png' % i
                cv2.imwrite(fn, self._masked_blob_crop(ossia))
            cv2.drawContours(ossia_mask, [ossia['contour']], -1, 0, -1)

        # erode a little to get rid of 'ghosting' around ossia
        kernel = np.ones((4,4),np.uint8)
        ossia_mask=cv2.erode(ossia_mask,kernel,iterations=4)

        # mask while inverted so the removed areas end up white
        inverted = cv2.bitwise_not(img.copy())
        result = cv2.bitwise_or(inverted,inverted,mask=ossia_mask)
        result = cv2.bitwise_not(result)

        if self.debug:
            cv2.imwrite('debug.png', self.debug_img)

        self.img = result

    def split_movements(self, outfileA, outfileB):
        """Split the page in two where a new movement starts.

        A system starts a new movement when its staves begin more than 2%
        of the page width to the right of the leftmost system on the
        page (i.e. it is indented).  If several systems qualify, a
        warning is printed and the last one is used.

        The page is only split when the new movement starts below the
        first system.  Otherwise (no indented system found, or the
        indented system is the first one) the whole page is written to
        outfileA and outfileB is not written.

        :param outfileA: output path for the systems before the split (or
            for the whole page when it is not split)
        :param outfileB: output path for the systems from the split on
        :returns: index of the system (counted top-down from 0) that
            starts the new movement, or None if there is none
        :raises ValueError: if no systems are found on the page
        """
        # 2% of page width
        indentThresh = 0.02 * self.imgWidth

        systems, blobs = self.find_systems()

        # Top - down order
        systems = sorted(systems, key=lambda system: system['rect']['y'])

        xs = [system['stave_min_x'] for system in systems]
        threshold = min(xs) + indentThresh

        found = None
        for i, x in enumerate(xs):
            if x > threshold:
                if found is not None:
                    print("Oops, more than one movement found.")
                found = i
                print("New movement at system %d" % (i+1))

        # We don't split if the movement starts at top of page, and there
        # is nothing to split without a new movement.  (Slicing with
        # found=None would put every system into both files.)
        if found is None or found == 0:
            self.save_systems(outfileA, systems)
        else:
            self.save_systems(outfileA, systems[:found])
            self.save_systems(outfileB, systems[found:])
        return(found)

    def save(self, outfile):
        """Write the current page image (self.img) to a file.

        :param outfile: destination path, handed to cv2.imwrite
        """
        cv2.imwrite(outfile, self.img)

    def save_systems(self, outfile, systems):
        """Write a page-sized image that shows only the given systems.

        The contours of the systems and of all blobs in self.blobs that
        have one of them as parent are kept; the rest of the page is
        white.

        :param outfile: destination path, handed to cv2.imwrite
        :param systems: system dicts as produced by find_systems()
        """
        print("saving %s" % outfile)
        contours = []
        for system in systems:
            contours.append(system['contour'])
            # identity, not equality: '==' would deep-compare the blob
            # dicts, including the numpy arrays stored in them
            contours.extend(blob['contour'] for blob in self.blobs
                            if blob['parent'] is system)

        mask = np.zeros((self.imgHeight,self.imgWidth,1), np.uint8)
        cv2.drawContours(mask, contours, -1, 255, -1)

        # grow the mask a little beyond the contours themselves
        kernel = np.ones((4,4),np.uint8)
        mask = cv2.dilate(mask,kernel,iterations=1)

        # mask while inverted so the masked-out area ends up white
        inv = cv2.bitwise_not(self.img)
        dest = cv2.bitwise_and(inv,inv,mask = mask)
        dest = cv2.bitwise_not(dest)
        cv2.imwrite(outfile,dest)