#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
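USAGE: python log2any.py [-t] <template name> [ARG0~9] < logfile
    -template name : a key of TemplateDict, see log2any_template.py
    -ARG0~9 : extra command-line arguments, default empty string
    -t, --total : only print the attribute value computed from the input

DESCRIPTION: Convert a log file (read from standard input) to any format you like.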

"""

import sys
import re
import optparse
import io

import log2any_patterns
import log2any_template


# Encoding used to decode the log data read from standard input.
# Alternative encodings are left commented out below; enable exactly one.
#ENCODE = 'latin-1'
#ENCODE = 'cp949'
#ENCODE = 'euc_kr'
#ENCODE = 'iso2022_jp_2'
#ENCODE = 'iso2022_kr'
#ENCODE = 'iso8859_16'
#ENCODE = 'big5'
ENCODE = 'utf-8'


def createPattern(match):
    """Return the regex fragment registered for a matched placeholder.

    ``match`` is a regex match object whose full text is a placeholder
    wrapped in one leading and one trailing character (the braces of
    ``{name}``).  Those two characters are stripped and the remaining key is
    looked up in ``log2any_patterns.patterns_dict``; an unknown key raises
    ``KeyError``.  Intended for use as the replacement callable of
    ``re.sub``.
    """
    placeholder = match.group()
    key = placeholder[1:-1]
    return log2any_patterns.patterns_dict[key]


def replaceSlash(value):
    """Return a new list with every backslash in each item doubled.

    ``value`` is an iterable of strings (for example the groups of a regex
    match).  Falsy items such as ``None`` or the empty string are copied
    through unchanged.
    """
    return [item.replace('\\', '\\\\') if item else item for item in value]


def changePatternAttribute(attribute):
    """Rewrite the attribute pattern to capture ``attribute`` numeric fields.

    Every entry of ``log2any_patterns.patterns_dict`` whose value equals the
    current ``log2any_patterns.attribute`` is replaced by a capture group
    made of ``attribute`` repetitions of ``:[-0-9 ]+``.  When at least one
    entry was replaced, ``log2any_patterns.attribute`` is updated to the new
    pattern so that later calls can find those entries again.

    Args:
        attribute: number of ``:[-0-9 ]+`` repetitions inside the group.

    Returns:
        None.  The function works purely by mutating ``log2any_patterns``.
    """
    # Capture the value to look for *before* touching anything: reassigning
    # log2any_patterns.attribute while still scanning would make every later
    # entry that holds the old value compare unequal and be skipped.
    current = log2any_patterns.attribute
    matching = [
        key
        for key, pattern in log2any_patterns.patterns_dict.items()
        if pattern == current
    ]
    if not matching:
        # Nothing refers to the attribute pattern; leave the module untouched.
        return

    replacement = '(' + r':[-0-9 ]+' * attribute + ')'
    for key in matching:
        log2any_patterns.patterns_dict[key] = replacement
    log2any_patterns.attribute = replacement
    return


def totalAttribute(data, template):
    """Tally candidate attribute counts found in the log lines.

    For every template entry whose ``textFrom`` contains
    ``{__item_property__}``, the number of colons in the comma-separated
    field holding that placeholder is taken as a baseline.  Each log line
    containing the entry's ``logType`` then contributes one vote for
    ``(largest colon count of any comma-separated field) - baseline``.

    Args:
        data: iterable of log lines (strings).
        template: iterable of dicts with ``textFrom`` and ``logType`` keys.

    Returns:
        A list of ``(candidate, votes)`` tuples ordered by votes, highest
        first, or ``0`` when no line produced a vote.
    """
    marker = '{__item_property__}'

    # (logType, colons already present in the placeholder's own field)
    baselines = []
    for entry in template:
        text_from = entry['textFrom']
        if marker not in text_from:
            continue
        base_colons = ''
        for field in text_from.split(','):
            if marker in field:
                base_colons = field.count(':')
        baselines.append((entry['logType'], base_colons))

    votes = {}
    for log in data:
        for log_type, base_colons in baselines:
            if log_type not in log:
                continue
            most_colons = max(field.count(':') for field in log.split(','))
            candidate = most_colons - base_colons
            votes[candidate] = votes.get(candidate, 0) + 1

    ranked = sorted(votes.items(), key=lambda pair: pair[1], reverse=True)
    return ranked if ranked else 0

def scoreAttribute(data, AttributeList, templateName):
    """Pick the candidate attribute count that matches the most sample lines.

    For each ``(candidate, votes)`` pair in ``AttributeList`` the candidate
    is installed with ``changePatternAttribute``, the anchored regex of every
    template entry is rebuilt, and the first 100 lines of ``data`` are
    tested: a line is checked against the regex of the first log type it
    contains, and each successful match adds one point to the candidate.

    Args:
        data: list of log lines (strings); only ``data[:100]`` is sampled.
        AttributeList: sequence of ``(candidate, votes)`` pairs as returned
            by ``totalAttribute``; any falsy value short-circuits to ``0``.
        templateName: key into ``log2any_template.TemplateDict``.

    Returns:
        The candidate with the highest number of matching lines (the
        earliest candidate wins ties), or ``0`` when there are no candidates
        or no line matched.

    Side effects:
        ``log2any_patterns`` is left configured for the last candidate
        tried; callers must re-apply the returned value themselves.
    """
    if not AttributeList:
        return 0

    template = log2any_template.TemplateDict[templateName]
    PatternRe = re.compile(r"{__\w+__}")

    # Scores accumulate across ALL candidates.  Resetting this dict per
    # candidate would leave only the last candidate available for ranking.
    score = {}
    for attr in AttributeList:
        candidate = attr[0]
        changePatternAttribute(candidate)

        # The placeholder patterns just changed, so the regexes have to be
        # rebuilt for every candidate.
        reobj = {}
        reLogType = []
        for temp in template:
            LogPattern = r'^' + PatternRe.sub(createPattern, temp['textFrom']) + r'$'
            reobj[temp['logType']] = re.compile(LogPattern)
            reLogType.append(temp['logType'])

        for line in data[:100]:
            LogRe = None
            for LogType in reLogType:
                if LogType in line:
                    LogRe = reobj[LogType]
                    break
            if LogRe is None:
                continue
            if LogRe.match(line):
                score[candidate] = score.get(candidate, 0) + 1

    if not score:
        return 0
    # max() returns the first maximal key, i.e. the earliest candidate on ties.
    return max(score, key=score.get)

"""
def testPattern(sysargv):
    TemplateDict = log2any_template.TemplateDict
    template = sysargv[2]
    PatternRe = re.compile(r"{__\w+__}")
    textFrom = TemplateDict[template]['textFrom']
    PatternKeys = PatternRe.findall(textFrom)
    PatternKeys = map(lambda x:x[1:-1] , PatternKeys)
    LogPattern = PatternRe.sub(createPattern, textFrom)
    LogRe = re.compile(LogPattern)
    textTo = TemplateDict[template]['textTo']
    LogType = TemplateDict[template]['logType']
    data = sys.stdin.readlines()
    line = data[0]
    line = line[:-1] # remove \n
    LogMatch = LogRe.match(line)
    if LogMatch:
        print "-- match --"
        LogValue = LogMatch.groups()
        tran = dict(zip(PatternKeys, LogValue))
        print textTo.format(**tran)
    else:
        LineCount = line.count(',')
        TextFromCount = textFrom.count(',')
        print "data count: %d"%LineCount
        print "pattern count: %d\n"%TextFromCount
        if LineCount == TextFromCount:
            LineSplit = line.split(',')
            TextFromSplit = textFrom.split(',')
            PatternsDict = log2any_patterns.patterns_dict
            flag = 0
            for x,y in zip(LineSplit, TextFromSplit):
                print "%s"%x
                print "%s"%y
                count = y.count('$')
                end = flag + count
                ReList = []
                for key in PatternKeys[flag:end]:
                    ReList.append(PatternsDict[key])
                print ReList
                flag = end
            print line.split(',')
            print textFrom.split(',')

    sys.exit()
"""

def convertLog(log, template='log2spark'):
    """Parse one log line into a dict keyed by the template's placeholders.

    The regexes for every entry of ``log2any_template.TemplateDict[template]``
    are built by substituting each ``{word}`` placeholder in ``textFrom``
    through ``createPattern`` and anchoring the result with ``^``/``$``.  The
    line is then matched against the entry whose ``logType`` occurs in it.

    Args:
        log: a single log line.
        template: key into ``log2any_template.TemplateDict``; defaults to
            ``'log2spark'``.

    Returns:
        ``None`` when ``log`` is falsy; an empty tuple when no log type
        occurs in the line or the regex does not match; otherwise a dict
        mapping placeholder names (braces stripped) to the captured groups.
    """
    if not log:
        return

    # NOTE(review): the ARG0..ARG9 environment is built and passed through
    # sysenv_key_update but its result is not used here.  The call is kept
    # because sysenv_key_update lives in another module and may have side
    # effects -- confirm and remove.
    SysEnv = {'ARG' + str(x): '1' for x in range(10)}
    for EnvIndex, value in enumerate(sys.argv[2:]):
        SysEnv['ARG' + str(EnvIndex)] = value
    log2any_template.sysenv_key_update(SysEnv)

    PatternRe = re.compile(r"{\w+}")
    reobj = {}
    reLogType = []
    for temp in log2any_template.TemplateDict[template]:
        textFrom = temp['textFrom']
        PatternKeys = [key[1:-1] for key in PatternRe.findall(textFrom)]
        LogRe = re.compile(r'^' + PatternRe.sub(createPattern, textFrom) + r'$')
        reobj[temp['logType']] = (LogRe, PatternKeys)
        reLogType.append(temp['logType'])

    # NOTE(review): there is deliberately no early exit, so the LAST log type
    # found in the line wins; the script's main loop stops at the first one.
    # Confirm which behaviour is intended.
    reTemplate = None
    for LogType in reLogType:
        if LogType in log:
            reTemplate = reobj[LogType]
    if reTemplate is None:
        return ()

    LogRe, PatternKeys = reTemplate
    LogMatch = LogRe.match(log)
    if not LogMatch:
        return ()
    return dict(zip(PatternKeys, LogMatch.groups()))


    
    
if __name__ == "__main__":
    operation = optparse.OptionParser()
    operation.add_option('-t', '--total', dest='total_attribute', action='store_true', default=False, help='sum of attribute')
    (opt, argv) = operation.parse_args()
    #print (opt, argv)
    if sys.stdin.isatty():
        #print "No input data"
        print("No input data")
        sys.exit(0)
    #if len(sys.argv) > 1:
    #    template = sys.argv[1]
    #else:
        #print "Please input template "
    #    print("Please input template ")
    #    sys.exit()
    if argv:
        template = argv[0]
    else:
        #print "Please input template "
        print("Please input template ")
        sys.exit()
    TemplateDict = log2any_template.TemplateDict
    #if not TemplateDict.has_key(template):
    if template not in TemplateDict:
        #print "Not find this template"
        print("Not find this template")
        sys.exit()
    if opt.total_attribute:
        #data = sys.stdin.readlines()
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding=ENCODE)
        data = input_stream.readlines()
        AttributeList = totalAttribute(data, TemplateDict[template])
        attribute = scoreAttribute(data, AttributeList, template)
        print(attribute)
        sys.exit()
    if template == '-t':
        testPattern(sys.argv)
        sys.exit()

    SysEnv = {}
    for x in range(10):
        SysEnv['ARG' + str(x)] = ''
    if len(sys.argv) > 2:
        ArgvLen = len(sys.argv)
        for EnvIndex, ArgIndex in enumerate(range(2, ArgvLen)):
            try:
                SysEnv['ARG' + str(EnvIndex)] = sys.argv[ArgIndex]
            except IndexError:
                SysEnv['ARG' + str(EnvIndex)] = ''
    SysEnv = log2any_template.sysenv_key_update(SysEnv)
    PatternRe = re.compile(r"{__\w+__}")
    input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding=ENCODE, errors='ignore')
    #input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding=ENCODE, errors='replace')
    data = input_stream.readlines()
    '''
    print(dir(input_stream))
    while 1:
      
        line = input_stream.next()
        try:
            pass
            #line = input_stream.readline()
            #if not line:
            #    break
        except:
            sys.stderr.write('==EncodeError==\n')
            line = 'aaa\n'
        if not line:
            break
        data.append(line)
    '''
    #for x in range(len(input_stream)):
    #try:
    #    data = input_stream.readlines()
    #except:
    #    print('encoding error')
    if log2any_patterns.automatic_calculation:
        AttributeList = totalAttribute(data, TemplateDict[template])
        attribute = scoreAttribute(data, AttributeList, template)
        changePatternAttribute(attribute)
    DataCount = len(data)
  
    TemplateList = TemplateDict[template]
    reobj = {}
    reLogType = []
    for temp in TemplateList:
        redict = {}
        textFrom = temp['textFrom']
        PatternKeys = PatternRe.findall(textFrom)
        #import itertools
        PatternKeys = list(map(lambda x:x[1:-1] , PatternKeys))
        #PatternKeys = itertools.starmap(lambda x:x[1:-1] , PatternKeys)
        LogPattern = PatternRe.sub(createPattern, textFrom)
        LogPattern = r'^' + LogPattern + r'$'
        #print the complate pattern
        #print(LogPattern)        
        LogRe = re.compile(LogPattern)
        redict['LogRe'] = LogRe
        redict['PatternKeys'] = PatternKeys
        redict['textTo'] = temp['textTo']
        reobj[temp['logType']] = redict
        reLogType.append(temp['logType'])

    # LogstashData = []
    MergeFlag = -2
    #data = sys.stdin.readlines()
    NoMatch = ''
    NoMatchPrefix = '==NoMatch=='
    NoTemplate = ''
    NoTemplatePrefix = '==NoTemplate=='
    for index, line in enumerate(data):
        if index == MergeFlag:
            continue
        reTemplate = None
        for LogType in reLogType:
            GrepMatch = line.find(LogType)
            if GrepMatch is not -1:
                reTemplate = reobj[LogType]
                break
        if reTemplate is None:
            NoTemplate = NoTemplate + NoTemplatePrefix + line
            continue
        LogMatch = reTemplate['LogRe'].match(line)
        if LogMatch:
            LogValue = LogMatch.groups()
            LogValue = replaceSlash(LogValue)
            tran = dict(zip(reTemplate['PatternKeys'], LogValue))
            tran.update(SysEnv)
            print(reTemplate['textTo'].format(**tran))
            # LogstashData.append(tran)
        else:
            if index + 1 >= DataCount:
                NoMatch = NoMatch + NoMatchPrefix + line
                continue
            else:
                MergeLine = line.replace('\n', '') + data[index + 1]
            reLogMatch = reTemplate['LogRe'].match(MergeLine)
            if reLogMatch:
                NextMatch = reTemplate['LogRe'].match(data[index + 1])
                if NextMatch:
                    NoMatch = NoMatch + NoMatchPrefix + line
                else:
                    LogValue = reLogMatch.groups()
                    LogValue = replaceSlash(LogValue)
                    tran = dict(zip(reTemplate['PatternKeys'], LogValue))
                    tran.update(SysEnv)
                    #print reTemplate['textTo'].format(**tran)
                    print(reTemplate['textTo'].format(**tran))
                    MergeFlag = index + 1
                    # LogstashData.append(tran)
            else:
                NoMatch = NoMatch + NoMatchPrefix + line
    # sys.stderr.write('---------NO_MATCH---------\n')
    sys.stderr.write(NoMatch)
    # sys.stderr.write('---------NO_TEMPLATE---------\n')
    sys.stderr.write(NoTemplate)
    # for x in LogstashData:
    #     f.write("%s\n"%x)
    # f.close()
    #print sys.stdout.encoding
