-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathccUniversalTopicDictionary.py
More file actions
99 lines (81 loc) · 3.78 KB
/
ccUniversalTopicDictionary.py
File metadata and controls
99 lines (81 loc) · 3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
'''
Build a universal topic dictionary for a visual dataset by co-clustering
the image-word matrices of all of its categories.

Created on 15 Nov 2011
@author: ag00087
'''
# import libraries
import argparse
import sys
from optparse import OptionParser

import numpy as np

from ash.cocluster.cocluster import coclust
# parse options
# The option destinations and defaults defined here are read by
# ccUniversalTopicDictionary() through parser.parse_args(). Not every option
# is consumed by the code in this file (e.g. figfmt and neighbors are only
# declared here).
parser = OptionParser(
    usage='%prog -d DATASET [options]',
    description='Co-cluster the image-word matrices of a dataset and save '
                'the column-cluster indices as a universal topic dictionary.')
parser.add_option('-d', '--dataset', action='store', type='string',
                  dest='dataset', metavar='dataset',
                  help='visual dataset')
parser.add_option('-w', '--nCodeword', action='store', type='int',
                  dest='nCodeword', default=1024, metavar='nCodeword',
                  help='number of code words: 1024,2048,4096,8192,16384 '
                       '[default: %default]')
parser.add_option('-t', '--nTopic', action='store', type='int',
                  dest='nTopic', metavar='nTopic', default=128,
                  help='number of topic words [default: %default]')
parser.add_option('-s', '--scheme', action='store', type='string',
                  dest='ccType', metavar='ccType', default='e',
                  help='ccType [default: %default]')
# -q switches off the argument echo and the progress message.
parser.add_option('-q', '--quiet', action='store_false',
                  dest='verbose', default=True,
                  help='do not echo the arguments or print progress messages')
parser.add_option('-x', '--nFoldXVal', type='int', metavar='nFold',
                  dest='nFold', default=10,
                  help='number of stratified cross validation iterations '
                       '[default: %default]')
parser.add_option('-p', '--figformat', type='string', metavar='figfmt',
                  dest='figfmt', default='pdf',
                  help='type of output graph image, png, svg, jpg '
                       '[default: %default]')
parser.add_option('-r', '--rowClusters', type='int', metavar='nRowCluster',
                  dest='nRowCluster', default=10,
                  help='number of row clusters [default: %default]')
parser.add_option('-b', '--beta', type='float', metavar='beta',
                  dest='beta', default=1.0,
                  help='f-beta value [default: %default]')
parser.add_option('-n', '--neighbors', type='int', metavar='neighbors',
                  dest='neighbors', default=10,
                  help='number of neighbours for knn classifier '
                       '[default: %default]')
parser.add_option('-k', '--kernelType', type='string', metavar='kernelType',
                  dest='kernelType', default='rbf',
                  help='svm kernel type [default: %default]')
# configure data paths
# Every dataset lives in its own directory directly under rootDir; the
# sub-directory names below are appended after the dataset name.
rootDir = '/vol/vssp/diplecs/ash/Data/'
imgWrdDir, ucbDir, utdDir = (
    '/ImgWrdMat/',                  # image-word matrices (input)
    '/UniversalCB/',                # not referenced by the code in this file
    '/UniversalTopicDictionary/',   # co-cluster column indices (output)
)

# global variables
# Name of the per-dataset category list and the file-name extensions that
# follow the code-word count in data/result file names.
catidfname = 'catidlist.txt'
ucbext, imgWrdext, utdext = '.ucb', '.iwm', '.utd'
def getCatMap(dataset):
    """Return the category-name -> category-id mapping of a dataset.

    The mapping is read from ``rootDir + dataset + '/' + catidfname``, a
    comma-separated text file whose first column holds the category name
    and whose second column holds its integer id.

    Parameters
    ----------
    dataset : str
        Name of the dataset directory under ``rootDir``.

    Returns
    -------
    dict
        Maps each category name (``str``, at most 32 characters are kept)
        to its id (``int``).
    """
    catidfpath = rootDir + dataset + '/' + catidfname
    # Parse the file once, reading both columns as text. A text dtype
    # (rather than a byte-string one) keeps the names usable in path
    # concatenation, and the ids are converted with int() below instead of
    # the removed ``np.int`` alias.
    table = np.genfromtxt(catidfpath, delimiter=',', dtype='U32',
                          usecols=[0, 1])
    # genfromtxt squeezes a one-row file to shape (2,) and an empty file to
    # shape (0,); normalise to (rows, 2) so the loop below always sees pairs.
    table = table.reshape(-1, 2)
    return dict((str(name), int(catid)) for name, catid in table)
def ccUniversalTopicDictionary():
    """Co-cluster a dataset's image-word matrices and save the column clusters.

    Reads the command-line options from ``sys.argv`` through the
    module-level ``parser``, loads one image-word matrix per category
    returned by ``getCatMap``, stacks them row-wise, runs ``coclust`` on the
    stacked matrix and writes the column-cluster indices to
    ``rootDir + dataset + utdDir + dataset + str(nCodeword) + utdext``.

    Exits through ``parser.error`` when no dataset is given.

    Raises
    ------
    ValueError
        If the dataset's category list is empty.
    """
    # acquire program arguments
    (options, args) = parser.parse_args(sys.argv[1:])  # @UnusedVariable
    dataset = options.dataset
    if dataset is None:
        # Every path below is built from the dataset name, so fail early
        # with a usage message instead of a TypeError on None + str.
        parser.error('a dataset name is required (-d/--dataset)')
    nRowCluster = options.nRowCluster
    nTopic = options.nTopic
    ccType = options.ccType
    kernelType = options.kernelType
    nFold = options.nFold
    nCodeword = options.nCodeword
    beta = options.beta

    # echo arguments
    if options.verbose:
        echoed = (dataset, nRowCluster, nTopic, ccType,
                  kernelType, beta, nFold, nCodeword)
        # Single-argument print() gives the same space-separated line under
        # both the print statement and the print function.
        print(' '.join(str(value) for value in echoed))
        print(options)

    # configure data path and other parameters
    dataPath = rootDir + dataset + imgWrdDir
    resultPath = rootDir + dataset + utdDir + dataset
    catmap = getCatMap(dataset)
    catList = list(catmap)
    if not catList:
        raise ValueError('no categories listed for dataset %r' % (dataset,))
    dataext = str(nCodeword) + imgWrdext
    resultext = str(nCodeword) + utdext

    # Load every category's matrix first and stack them in one call;
    # np.empty() needs a shape and cannot serve as an "empty" seed array.
    catArrays = []
    for catName in catList:
        if isinstance(catName, bytes) and not isinstance(catName, str):
            # Byte-string keys (Python 3 with an 'S' dtype) must become
            # text before they can be joined into a path.
            catName = catName.decode()
        iwmFileName = dataPath + catName + dataext
        # ndmin=2 keeps a one-row file two-dimensional so it stacks with
        # the other categories along axis 0.
        catArrays.append(np.loadtxt(iwmFileName, dtype=np.int16,
                                    delimiter=' ', ndmin=2))
    iwmData = np.concatenate(catArrays, axis=0)

    if options.verbose:
        print('co-clustering...')
    ccData = coclust(iwmData, dataset, nRowCluster, nTopic, ccType)
    # the indices of co-clusters columns
    # NOTE(review): assumes element 1 of coclust's result is a
    # whitespace-separated string of integers -- confirm against coclust.
    ccCol = np.array([int(i) for i in ccData[1].split()])
    resultFileName = resultPath + resultext
    # savetxt expects a format string; '%d' writes the indices as integers.
    np.savetxt(resultFileName, ccCol, fmt='%d', delimiter=' ')
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if '__main__' == __name__:
    ccUniversalTopicDictionary()