-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgs_raw_replace.pl
More file actions
executable file
·129 lines (118 loc) · 3.47 KB
/
gs_raw_replace.pl
File metadata and controls
executable file
·129 lines (118 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env perl
# Author: Zhao
# Date: 2012-05-04
# Purpose: 将gs导出文件的raw数据替换为另一个atv里面的raw数据
# Note: all_raw.txt 为原始数值对照的文件,导出时只选raw data,注释什么不要,
# 可以加快计算
use warnings;
use Data::Table;
use 5.010;
use Smart::Comments;
use Statistics::Basic qw/average/;
use threads;
use Thread::Semaphore;
use strict;

# Settings and inputs.  These are file-scoped so that the helper subs below
# and waitquit() can refer to them.
my $cpus      = 10;    # upper bound on files processed at the same time
my $semaphore = Thread::Semaphore->new($cpus);
my $all_file  = 'all_raw.txt';
my $name_file = 'name.txt';
my @files     = glob 'mRNA/*.txt lncRNA/*.txt';

say 'Replace raw data in:';
say join( "\n", @files );
say "using coresponding data in $all_file";

my $all_tbl  = fromTSV($all_file);
my $name_tbl = fromTSV($name_file);

# Exactly two columns in the name table means "no Group column"; any other
# width makes the per-sample column labels include the group name.
my $is_group = $name_tbl->nofCol == 2 ? 0 : 1;

# Index the reference table once: first-column value => list of row numbers.
# A hash lookup per row replaces a scan of the whole reference table per row,
# and no ID is pasted into evaluated Perl code any more (that broke on IDs
# containing quotes or sigils).
my $all_key_col = ( $all_tbl->header )[0];
my %all_rows_for;
foreach my $row_idx ( 0 .. $all_tbl->nofRow - 1 ) {
    my $key = $all_tbl->elm( $row_idx, $all_key_col );
    push @{ $all_rows_for{ defined $key ? $key : '' } }, $row_idx;
}

# Rewrite the "(raw)" columns of one export file with values taken from the
# reference table, then write "<file>.com" (comment lines + updated table).
#   $file - path of the export file; its name must contain "sample" or
#           "group", which selects per-sample copying or per-group averaging.
# Dies with 'OMG!' for any other file name, and on I/O failures.
sub _replace_raw_in_file {
    my ($file) = @_;

    # "sample" is tested first, exactly as before, so a name containing both
    # words is handled as a per-sample file.
    my $per_sample = $file =~ /sample/ ? 1 : 0;
    die 'OMG!' unless $per_sample || $file =~ /group/;

    my $tbl      = fromTSV($file);
    my $key_col  = ( $tbl->header )[0];
    my $last_row = $tbl->nofRow - 1;

    # The column mapping does not depend on the row, so build it once.
    my @sample_cols;       # [ column in $tbl, column in $all_tbl ] pairs
    my %source_cols_of;    # group name => columns in $all_tbl to average
    foreach my $i ( 0 .. $name_tbl->nofRow - 1 ) {
        if ($per_sample) {
            my $target =
                $is_group
              ? join( '', '[', $name_tbl->elm( $i, 'Sample' ), ', ',
                $name_tbl->elm( $i, 'Group' ), '](raw)' )
              : join( '', '[', $name_tbl->elm( $i, 'Sample' ), '](raw)' );
            next if $tbl->colIndex($target) == -1;
            my $source = join( '', $name_tbl->elm( $i, 'Samples' ), '(raw)' );
            push @sample_cols, [ $target, $source ];
        }
        else {
            my $gn     = $name_tbl->elm( $i, 'Group' );
            my $target = join( '', '[', $gn, '](raw)' );
            next if $tbl->colIndex($target) == -1;
            my $source = join( '', $name_tbl->elm( $i, 'Samples' ), '(raw)' );
            push @{ $source_cols_of{$gn} }, $source;
        }
    }

    foreach my $r ( 0 .. $last_row ) {
        my $id   = $tbl->elm( $r, $key_col );
        my $hits = $all_rows_for{ defined $id ? $id : '' } || [];
        unless ( @$hits == 1 ) {
            say "row number not correct for ", $id;
        }

        # Use the first matching reference row.  Without any match the
        # looked-up values are undef, so the cells are blanked as before.
        my $src_row = $hits->[0];
        my $raw_at  = sub {
            my ($col) = @_;
            return defined $src_row ? $all_tbl->elm( $src_row, $col ) : undef;
        };

        foreach my $pair (@sample_cols) {
            my ( $target, $source ) = @$pair;
            $tbl->setElm( $r, $target, $raw_at->($source) );
        }
        foreach my $gn ( keys %source_cols_of ) {
            my @values = map { $raw_at->($_) } @{ $source_cols_of{$gn} };

            # "* 1" turns the Statistics::Basic result into a plain number.
            $tbl->setElm( $r, join( '', '[', $gn, '](raw)' ),
                average( \@values ) * 1 );
        }

        if ( $r % 500 == 0 ) {
            bar( $r, $last_row );
            say $file;
        }
    }

    outputTSV( $tbl, $file . '.mo' );
    _write_commented_output($file);
    say "$file done";
    return;
}

# Build "<file>.com": the comment lines of <file> followed by the contents of
# the temporary "<file>.mo", which is removed afterwards.  Done with plain
# file I/O so that spaces or shell metacharacters in the path are harmless.
sub _write_commented_output {
    my ($file)    = @_;
    my $body_file = $file . '.mo';
    my $out_file  = $file . '.com';

    open my $out, '>', $out_file or die "Cannot write $out_file: $!";
    binmode $out;

    # Same pattern that fromTSV() skips while reading, so only real comment
    # lines are copied; data lines that merely contain '#' are already part
    # of the table and must not be emitted twice.
    open my $src, '<', $file or die "Cannot read $file: $!";
    binmode $src;
    while ( my $line = <$src> ) {
        print {$out} $line if $line =~ /^\s*#/;
    }
    close $src;

    open my $body, '<', $body_file or die "Cannot read $body_file: $!";
    binmode $body;
    while ( my $line = <$body> ) {
        print {$out} $line;
    }
    close $body;

    close $out or die "Cannot finish $out_file: $!";
    unlink $body_file or warn "Cannot remove $body_file: $!";
    return;
}

# One detached worker per file; the semaphore keeps at most $cpus running.
foreach my $file (@files) {
    $semaphore->down();
    my $thread = async {

        # Trap failures so the slot is always handed back; otherwise a
        # single dying worker would leave waitquit() blocked forever.
        my $ok = eval { _replace_raw_in_file($file); 1 };
        warn "Processing $file failed: $@" unless $ok;
        $semaphore->up();
    };
    $thread->detach();
}
waitquit();
# Wait for the worker threads to finish.
# The main loop takes one permit per worker and each worker gives it back
# when it is done, so being able to take all $cpus permits means that no
# worker still holds one.  The permits are returned afterwards.
sub waitquit {
    $semaphore->down() for 1 .. $cpus;
    $semaphore->up($cpus);
}
# Read a tab-separated file into a Data::Table object.
#   $_[0] - path of the file to read (required).
# Prints the guessed line-ending convention of the file, then parses it with
# lines matching /^\s*#/ skipped.
# Returns whatever Data::Table::fromTSV returns.
# Dies with "File not declared!" when no file name is given.
sub fromTSV {
    my ($file) = @_;
    die "File not declared!" unless defined $file && length $file;

    my %os_name_for = ( 0 => "UNIX", 1 => "DOS", 2 => "MAC" );

    # Guess once and reuse the answer: each guess has to inspect the file.
    my $os = Data::Table::fromFileGuessOS($file);
    print "read $file in ", $os_name_for{$os}, " format.\n";

    # NOTE(review): the arguments 1 and undef are passed through unchanged;
    # presumably "first line is the header" and "no explicit header list" -
    # confirm against the Data::Table documentation.
    return Data::Table::fromTSV( $file, 1, undef,
        { OS => $os, skip_pattern => '^\s*#' } );
}
# Serialise a table as tab-separated text with UNIX line endings (OS => 0).
#   $table  - object providing a Data::Table-style tsv() method (required).
#   $file   - optional path; when given the table is written there,
#             otherwise the text is printed to STDOUT.
#   $header - passed to tsv() as its first argument; defaults to 1.
# Returns the serialised text in both cases.
# Dies when $table is undefined (previously a message was printed and the
# following method call on undef crashed anyway).
sub outputTSV {
    my ( $table, $file, $header ) = @_;
    die "outputTSV() parameter ERROR!" unless defined $table;
    $header = 1 unless defined $header;

    if ( defined $file ) {
        $table->tsv( $header, { OS => 0, file => $file } );
        return $table->tsv( $header, { OS => 0, file => undef } );
    }

    # No file: serialise once and use the same text for printing and return.
    my $text = $table->tsv( $header, { OS => 0, file => undef } );
    print $text;
    return $text;
}
# Draw a 50-character text progress bar on the current output handle.
#   $_[0] - amount done, $_[1] - total amount.
# Prints "\r[", the bar, "]" and the percentage with one decimal; no newline
# is written, so the next call overwrites the same terminal line.
# Returns 0 without printing when either argument is false (0, '', undef).
sub bar {
    local $| = 1;    # unbuffered for the duration of this call

    my ( $done, $total ) = @_;
    return 0 unless $done;
    return 0 unless $total;

    my $ratio  = $done / $total;
    my $filled = int( $ratio * 50 );
    my $gauge  = join '', "\r[", '#' x $filled, ' ' x ( 50 - $filled ), ']';

    print $gauge;
    printf( "%2.1f%%", $ratio * 100 );
    local $| = 0;
}