From d5883c6bf619c554b29b3a791ddecba9629b5f78 Mon Sep 17 00:00:00 2001 From: Usr-cn-jfsanchez Date: Mon, 4 Sep 2017 17:42:33 +0200 Subject: [PATCH 1/3] Implemented threads in Extension step --- Cwd.pm | 855 +++++++++++++++++++++++++++ Parallel/ForkManager.pm | 416 +++++++++++++ Parallel/ForkManager/callback.pl | 48 ++ Parallel/ForkManager/parallel_get.pl | 17 + bin/ExtendOrFormatContigs.pl | 182 ++++-- 5 files changed, 1463 insertions(+), 55 deletions(-) create mode 100644 Cwd.pm create mode 100644 Parallel/ForkManager.pm create mode 100755 Parallel/ForkManager/callback.pl create mode 100755 Parallel/ForkManager/parallel_get.pl diff --git a/Cwd.pm b/Cwd.pm new file mode 100644 index 0000000..f27a3a0 --- /dev/null +++ b/Cwd.pm @@ -0,0 +1,855 @@ +package Cwd; + +=head1 NAME + +Cwd - get pathname of current working directory + +=head1 SYNOPSIS + + use Cwd; + my $dir = getcwd; + + use Cwd 'abs_path'; + my $abs_path = abs_path($file); + +=head1 DESCRIPTION + +This module provides functions for determining the pathname of the +current working directory. It is recommended that getcwd (or another +*cwd() function) be used in I code to ensure portability. + +By default, it exports the functions cwd(), getcwd(), fastcwd(), and +fastgetcwd() (and, on Win32, getdcwd()) into the caller's namespace. + + +=head2 getcwd and friends + +Each of these functions are called without arguments and return the +absolute path of the current working directory. + +=over 4 + +=item getcwd + + my $cwd = getcwd(); + +Returns the current working directory. + +Exposes the POSIX function getcwd(3) or re-implements it if it's not +available. + +=item cwd + + my $cwd = cwd(); + +The cwd() is the most natural form for the current architecture. For +most systems it is identical to `pwd` (but without the trailing line +terminator). + +=item fastcwd + + my $cwd = fastcwd(); + +A more dangerous version of getcwd(), but potentially faster. + +It might conceivably chdir() you out of a directory that it can't +chdir() you back into. If fastcwd encounters a problem it will return +undef but will probably leave you in a different directory. For a +measure of extra security, if everything appears to have worked, the +fastcwd() function will check that it leaves you in the same directory +that it started in. If it has changed it will C with the message +"Unstable directory path, current directory changed +unexpectedly". That should never happen. + +=item fastgetcwd + + my $cwd = fastgetcwd(); + +The fastgetcwd() function is provided as a synonym for cwd(). + +=item getdcwd + + my $cwd = getdcwd(); + my $cwd = getdcwd('C:'); + +The getdcwd() function is also provided on Win32 to get the current working +directory on the specified drive, since Windows maintains a separate current +working directory for each drive. If no drive is specified then the current +drive is assumed. + +This function simply calls the Microsoft C library _getdcwd() function. + +=back + + +=head2 abs_path and friends + +These functions are exported only on request. They each take a single +argument and return the absolute pathname for it. If no argument is +given they'll use the current working directory. + +=over 4 + +=item abs_path + + my $abs_path = abs_path($file); + +Uses the same algorithm as getcwd(). Symbolic links and relative-path +components ("." and "..") are resolved to return the canonical +pathname, just like realpath(3). + +=item realpath + + my $abs_path = realpath($file); + +A synonym for abs_path(). + +=item fast_abs_path + + my $abs_path = fast_abs_path($file); + +A more dangerous, but potentially faster version of abs_path. + +=back + +=head2 $ENV{PWD} + +If you ask to override your chdir() built-in function, + + use Cwd qw(chdir); + +then your PWD environment variable will be kept up to date. Note that +it will only be kept up to date if all packages which use chdir import +it from Cwd. + + +=head1 NOTES + +=over 4 + +=item * + +Since the path separators are different on some operating systems ('/' +on Unix, ':' on MacPerl, etc...) we recommend you use the File::Spec +modules wherever portability is a concern. + +=item * + +Actually, on Mac OS, the C, C and C +functions are all aliases for the C function, which, on Mac OS, +calls `pwd`. Likewise, the C function is an alias for +C. + +=back + +=head1 AUTHOR + +Originally by the perl5-porters. + +Maintained by Ken Williams + +=head1 COPYRIGHT + +Copyright (c) 2004 by the Perl 5 Porters. All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +Portions of the C code in this library are copyright (c) 1994 by the +Regents of the University of California. All rights reserved. The +license on this code is compatible with the licensing of the rest of +the distribution - please see the source code in F for the +details. + +=head1 SEE ALSO + +L + +=cut + +use strict; +use Exporter; +use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); + +$VERSION = '3.47'; +my $xs_version = $VERSION; +$VERSION =~ tr/_//; + +@ISA = qw/ Exporter /; +@EXPORT = qw(cwd getcwd fastcwd fastgetcwd); +push @EXPORT, qw(getdcwd) if $^O eq 'MSWin32'; +@EXPORT_OK = qw(chdir abs_path fast_abs_path realpath fast_realpath); + +# sys_cwd may keep the builtin command + +# All the functionality of this module may provided by builtins, +# there is no sense to process the rest of the file. +# The best choice may be to have this in BEGIN, but how to return from BEGIN? + +if ($^O eq 'os2') { + local $^W = 0; + + *cwd = defined &sys_cwd ? \&sys_cwd : \&_os2_cwd; + *getcwd = \&cwd; + *fastgetcwd = \&cwd; + *fastcwd = \&cwd; + + *fast_abs_path = \&sys_abspath if defined &sys_abspath; + *abs_path = \&fast_abs_path; + *realpath = \&fast_abs_path; + *fast_realpath = \&fast_abs_path; + + return 1; +} + +# Need to look up the feature settings on VMS. The preferred way is to use the +# VMS::Feature module, but that may not be available to dual life modules. + +my $use_vms_feature; +BEGIN { + if ($^O eq 'VMS') { + if (eval { local $SIG{__DIE__}; require VMS::Feature; }) { + $use_vms_feature = 1; + } + } +} + +# Need to look up the UNIX report mode. This may become a dynamic mode +# in the future. +sub _vms_unix_rpt { + my $unix_rpt; + if ($use_vms_feature) { + $unix_rpt = VMS::Feature::current("filename_unix_report"); + } else { + my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || ''; + $unix_rpt = $env_unix_rpt =~ /^[ET1]/i; + } + return $unix_rpt; +} + +# Need to look up the EFS character set mode. This may become a dynamic +# mode in the future. +sub _vms_efs { + my $efs; + if ($use_vms_feature) { + $efs = VMS::Feature::current("efs_charset"); + } else { + my $env_efs = $ENV{'DECC$EFS_CHARSET'} || ''; + $efs = $env_efs =~ /^[ET1]/i; + } + return $efs; +} + + +# If loading the XS stuff doesn't work, we can fall back to pure perl +unless (defined &getcwd) { + eval { + if ( $] >= 5.006 ) { + require XSLoader; + XSLoader::load( __PACKAGE__, $xs_version); + } else { + require DynaLoader; + push @ISA, 'DynaLoader'; + __PACKAGE__->bootstrap( $xs_version ); + } + }; +} + +# Big nasty table of function aliases +my %METHOD_MAP = + ( + VMS => + { + cwd => '_vms_cwd', + getcwd => '_vms_cwd', + fastcwd => '_vms_cwd', + fastgetcwd => '_vms_cwd', + abs_path => '_vms_abs_path', + fast_abs_path => '_vms_abs_path', + }, + + MSWin32 => + { + # We assume that &_NT_cwd is defined as an XSUB or in the core. + cwd => '_NT_cwd', + getcwd => '_NT_cwd', + fastcwd => '_NT_cwd', + fastgetcwd => '_NT_cwd', + abs_path => 'fast_abs_path', + realpath => 'fast_abs_path', + }, + + dos => + { + cwd => '_dos_cwd', + getcwd => '_dos_cwd', + fastgetcwd => '_dos_cwd', + fastcwd => '_dos_cwd', + abs_path => 'fast_abs_path', + }, + + # QNX4. QNX6 has a $os of 'nto'. + qnx => + { + cwd => '_qnx_cwd', + getcwd => '_qnx_cwd', + fastgetcwd => '_qnx_cwd', + fastcwd => '_qnx_cwd', + abs_path => '_qnx_abs_path', + fast_abs_path => '_qnx_abs_path', + }, + + cygwin => + { + getcwd => 'cwd', + fastgetcwd => 'cwd', + fastcwd => 'cwd', + abs_path => 'fast_abs_path', + realpath => 'fast_abs_path', + }, + + epoc => + { + cwd => '_epoc_cwd', + getcwd => '_epoc_cwd', + fastgetcwd => '_epoc_cwd', + fastcwd => '_epoc_cwd', + abs_path => 'fast_abs_path', + }, + + MacOS => + { + getcwd => 'cwd', + fastgetcwd => 'cwd', + fastcwd => 'cwd', + abs_path => 'fast_abs_path', + }, + ); + +$METHOD_MAP{NT} = $METHOD_MAP{MSWin32}; + + +# Find the pwd command in the expected locations. We assume these +# are safe. This prevents _backtick_pwd() consulting $ENV{PATH} +# so everything works under taint mode. +my $pwd_cmd; +foreach my $try ('/bin/pwd', + '/usr/bin/pwd', + '/QOpenSys/bin/pwd', # OS/400 PASE. + ) { + + if( -x $try ) { + $pwd_cmd = $try; + last; + } +} + +# Android has a built-in pwd. Using $pwd_cmd will DTRT if +# this perl was compiled with -Dd_useshellcmds, which is the +# default for Android, but the block below is needed for the +# miniperl running on the host when cross-compiling, and +# potentially for native builds with -Ud_useshellcmds. +if ($^O =~ /android/) { + # If targetsh is executable, then we're either a full + # perl, or a miniperl for a native build. + if (-x $Config::Config{targetsh}) { + $pwd_cmd = "$Config::Config{targetsh} -c pwd" + } + else { + $pwd_cmd = "$Config::Config{sh} -c pwd" + } +} + +my $found_pwd_cmd = defined($pwd_cmd); +unless ($pwd_cmd) { + # Isn't this wrong? _backtick_pwd() will fail if someone has + # pwd in their path but it is not /bin/pwd or /usr/bin/pwd? + # See [perl #16774]. --jhi + $pwd_cmd = 'pwd'; +} + +# Lazy-load Carp +sub _carp { require Carp; Carp::carp(@_) } +sub _croak { require Carp; Carp::croak(@_) } + +# The 'natural and safe form' for UNIX (pwd may be setuid root) +sub _backtick_pwd { + # Localize %ENV entries in a way that won't create new hash keys + my @localize = grep exists $ENV{$_}, qw(PATH IFS CDPATH ENV BASH_ENV); + local @ENV{@localize}; + + my $cwd = `$pwd_cmd`; + # Belt-and-suspenders in case someone said "undef $/". + local $/ = "\n"; + # `pwd` may fail e.g. if the disk is full + chomp($cwd) if defined $cwd; + $cwd; +} + +# Since some ports may predefine cwd internally (e.g., NT) +# we take care not to override an existing definition for cwd(). + +unless ($METHOD_MAP{$^O}{cwd} or defined &cwd) { + # The pwd command is not available in some chroot(2)'ed environments + my $sep = $Config::Config{path_sep} || ':'; + my $os = $^O; # Protect $^O from tainting + + + # Try again to find a pwd, this time searching the whole PATH. + if (defined $ENV{PATH} and $os ne 'MSWin32') { # no pwd on Windows + my @candidates = split($sep, $ENV{PATH}); + while (!$found_pwd_cmd and @candidates) { + my $candidate = shift @candidates; + $found_pwd_cmd = 1 if -x "$candidate/pwd"; + } + } + + # MacOS has some special magic to make `pwd` work. + if( $os eq 'MacOS' || $found_pwd_cmd ) + { + *cwd = \&_backtick_pwd; + } + else { + *cwd = \&getcwd; + } +} + +if ($^O eq 'cygwin') { + # We need to make sure cwd() is called with no args, because it's + # got an arg-less prototype and will die if args are present. + local $^W = 0; + my $orig_cwd = \&cwd; + *cwd = sub { &$orig_cwd() } +} + + +# set a reasonable (and very safe) default for fastgetcwd, in case it +# isn't redefined later (20001212 rspier) +*fastgetcwd = \&cwd; + +# A non-XS version of getcwd() - also used to bootstrap the perl build +# process, when miniperl is running and no XS loading happens. +sub _perl_getcwd +{ + abs_path('.'); +} + +# By John Bazik +# +# Usage: $cwd = &fastcwd; +# +# This is a faster version of getcwd. It's also more dangerous because +# you might chdir out of a directory that you can't chdir back into. + +sub fastcwd_ { + my($odev, $oino, $cdev, $cino, $tdev, $tino); + my(@path, $path); + local(*DIR); + + my($orig_cdev, $orig_cino) = stat('.'); + ($cdev, $cino) = ($orig_cdev, $orig_cino); + for (;;) { + my $direntry; + ($odev, $oino) = ($cdev, $cino); + CORE::chdir('..') || return undef; + ($cdev, $cino) = stat('.'); + last if $odev == $cdev && $oino == $cino; + opendir(DIR, '.') || return undef; + for (;;) { + $direntry = readdir(DIR); + last unless defined $direntry; + next if $direntry eq '.'; + next if $direntry eq '..'; + + ($tdev, $tino) = lstat($direntry); + last unless $tdev != $odev || $tino != $oino; + } + closedir(DIR); + return undef unless defined $direntry; # should never happen + unshift(@path, $direntry); + } + $path = '/' . join('/', @path); + if ($^O eq 'apollo') { $path = "/".$path; } + # At this point $path may be tainted (if tainting) and chdir would fail. + # Untaint it then check that we landed where we started. + $path =~ /^(.*)\z/s # untaint + && CORE::chdir($1) or return undef; + ($cdev, $cino) = stat('.'); + die "Unstable directory path, current directory changed unexpectedly" + if $cdev != $orig_cdev || $cino != $orig_cino; + $path; +} +if (not defined &fastcwd) { *fastcwd = \&fastcwd_ } + + +# Keeps track of current working directory in PWD environment var +# Usage: +# use Cwd 'chdir'; +# chdir $newdir; + +my $chdir_init = 0; + +sub chdir_init { + if ($ENV{'PWD'} and $^O ne 'os2' and $^O ne 'dos' and $^O ne 'MSWin32') { + my($dd,$di) = stat('.'); + my($pd,$pi) = stat($ENV{'PWD'}); + if (!defined $dd or !defined $pd or $di != $pi or $dd != $pd) { + $ENV{'PWD'} = cwd(); + } + } + else { + my $wd = cwd(); + $wd = Win32::GetFullPathName($wd) if $^O eq 'MSWin32'; + $ENV{'PWD'} = $wd; + } + # Strip an automounter prefix (where /tmp_mnt/foo/bar == /foo/bar) + if ($^O ne 'MSWin32' and $ENV{'PWD'} =~ m|(/[^/]+(/[^/]+/[^/]+))(.*)|s) { + my($pd,$pi) = stat($2); + my($dd,$di) = stat($1); + if (defined $pd and defined $dd and $di == $pi and $dd == $pd) { + $ENV{'PWD'}="$2$3"; + } + } + $chdir_init = 1; +} + +sub chdir { + my $newdir = @_ ? shift : ''; # allow for no arg (chdir to HOME dir) + $newdir =~ s|///*|/|g unless $^O eq 'MSWin32'; + chdir_init() unless $chdir_init; + my $newpwd; + if ($^O eq 'MSWin32') { + # get the full path name *before* the chdir() + $newpwd = Win32::GetFullPathName($newdir); + } + + return 0 unless CORE::chdir $newdir; + + if ($^O eq 'VMS') { + return $ENV{'PWD'} = $ENV{'DEFAULT'} + } + elsif ($^O eq 'MacOS') { + return $ENV{'PWD'} = cwd(); + } + elsif ($^O eq 'MSWin32') { + $ENV{'PWD'} = $newpwd; + return 1; + } + + if (ref $newdir eq 'GLOB') { # in case a file/dir handle is passed in + $ENV{'PWD'} = cwd(); + } elsif ($newdir =~ m#^/#s) { + $ENV{'PWD'} = $newdir; + } else { + my @curdir = split(m#/#,$ENV{'PWD'}); + @curdir = ('') unless @curdir; + my $component; + foreach $component (split(m#/#, $newdir)) { + next if $component eq '.'; + pop(@curdir),next if $component eq '..'; + push(@curdir,$component); + } + $ENV{'PWD'} = join('/',@curdir) || '/'; + } + 1; +} + + +sub _perl_abs_path +{ + my $start = @_ ? shift : '.'; + my($dotdots, $cwd, @pst, @cst, $dir, @tst); + + unless (@cst = stat( $start )) + { + _carp("stat($start): $!"); + return ''; + } + + unless (-d _) { + # Make sure we can be invoked on plain files, not just directories. + # NOTE that this routine assumes that '/' is the only directory separator. + + my ($dir, $file) = $start =~ m{^(.*)/(.+)$} + or return cwd() . '/' . $start; + + # Can't use "-l _" here, because the previous stat was a stat(), not an lstat(). + if (-l $start) { + my $link_target = readlink($start); + die "Can't resolve link $start: $!" unless defined $link_target; + + require File::Spec; + $link_target = $dir . '/' . $link_target + unless File::Spec->file_name_is_absolute($link_target); + + return abs_path($link_target); + } + + return $dir ? abs_path($dir) . "/$file" : "/$file"; + } + + $cwd = ''; + $dotdots = $start; + do + { + $dotdots .= '/..'; + @pst = @cst; + local *PARENT; + unless (opendir(PARENT, $dotdots)) + { + # probably a permissions issue. Try the native command. + require File::Spec; + return File::Spec->rel2abs( $start, _backtick_pwd() ); + } + unless (@cst = stat($dotdots)) + { + _carp("stat($dotdots): $!"); + closedir(PARENT); + return ''; + } + if ($pst[0] == $cst[0] && $pst[1] == $cst[1]) + { + $dir = undef; + } + else + { + do + { + unless (defined ($dir = readdir(PARENT))) + { + _carp("readdir($dotdots): $!"); + closedir(PARENT); + return ''; + } + $tst[0] = $pst[0]+1 unless (@tst = lstat("$dotdots/$dir")) + } + while ($dir eq '.' || $dir eq '..' || $tst[0] != $pst[0] || + $tst[1] != $pst[1]); + } + $cwd = (defined $dir ? "$dir" : "" ) . "/$cwd" ; + closedir(PARENT); + } while (defined $dir); + chop($cwd) unless $cwd eq '/'; # drop the trailing / + $cwd; +} + + +my $Curdir; +sub fast_abs_path { + local $ENV{PWD} = $ENV{PWD} || ''; # Guard against clobberage + my $cwd = getcwd(); + require File::Spec; + my $path = @_ ? shift : ($Curdir ||= File::Spec->curdir); + + # Detaint else we'll explode in taint mode. This is safe because + # we're not doing anything dangerous with it. + ($path) = $path =~ /(.*)/s; + ($cwd) = $cwd =~ /(.*)/s; + + unless (-e $path) { + _croak("$path: No such file or directory"); + } + + unless (-d _) { + # Make sure we can be invoked on plain files, not just directories. + + my ($vol, $dir, $file) = File::Spec->splitpath($path); + return File::Spec->catfile($cwd, $path) unless length $dir; + + if (-l $path) { + my $link_target = readlink($path); + die "Can't resolve link $path: $!" unless defined $link_target; + + $link_target = File::Spec->catpath($vol, $dir, $link_target) + unless File::Spec->file_name_is_absolute($link_target); + + return fast_abs_path($link_target); + } + + return $dir eq File::Spec->rootdir + ? File::Spec->catpath($vol, $dir, $file) + : fast_abs_path(File::Spec->catpath($vol, $dir, '')) . '/' . $file; + } + + if (!CORE::chdir($path)) { + _croak("Cannot chdir to $path: $!"); + } + my $realpath = getcwd(); + if (! ((-d $cwd) && (CORE::chdir($cwd)))) { + _croak("Cannot chdir back to $cwd: $!"); + } + $realpath; +} + +# added function alias to follow principle of least surprise +# based on previous aliasing. --tchrist 27-Jan-00 +*fast_realpath = \&fast_abs_path; + + +# --- PORTING SECTION --- + +# VMS: $ENV{'DEFAULT'} points to default directory at all times +# 06-Mar-1996 Charles Bailey bailey@newman.upenn.edu +# Note: Use of Cwd::chdir() causes the logical name PWD to be defined +# in the process logical name table as the default device and directory +# seen by Perl. This may not be the same as the default device +# and directory seen by DCL after Perl exits, since the effects +# the CRTL chdir() function persist only until Perl exits. + +sub _vms_cwd { + return $ENV{'DEFAULT'}; +} + +sub _vms_abs_path { + return $ENV{'DEFAULT'} unless @_; + my $path = shift; + + my $efs = _vms_efs; + my $unix_rpt = _vms_unix_rpt; + + if (defined &VMS::Filespec::vmsrealpath) { + my $path_unix = 0; + my $path_vms = 0; + + $path_unix = 1 if ($path =~ m#(?<=\^)/#); + $path_unix = 1 if ($path =~ /^\.\.?$/); + $path_vms = 1 if ($path =~ m#[\[<\]]#); + $path_vms = 1 if ($path =~ /^--?$/); + + my $unix_mode = $path_unix; + if ($efs) { + # In case of a tie, the Unix report mode decides. + if ($path_vms == $path_unix) { + $unix_mode = $unix_rpt; + } else { + $unix_mode = 0 if $path_vms; + } + } + + if ($unix_mode) { + # Unix format + return VMS::Filespec::unixrealpath($path); + } + + # VMS format + + my $new_path = VMS::Filespec::vmsrealpath($path); + + # Perl expects directories to be in directory format + $new_path = VMS::Filespec::pathify($new_path) if -d $path; + return $new_path; + } + + # Fallback to older algorithm if correct ones are not + # available. + + if (-l $path) { + my $link_target = readlink($path); + die "Can't resolve link $path: $!" unless defined $link_target; + + return _vms_abs_path($link_target); + } + + # may need to turn foo.dir into [.foo] + my $pathified = VMS::Filespec::pathify($path); + $path = $pathified if defined $pathified; + + return VMS::Filespec::rmsexpand($path); +} + +sub _os2_cwd { + $ENV{'PWD'} = `cmd /c cd`; + chomp $ENV{'PWD'}; + $ENV{'PWD'} =~ s:\\:/:g ; + return $ENV{'PWD'}; +} + +sub _win32_cwd_simple { + $ENV{'PWD'} = `cd`; + chomp $ENV{'PWD'}; + $ENV{'PWD'} =~ s:\\:/:g ; + return $ENV{'PWD'}; +} + +sub _win32_cwd { + # Need to avoid taking any sort of reference to the typeglob or the code in + # the optree, so that this tests the runtime state of things, as the + # ExtUtils::MakeMaker tests for "miniperl" need to be able to fake things at + # runtime by deleting the subroutine. *foo{THING} syntax on a symbol table + # lookup avoids needing a string eval, which has been reported to cause + # problems (for reasons that we haven't been able to get to the bottom of - + # rt.cpan.org #56225) + if (*{$DynaLoader::{boot_DynaLoader}}{CODE}) { + $ENV{'PWD'} = Win32::GetCwd(); + } + else { # miniperl + chomp($ENV{'PWD'} = `cd`); + } + $ENV{'PWD'} =~ s:\\:/:g ; + return $ENV{'PWD'}; +} + +*_NT_cwd = defined &Win32::GetCwd ? \&_win32_cwd : \&_win32_cwd_simple; + +sub _dos_cwd { + if (!defined &Dos::GetCwd) { + $ENV{'PWD'} = `command /c cd`; + chomp $ENV{'PWD'}; + $ENV{'PWD'} =~ s:\\:/:g ; + } else { + $ENV{'PWD'} = Dos::GetCwd(); + } + return $ENV{'PWD'}; +} + +sub _qnx_cwd { + local $ENV{PATH} = ''; + local $ENV{CDPATH} = ''; + local $ENV{ENV} = ''; + $ENV{'PWD'} = `/usr/bin/fullpath -t`; + chomp $ENV{'PWD'}; + return $ENV{'PWD'}; +} + +sub _qnx_abs_path { + local $ENV{PATH} = ''; + local $ENV{CDPATH} = ''; + local $ENV{ENV} = ''; + my $path = @_ ? shift : '.'; + local *REALPATH; + + defined( open(REALPATH, '-|') || exec '/usr/bin/fullpath', '-t', $path ) or + die "Can't open /usr/bin/fullpath: $!"; + my $realpath = ; + close REALPATH; + chomp $realpath; + return $realpath; +} + +sub _epoc_cwd { + $ENV{'PWD'} = EPOC::getcwd(); + return $ENV{'PWD'}; +} + + +# Now that all the base-level functions are set up, alias the +# user-level functions to the right places + +if (exists $METHOD_MAP{$^O}) { + my $map = $METHOD_MAP{$^O}; + foreach my $name (keys %$map) { + local $^W = 0; # assignments trigger 'subroutine redefined' warning + no strict 'refs'; + *{$name} = \&{$map->{$name}}; + } +} + +# In case the XS version doesn't load. +*abs_path = \&_perl_abs_path unless defined &abs_path; +*getcwd = \&_perl_getcwd unless defined &getcwd; + +# added function alias for those of us more +# used to the libc function. --tchrist 27-Jan-00 +*realpath = \&abs_path; + +1; diff --git a/Parallel/ForkManager.pm b/Parallel/ForkManager.pm new file mode 100644 index 0000000..0de0d20 --- /dev/null +++ b/Parallel/ForkManager.pm @@ -0,0 +1,416 @@ +=head1 NAME + +Parallel::ForkManager - A simple parallel processing fork manager + +=head1 SYNOPSIS + + use Parallel::ForkManager; + + $pm = new Parallel::ForkManager($MAX_PROCESSES); + + foreach $data (@all_data) { + # Forks and returns the pid for the child: + my $pid = $pm->start and next; + + ... do some work with $data in the child process ... + + $pm->finish; # Terminates the child process + } + +=head1 DESCRIPTION + +This module is intended for use in operations that can be done in parallel +where the number of processes to be forked off should be limited. Typical +use is a downloader which will be retrieving hundreds/thousands of files. + +The code for a downloader would look something like this: + + use LWP::Simple; + use Parallel::ForkManager; + + ... + + @links=( + ["http://www.foo.bar/rulez.data","rulez_data.txt"], + ["http://new.host/more_data.doc","more_data.doc"], + ... + ); + + ... + + # Max 30 processes for parallel download + my $pm = new Parallel::ForkManager(30); + + foreach my $linkarray (@links) { + $pm->start and next; # do the fork + + my ($link,$fn) = @$linkarray; + warn "Cannot get $fn from $link" + if getstore($link,$fn) != RC_OK; + + $pm->finish; # do the exit in the child process + } + $pm->wait_all_children; + +First you need to instantiate the ForkManager with the "new" constructor. +You must specify the maximum number of processes to be created. If you +specify 0, then NO fork will be done; this is good for debugging purposes. + +Next, use $pm->start to do the fork. $pm returns 0 for the child process, +and child pid for the parent process (see also L). +The "and next" skips the internal loop in the parent process. NOTE: +$pm->start dies if the fork fails. + +$pm->finish terminates the child process (assuming a fork was done in the +"start"). + +NOTE: You cannot use $pm->start if you are already in the child process. +If you want to manage another set of subprocesses in the child process, +you must instantiate another Parallel::ForkManager object! + +=head1 METHODS + +=over 5 + +=item new $processes + +Instantiate a new Parallel::ForkManager object. You must specify the maximum +number of children to fork off. If you specify 0 (zero), then no children +will be forked. This is intended for debugging purposes. + +=item start [ $process_identifier ] + +This method does the fork. It returns the pid of the child process for +the parent, and 0 for the child process. If the $processes parameter +for the constructor is 0 then, assuming you're in the child process, +$pm->start simply returns 0. + +An optional $process_identifier can be provided to this method... It is used by +the "run_on_finish" callback (see CALLBACKS) for identifying the finished +process. + +=item finish [ $exit_code ] + +Closes the child process by exiting and accepts an optional exit code +(default exit code is 0) which can be retrieved in the parent via callback. +If you use the program in debug mode ($processes == 0), this method doesn't +do anything. + +=item set_max_procs $processes + +Allows you to set a new maximum number of children to maintain. Returns +the previous setting. + +=item wait_all_children + +You can call this method to wait for all the processes which have been +forked. This is a blocking wait. + +=back + +=head1 CALLBACKS + +You can define callbacks in the code, which are called on events like starting +a process or upon finish. + +The callbacks can be defined with the following methods: + +=over 4 + +=item run_on_finish $code [, $pid ] + +You can define a subroutine which is called when a child is terminated. It is +called in the parent process. + +The paremeters of the $code are the following: + + - pid of the process, which is terminated + - exit code of the program + - identification of the process (if provided in the "start" method) + - exit signal (0-127: signal name) + - core dump (1 if there was core dump at exit) + +=item run_on_start $code + +You can define a subroutine which is called when a child is started. It called +after the successful startup of a child in the parent process. + +The parameters of the $code are the following: + + - pid of the process which has been started + - identification of the process (if provided in the "start" method) + +=item run_on_wait $code, [$period] + +You can define a subroutine which is called when the child process needs to wait +for the startup. If $period is not defined, then one call is done per +child. If $period is defined, then $code is called periodically and the +module waits for $period seconds betwen the two calls. Note, $period can be +fractional number also. The exact "$period seconds" is not guarranteed, +signals can shorten and the process scheduler can make it longer (on busy +systems). + +The $code called in the "start" and the "wait_all_children" method also. + +No parameters are passed to the $code on the call. + +=back + +=head1 EXAMPLE + +=head2 Parallel get + +This small example can be used to get URLs in parallel. + + use Parallel::ForkManager; + use LWP::Simple; + my $pm=new Parallel::ForkManager(10); + for my $link (@ARGV) { + $pm->start and next; + my ($fn)= $link =~ /^.*\/(.*?)$/; + if (!$fn) { + warn "Cannot determine filename from $fn\n"; + } else { + $0.=" ".$fn; + print "Getting $fn from $link\n"; + my $rc=getstore($link,$fn); + print "$link downloaded. response code: $rc\n"; + }; + $pm->finish; + }; + +=head2 Callbacks + +Example of a program using callbacks to get child exit codes: + + use strict; + use Parallel::ForkManager; + + my $max_procs = 5; + my @names = qw( Fred Jim Lily Steve Jessica Bob Dave Christine Rico Sara ); + # hash to resolve PID's back to child specific information + + my $pm = new Parallel::ForkManager($max_procs); + + # Setup a callback for when a child finishes up so we can + # get it's exit code + $pm->run_on_finish( + sub { my ($pid, $exit_code, $ident) = @_; + print "** $ident just got out of the pool ". + "with PID $pid and exit code: $exit_code\n"; + } + ); + + $pm->run_on_start( + sub { my ($pid,$ident)=@_; + print "** $ident started, pid: $pid\n"; + } + ); + + $pm->run_on_wait( + sub { + print "** Have to wait for one children ...\n" + }, + 0.5 + ); + + foreach my $child ( 0 .. $#names ) { + my $pid = $pm->start($names[$child]) and next; + + # This code is the child process + print "This is $names[$child], Child number $child\n"; + sleep ( 2 * $child ); + print "$names[$child], Child $child is about to get out...\n"; + sleep 1; + $pm->finish($child); # pass an exit code to finish + } + + print "Waiting for Children...\n"; + $pm->wait_all_children; + print "Everybody is out of the pool!\n"; + +=head1 BUGS AND LIMITATIONS + +Do not use Parallel::ForkManager in an environment, where other child +processes can affect the run of the main program, so using this module +is not recommended in an environment where fork() / wait() is already used. + +If you want to use more than one copies of the Parallel::ForkManager, then +you have to make sure that all children processes are terminated, before you +use the second object in the main program. + +You are free to use a new copy of Parallel::ForkManager in the child +processes, although I don't think it makes sense. + +=head1 COPYRIGHT + +Copyright (c) 2000 Szabó, Balázs (dLux) + +All right reserved. This program is free software; you can redistribute it +and/or modify it under the same terms as Perl itself. + +=head1 AUTHOR + + dLux (Szabó, Balázs) + +=head1 CREDITS + + Noah Robin (documentation tweaks) + Chuck Hirstius (callback exit status, example) + Grant Hopwood (win32 port) + Mark Southern (bugfix) + +=cut + +package Parallel::ForkManager; +use POSIX ":sys_wait_h"; +use strict; +use vars qw($VERSION); +$VERSION='0.7.5'; + +sub new { my ($c,$processes)=@_; + my $h={ + max_proc => $processes, + processes => {}, + in_child => 0, + }; + return bless($h,ref($c)||$c); +}; + +sub start { my ($s,$identification)=@_; + die "Cannot start another process while you are in the child process" + if $s->{in_child}; + while ($s->{max_proc} && ( keys %{ $s->{processes} } ) >= $s->{max_proc}) { + $s->on_wait; + $s->wait_one_child(defined $s->{on_wait_period} ? &WNOHANG : undef); + }; + $s->wait_children; + if ($s->{max_proc}) { + my $pid=fork(); + die "Cannot fork: $!" if !defined $pid; + if ($pid) { + $s->{processes}->{$pid}=$identification; + $s->on_start($pid,$identification); + } else { + $s->{in_child}=1 if !$pid; + } + return $pid; + } else { + $s->{processes}->{$$}=$identification; + $s->on_start($$,$identification); + return 0; # Simulating the child which returns 0 + } +} + +sub finish { my ($s, $x)=@_; + if ( $s->{in_child} ) { + exit ($x || 0); + } + if ($s->{max_proc} == 0) { # max_proc == 0 + $s->on_finish($$, $x ,$s->{processes}->{$$}, 0, 0); + delete $s->{processes}->{$$}; + } + return 0; +} + +sub wait_children { my ($s)=@_; + return if !keys %{$s->{processes}}; + my $kid; + do { + $kid = $s->wait_one_child(&WNOHANG); + } while $kid > 0 || $kid < -1; # AS 5.6/Win32 returns negative PIDs +}; + +*wait_childs=*wait_children; # compatibility + +sub wait_one_child { my ($s,$par)=@_; + my $kid; + while (1) { + $kid = $s->_waitpid(-1,$par||=0); + last if $kid == 0 || $kid == -1; # AS 5.6/Win32 returns negative PIDs + redo if !exists $s->{processes}->{$kid}; + my $id = delete $s->{processes}->{$kid}; + $s->on_finish( $kid, $? >> 8 , $id, $? & 0x7f, $? & 0x80 ? 1 : 0); + last; + } + $kid; +}; + +sub wait_all_children { my ($s)=@_; + while (keys %{ $s->{processes} }) { + $s->on_wait; + $s->wait_one_child(defined $s->{on_wait_period} ? &WNOHANG : undef); + }; +} + +*wait_all_childs=*wait_all_children; # compatibility; + +sub run_on_finish { my ($s,$code,$pid)=@_; + $s->{on_finish}->{$pid || 0}=$code; +} + +sub on_finish { my ($s,$pid,@par)=@_; + my $code=$s->{on_finish}->{$pid} || $s->{on_finish}->{0} or return 0; + $code->($pid,@par); +}; + +sub run_on_wait { my ($s,$code, $period)=@_; + $s->{on_wait}=$code; + $s->{on_wait_period} = $period; +} + +sub on_wait { my ($s)=@_; + if(ref($s->{on_wait}) eq 'CODE') { + $s->{on_wait}->(); + if (defined $s->{on_wait_period}) { + local $SIG{CHLD} = sub { } if ! defined $SIG{CHLD}; + select undef, undef, undef, $s->{on_wait_period} + }; + }; +}; + +sub run_on_start { my ($s,$code)=@_; + $s->{on_start}=$code; +} + +sub on_start { my ($s,@par)=@_; + $s->{on_start}->(@par) if ref($s->{on_start}) eq 'CODE'; +}; + +sub set_max_procs { my ($s, $mp)=@_; + $s->{max_proc} = $mp; +} + +# OS dependant code follows... + +sub _waitpid { # Call waitpid() in the standard Unix fashion. + return waitpid($_[1],$_[2]); +} + +# On ActiveState Perl 5.6/Win32 build 625, waitpid(-1, &WNOHANG) always +# blocks unless an actual PID other than -1 is given. +sub _NT_waitpid { my ($s, $pid, $par) = @_; + if ($par == &WNOHANG) { # Need to nonblock on each of our PIDs in the pool. + my @pids = keys %{ $s->{processes} }; + # Simulate -1 (no processes awaiting cleanup.) + return -1 unless scalar(@pids); + # Check each PID in the pool. + my $kid; + foreach $pid (@pids) { + $kid = waitpid($pid, $par); + return $kid if $kid != 0; # AS 5.6/Win32 returns negative PIDs. + } + return $kid; + } else { # Normal waitpid() call. + return waitpid($pid, $par); + } +} + +{ + local $^W = 0; + if ($^O eq 'NT' or $^O eq 'MSWin32') { + *_waitpid = \&_NT_waitpid; + } +} + +1; diff --git a/Parallel/ForkManager/callback.pl b/Parallel/ForkManager/callback.pl new file mode 100755 index 0000000..56a52e2 --- /dev/null +++ b/Parallel/ForkManager/callback.pl @@ -0,0 +1,48 @@ +#!/usr/bin/perl -w +use lib '.'; +use strict; +use Parallel::ForkManager; + +my $max_procs = 5; +my @names = qw( Fred Jim Lily Steve Jessica Bob Dave Christine Rico Sara ); +# hash to resolve PID's back to child specific information + +my $pm = new Parallel::ForkManager($max_procs); + +# Setup a callback for when a child finishes up so we can +# get it's exit code +$pm->run_on_finish( + sub { my ($pid, $exit_code, $ident) = @_; + print "** $ident just got out of the pool ". + "with PID $pid and exit code: $exit_code\n"; + } +); + +$pm->run_on_start( + sub { my ($pid,$ident)=@_; + print "** $ident started, pid: $pid\n"; + } +); + +$pm->run_on_wait( + sub { + print "** Have to wait for one children ...\n" + }, + 0.5, +); + +foreach my $child ( 0 .. $#names ) { + my $pid = $pm->start($names[$child]) and next; + + # This code is the child process + print "This is $names[$child], Child number $child\n"; + sleep ( 2 * $child ); + print "$names[$child], Child $child is about to get out...\n"; + sleep 1; + $pm->finish($child); # pass an exit code to finish +} + +print "Waiting for Children...\n"; +$pm->wait_all_children; +print "Everybody is out of the pool!\n"; + diff --git a/Parallel/ForkManager/parallel_get.pl b/Parallel/ForkManager/parallel_get.pl new file mode 100755 index 0000000..5c6f3cd --- /dev/null +++ b/Parallel/ForkManager/parallel_get.pl @@ -0,0 +1,17 @@ +#!/usr/bin/perl -w +use Parallel::ForkManager; +use LWP::Simple; +my $pm=new Parallel::ForkManager(10); +for my $link (@ARGV) { + $pm->start and next; + my ($fn)= $link =~ /^.*\/(.*?)$/; + if (!$fn) { + warn "Cannot determine filename from $fn\n"; + } else { + $0.=" ".$fn; + print "Getting $fn from $link\n"; + my $rc=getstore($link,$fn); + print "$link downloaded. response code: $rc\n"; + }; + $pm->finish; +}; diff --git a/bin/ExtendOrFormatContigs.pl b/bin/ExtendOrFormatContigs.pl index eabbb1c..e27ff98 100755 --- a/bin/ExtendOrFormatContigs.pl +++ b/bin/ExtendOrFormatContigs.pl @@ -1,5 +1,7 @@ ############################################################### - #Marten Boetzer 1-03-2010 # + #Multi-thread implemented by Jose F. Sanchez-Herrero # + # 1-08-2017 # + #Marten Boetzer 1-03-2010 # #SSPACE perl subscript ExtendOrFormatContigs.pl # #This script, based on the the -x parameter; # # -Formats the contigs to appropriate format (-x 0) # @@ -9,6 +11,10 @@ use strict; use File::Basename; use File::Path; + use FindBin qw($Bin); + use lib "$Bin/Parallel/"; + use Parallel::ForkManager; + use Cwd qw(abs_path); my ($MAX, $MAX_TOP, $TRACK_COUNT) = (0, 100, 1); @@ -37,7 +43,6 @@ my $filenameOutExt = $base_name . ".singlereads.fasta"; my ($bin); if($extending == 1){ - &ExtendContigs($base_name, $filecontig, $filenameOutExt); print SUMFILE "\n" if($minContigLength > 0); &FormatContigs() if($minContigLength > 0); @@ -60,63 +65,130 @@ sub ExtendContigs{ &getUnmappedReads($filecontig, $readfile); #-------------------------------------------------CONTIG EXTENSION USING UNMAPPED PAIRS STORED IN $SET &printMessage("\n=>".getDate().": Contig extension initiated\n"); - my $outfileTig = "intermediate_results/" . $base_name . ".extendedcontigs.fasta"; - open (TIG, ">$outfileTig") || die "Can't write to $outfileTig -- fatal\n"; #--------------------------------------------ASSEMBLY START ASSEMBLY: - open(IN, $filecontig) || die "Can't open $filecontig -- fatal\n"; - my ($exttig_count, $counter, $NCount, $orig_mer, $prevhead) = (0, 0, 0, 0, ''); - while(){ - s/\r\n/\n/; - chomp; - $seq.= uc($_) if(eof(IN)); - if (/\>(\S+)/ || eof(IN)){ - my $head=$1; - $orig_mer = length($seq); - if($seq ne ''){ - $NCount++ if($seq=~/([NX])/i); - my $start_sequence = uc($seq); - my $reads_needed = 1; #tracks coverage - my $total_bases = $orig_mer * $reads_needed; - - ($seq, $reads_needed, $total_bases) = doExtension("3", $orig_mer, $seq, $reads_needed, $total_bases, $min_overlap, $base_overlap, $min_base_ratio, $verbose, $counter, $max_trim) if($orig_mer >= $MIN_READ_LENGTH && $orig_mer >= $min_overlap); - - my $seqrc = reverseComplement($seq); - ($seqrc, $reads_needed, $total_bases) = doExtension("5", $orig_mer, $seqrc, $reads_needed, $total_bases, $min_overlap, $base_overlap, $min_base_ratio, $verbose, $counter, $max_trim) if($orig_mer >= $MIN_READ_LENGTH && $orig_mer >= $min_overlap); - - my $leng = length($seqrc); - my $reversetig = reverseComplement($seqrc); ### return to sequence, as inputted - if($leng > $orig_mer){ ### commented out: && $start_sequence ne $seqrc && $start_sequence ne $reversetig - my $cov = $total_bases / $leng; - printf TIG ">extcontig%i|size%i|read%i|cov%.2f|seed:$prevhead\n%s\n", ($counter, $leng, $reads_needed, $cov, $reversetig); #print contigs to file - $exttig_count++; - }else{ - my $cov = $reads_needed = 0; - my $singlet_leng = length($start_sequence); - printf TIG ">contig%i|size%i|read%i|cov%.2f|seed:$prevhead\n%s\n", ($counter, $leng, $reads_needed, $cov, $reversetig); #print singlets to file - } - } - CounterPrint(++$counter); - $prevhead = $head; - $seq=''; - }else{ - $seq .= uc($_); - } - } - CounterPrint(" "); - print SUMFILE "\tNumber of contig sequences =".($counter-1). "\n"; - print SUMFILE "\t\tNumber of contigs containing N's (may prevent proper contig extension) = $NCount\n"; - - print SUMFILE "\tNumber of contigs extended = $exttig_count\n".$seplines; - close IN; - $filecontig = $outfileTig; - if($@){ - my $message = $@; - &printMessage("\nSomething went wrong running $0 ".getDate()."\n$message\n"); - } - close TIG; + + ## Split given fasta file in as many CPUs as expected and send multiple threads for each + # Splits fasta file and takes into account to add the whole sequence if it is broken + my $file = $filecontig; + my $file_size = -s $file; #To get only size + my $block = int($file_size/$threads); + + my $path = abs_path(); + + open (FH, "<$file") or die "Could not open source file. $!"; + print "\t- Splitting file into blocks of $block characters aprox ...\n"; + my $j = 0; my @files; + my @name = split("/", $file); + my $file_name = $name[-1]; + while (1) { + my $chunk; + my @tmp = split ("\.fasta", $file_name); + my $block_file = $path."/intermediate_results/".$tmp[0]."_part-".$j."_tmp.fasta"; + push (@files, $block_file); + open(OUT, ">$block_file") or die "Could not open destination file"; + if (!eof(FH)) { read(FH, $chunk,$block); + if ($j > 0) { $chunk = ">".$chunk; } + print OUT $chunk; + } ## Print the amount of chars + if (!eof(FH)) { $chunk = ; print OUT $chunk; } ## print the whole line if it is broken + if (!eof(FH)) { + $/ = ">"; ## Telling perl where a new line starts + $chunk = ; chop $chunk; print OUT $chunk; + $/ = "\n"; + } ## print the sequence if it is broken + $j++; close(OUT); last if eof(FH); + } + close(FH); + + my $pm = new Parallel::ForkManager($threads); ## Number of subprocesses not equal to CPUs. Each subprocesses will have multiple CPUs if available + $pm->run_on_finish( + sub { my ($pid, $exit_code, $ident) = @_; + print "\n\n** Child process finished with PID $pid and exit code: $exit_code\n\n"; + } ); + $pm->run_on_start( sub { my ($pid,$ident)=@_; print "\n\n** Parsing block of FASTA file $ident PID:$pid\n\n"; } ); + for (my $i=0; $i < scalar @files; $i++) { + my $pid = $pm->start($files[$i], $i) and next; print "\nExecuting subprocess command\n\n"; + ## Send subprocess + + open(IN, $files[$i]) || die "Can't open $files[$i] -- fatal\n"; + + my $out_put = $files[$i]."-extendedcontigs.fasta"; + open (OUT, ">$out_put"); + my ($exttig_count, $counter, $NCount, $orig_mer, $prevhead) = (0, 0, 0, 0, ''); + $/ = ">"; ## Telling perl where a new line starts + while () { + next if /^#/ || /^\s*$/; + chomp; + my ($head, $seq) = split(/\n/,$_,2); + next unless ($head && $seq); + + $NCount++ if($seq=~/([NX])/i); + my $start_sequence = uc($seq); + my $reads_needed = 1; #tracks coverage + my $total_bases = $orig_mer * $reads_needed; + + ($seq, $reads_needed, $total_bases) = doExtension("3", $orig_mer, $seq, $reads_needed, $total_bases, $min_overlap, $base_overlap, $min_base_ratio, $verbose, $counter, $max_trim) if($orig_mer >= $MIN_READ_LENGTH && $orig_mer >= $min_overlap); + + my $seqrc = reverseComplement($seq); + ($seqrc, $reads_needed, $total_bases) = doExtension("5", $orig_mer, $seqrc, $reads_needed, $total_bases, $min_overlap, $base_overlap, $min_base_ratio, $verbose, $counter, $max_trim) if($orig_mer >= $MIN_READ_LENGTH && $orig_mer >= $min_overlap); + + my $leng = length($seqrc); + my $reversetig = reverseComplement($seqrc); ### return to sequence, as inputted + if($leng > $orig_mer){ ### commented out: && $start_sequence ne $seqrc && $start_sequence ne $reversetig + my $cov = $total_bases / $leng; + printf OUT ">extcontig%i|size%i|read%i|cov%.2f|seed:$prevhead\n%s\n", ($counter, $leng, $reads_needed, $cov, $reversetig); #print contigs to file + $exttig_count++; + }else{ + my $cov = $reads_needed = 0; + my $singlet_leng = length($start_sequence); + printf OUT ">contig%i|size%i|read%i|cov%.2f|seed:$prevhead\n%s\n", ($counter, $leng, $reads_needed, $cov, $reversetig); #print singlets to file + } + $counter++; + } + $/ = "\n"; + close(IN); close (OUT); + + ## print STATS + my $stats = $files[$i]."-extendedcontigs.STATS.txt"; + open (STATS, ">$stats"); + print STATS "NCount=$NCount\ncounter=$counter\nexttig_count=$exttig_count\n"; + close (STATS); + + ## finish subprocess + $pm->finish($i); # pass an exit code to finish + } + $pm->wait_all_children; print "\n** All parsing child processes have finished...\n\n"; + + ## concatenate results + my $outfileTig = $path."/intermediate_results/" . $base_name . ".extendedcontigs.fasta"; + my $stats_file = $path."/intermediate_results/stats-tmp.txt"; + + #open (TIG, ">$outfileTig") || die "Can't write to $outfileTig -- fatal\n"; + system("cat $path/intermediate_results/*extendedcontigs.fasta >> $outfileTig"); + system("cat $path/intermediate_results/*extendedcontigs.STATS.txt >> $stats_file"); + + my %stats; + open (STAT, "<$stats_file"); + while () { + my $line = $_; + chomp $line; + my @array = split("=", $line); + $stats{$array[0]} += $array[1]; + } + close (STAT); + + print SUMFILE "\tNumber of contig sequences =".($stats{"counter"}-1). "\n"; + print SUMFILE "\t\tNumber of contigs containing N's (may prevent proper contig extension) = ".$stats{"NCount"}."\n"; + print SUMFILE "\tNumber of contigs extended = ".$stats{"exttig_count"}."\n".$seplines; + + $filecontig = $outfileTig; + if($@){ + my $message = $@; + &printMessage("\nSomething went wrong running $0 ".getDate()."\n$message\n"); + } } ###STORE CONTIGS TO APPROPRIATE FORMAT WHEN CONTIGS WILL NOT BE EXTENDED From b9d0ade52062543cd53fc684cfb0b2d66e3a8297 Mon Sep 17 00:00:00 2001 From: jfsanchezherrero Date: Mon, 4 Sep 2017 17:44:24 +0200 Subject: [PATCH 2/3] pull request --- bin/ExtendOrFormatContigs.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/ExtendOrFormatContigs.pl b/bin/ExtendOrFormatContigs.pl index e27ff98..7b278af 100755 --- a/bin/ExtendOrFormatContigs.pl +++ b/bin/ExtendOrFormatContigs.pl @@ -17,7 +17,6 @@ use Cwd qw(abs_path); my ($MAX, $MAX_TOP, $TRACK_COUNT) = (0, 100, 1); - my $seplines = ("-" x 60)."\n"; my $contig = $ARGV[0]; From e777ca6018fe720029da48943d09a89851f81ee9 Mon Sep 17 00:00:00 2001 From: jfsanchezherrero Date: Tue, 5 Sep 2017 09:15:56 +0200 Subject: [PATCH 3/3] Discard CPAN modules --- Cwd.pm | 855 --------------------------- Parallel/ForkManager.pm | 416 ------------- Parallel/ForkManager/callback.pl | 48 -- Parallel/ForkManager/parallel_get.pl | 17 - 4 files changed, 1336 deletions(-) delete mode 100644 Cwd.pm delete mode 100644 Parallel/ForkManager.pm delete mode 100755 Parallel/ForkManager/callback.pl delete mode 100755 Parallel/ForkManager/parallel_get.pl diff --git a/Cwd.pm b/Cwd.pm deleted file mode 100644 index f27a3a0..0000000 --- a/Cwd.pm +++ /dev/null @@ -1,855 +0,0 @@ -package Cwd; - -=head1 NAME - -Cwd - get pathname of current working directory - -=head1 SYNOPSIS - - use Cwd; - my $dir = getcwd; - - use Cwd 'abs_path'; - my $abs_path = abs_path($file); - -=head1 DESCRIPTION - -This module provides functions for determining the pathname of the -current working directory. It is recommended that getcwd (or another -*cwd() function) be used in I code to ensure portability. - -By default, it exports the functions cwd(), getcwd(), fastcwd(), and -fastgetcwd() (and, on Win32, getdcwd()) into the caller's namespace. - - -=head2 getcwd and friends - -Each of these functions are called without arguments and return the -absolute path of the current working directory. - -=over 4 - -=item getcwd - - my $cwd = getcwd(); - -Returns the current working directory. - -Exposes the POSIX function getcwd(3) or re-implements it if it's not -available. - -=item cwd - - my $cwd = cwd(); - -The cwd() is the most natural form for the current architecture. For -most systems it is identical to `pwd` (but without the trailing line -terminator). - -=item fastcwd - - my $cwd = fastcwd(); - -A more dangerous version of getcwd(), but potentially faster. - -It might conceivably chdir() you out of a directory that it can't -chdir() you back into. If fastcwd encounters a problem it will return -undef but will probably leave you in a different directory. For a -measure of extra security, if everything appears to have worked, the -fastcwd() function will check that it leaves you in the same directory -that it started in. If it has changed it will C with the message -"Unstable directory path, current directory changed -unexpectedly". That should never happen. - -=item fastgetcwd - - my $cwd = fastgetcwd(); - -The fastgetcwd() function is provided as a synonym for cwd(). - -=item getdcwd - - my $cwd = getdcwd(); - my $cwd = getdcwd('C:'); - -The getdcwd() function is also provided on Win32 to get the current working -directory on the specified drive, since Windows maintains a separate current -working directory for each drive. If no drive is specified then the current -drive is assumed. - -This function simply calls the Microsoft C library _getdcwd() function. - -=back - - -=head2 abs_path and friends - -These functions are exported only on request. They each take a single -argument and return the absolute pathname for it. If no argument is -given they'll use the current working directory. - -=over 4 - -=item abs_path - - my $abs_path = abs_path($file); - -Uses the same algorithm as getcwd(). Symbolic links and relative-path -components ("." and "..") are resolved to return the canonical -pathname, just like realpath(3). - -=item realpath - - my $abs_path = realpath($file); - -A synonym for abs_path(). - -=item fast_abs_path - - my $abs_path = fast_abs_path($file); - -A more dangerous, but potentially faster version of abs_path. - -=back - -=head2 $ENV{PWD} - -If you ask to override your chdir() built-in function, - - use Cwd qw(chdir); - -then your PWD environment variable will be kept up to date. Note that -it will only be kept up to date if all packages which use chdir import -it from Cwd. - - -=head1 NOTES - -=over 4 - -=item * - -Since the path separators are different on some operating systems ('/' -on Unix, ':' on MacPerl, etc...) we recommend you use the File::Spec -modules wherever portability is a concern. - -=item * - -Actually, on Mac OS, the C, C and C -functions are all aliases for the C function, which, on Mac OS, -calls `pwd`. Likewise, the C function is an alias for -C. - -=back - -=head1 AUTHOR - -Originally by the perl5-porters. - -Maintained by Ken Williams - -=head1 COPYRIGHT - -Copyright (c) 2004 by the Perl 5 Porters. All rights reserved. - -This program is free software; you can redistribute it and/or modify -it under the same terms as Perl itself. - -Portions of the C code in this library are copyright (c) 1994 by the -Regents of the University of California. All rights reserved. The -license on this code is compatible with the licensing of the rest of -the distribution - please see the source code in F for the -details. - -=head1 SEE ALSO - -L - -=cut - -use strict; -use Exporter; -use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); - -$VERSION = '3.47'; -my $xs_version = $VERSION; -$VERSION =~ tr/_//; - -@ISA = qw/ Exporter /; -@EXPORT = qw(cwd getcwd fastcwd fastgetcwd); -push @EXPORT, qw(getdcwd) if $^O eq 'MSWin32'; -@EXPORT_OK = qw(chdir abs_path fast_abs_path realpath fast_realpath); - -# sys_cwd may keep the builtin command - -# All the functionality of this module may provided by builtins, -# there is no sense to process the rest of the file. -# The best choice may be to have this in BEGIN, but how to return from BEGIN? - -if ($^O eq 'os2') { - local $^W = 0; - - *cwd = defined &sys_cwd ? \&sys_cwd : \&_os2_cwd; - *getcwd = \&cwd; - *fastgetcwd = \&cwd; - *fastcwd = \&cwd; - - *fast_abs_path = \&sys_abspath if defined &sys_abspath; - *abs_path = \&fast_abs_path; - *realpath = \&fast_abs_path; - *fast_realpath = \&fast_abs_path; - - return 1; -} - -# Need to look up the feature settings on VMS. The preferred way is to use the -# VMS::Feature module, but that may not be available to dual life modules. - -my $use_vms_feature; -BEGIN { - if ($^O eq 'VMS') { - if (eval { local $SIG{__DIE__}; require VMS::Feature; }) { - $use_vms_feature = 1; - } - } -} - -# Need to look up the UNIX report mode. This may become a dynamic mode -# in the future. -sub _vms_unix_rpt { - my $unix_rpt; - if ($use_vms_feature) { - $unix_rpt = VMS::Feature::current("filename_unix_report"); - } else { - my $env_unix_rpt = $ENV{'DECC$FILENAME_UNIX_REPORT'} || ''; - $unix_rpt = $env_unix_rpt =~ /^[ET1]/i; - } - return $unix_rpt; -} - -# Need to look up the EFS character set mode. This may become a dynamic -# mode in the future. -sub _vms_efs { - my $efs; - if ($use_vms_feature) { - $efs = VMS::Feature::current("efs_charset"); - } else { - my $env_efs = $ENV{'DECC$EFS_CHARSET'} || ''; - $efs = $env_efs =~ /^[ET1]/i; - } - return $efs; -} - - -# If loading the XS stuff doesn't work, we can fall back to pure perl -unless (defined &getcwd) { - eval { - if ( $] >= 5.006 ) { - require XSLoader; - XSLoader::load( __PACKAGE__, $xs_version); - } else { - require DynaLoader; - push @ISA, 'DynaLoader'; - __PACKAGE__->bootstrap( $xs_version ); - } - }; -} - -# Big nasty table of function aliases -my %METHOD_MAP = - ( - VMS => - { - cwd => '_vms_cwd', - getcwd => '_vms_cwd', - fastcwd => '_vms_cwd', - fastgetcwd => '_vms_cwd', - abs_path => '_vms_abs_path', - fast_abs_path => '_vms_abs_path', - }, - - MSWin32 => - { - # We assume that &_NT_cwd is defined as an XSUB or in the core. - cwd => '_NT_cwd', - getcwd => '_NT_cwd', - fastcwd => '_NT_cwd', - fastgetcwd => '_NT_cwd', - abs_path => 'fast_abs_path', - realpath => 'fast_abs_path', - }, - - dos => - { - cwd => '_dos_cwd', - getcwd => '_dos_cwd', - fastgetcwd => '_dos_cwd', - fastcwd => '_dos_cwd', - abs_path => 'fast_abs_path', - }, - - # QNX4. QNX6 has a $os of 'nto'. - qnx => - { - cwd => '_qnx_cwd', - getcwd => '_qnx_cwd', - fastgetcwd => '_qnx_cwd', - fastcwd => '_qnx_cwd', - abs_path => '_qnx_abs_path', - fast_abs_path => '_qnx_abs_path', - }, - - cygwin => - { - getcwd => 'cwd', - fastgetcwd => 'cwd', - fastcwd => 'cwd', - abs_path => 'fast_abs_path', - realpath => 'fast_abs_path', - }, - - epoc => - { - cwd => '_epoc_cwd', - getcwd => '_epoc_cwd', - fastgetcwd => '_epoc_cwd', - fastcwd => '_epoc_cwd', - abs_path => 'fast_abs_path', - }, - - MacOS => - { - getcwd => 'cwd', - fastgetcwd => 'cwd', - fastcwd => 'cwd', - abs_path => 'fast_abs_path', - }, - ); - -$METHOD_MAP{NT} = $METHOD_MAP{MSWin32}; - - -# Find the pwd command in the expected locations. We assume these -# are safe. This prevents _backtick_pwd() consulting $ENV{PATH} -# so everything works under taint mode. -my $pwd_cmd; -foreach my $try ('/bin/pwd', - '/usr/bin/pwd', - '/QOpenSys/bin/pwd', # OS/400 PASE. - ) { - - if( -x $try ) { - $pwd_cmd = $try; - last; - } -} - -# Android has a built-in pwd. Using $pwd_cmd will DTRT if -# this perl was compiled with -Dd_useshellcmds, which is the -# default for Android, but the block below is needed for the -# miniperl running on the host when cross-compiling, and -# potentially for native builds with -Ud_useshellcmds. -if ($^O =~ /android/) { - # If targetsh is executable, then we're either a full - # perl, or a miniperl for a native build. - if (-x $Config::Config{targetsh}) { - $pwd_cmd = "$Config::Config{targetsh} -c pwd" - } - else { - $pwd_cmd = "$Config::Config{sh} -c pwd" - } -} - -my $found_pwd_cmd = defined($pwd_cmd); -unless ($pwd_cmd) { - # Isn't this wrong? _backtick_pwd() will fail if someone has - # pwd in their path but it is not /bin/pwd or /usr/bin/pwd? - # See [perl #16774]. --jhi - $pwd_cmd = 'pwd'; -} - -# Lazy-load Carp -sub _carp { require Carp; Carp::carp(@_) } -sub _croak { require Carp; Carp::croak(@_) } - -# The 'natural and safe form' for UNIX (pwd may be setuid root) -sub _backtick_pwd { - # Localize %ENV entries in a way that won't create new hash keys - my @localize = grep exists $ENV{$_}, qw(PATH IFS CDPATH ENV BASH_ENV); - local @ENV{@localize}; - - my $cwd = `$pwd_cmd`; - # Belt-and-suspenders in case someone said "undef $/". - local $/ = "\n"; - # `pwd` may fail e.g. if the disk is full - chomp($cwd) if defined $cwd; - $cwd; -} - -# Since some ports may predefine cwd internally (e.g., NT) -# we take care not to override an existing definition for cwd(). - -unless ($METHOD_MAP{$^O}{cwd} or defined &cwd) { - # The pwd command is not available in some chroot(2)'ed environments - my $sep = $Config::Config{path_sep} || ':'; - my $os = $^O; # Protect $^O from tainting - - - # Try again to find a pwd, this time searching the whole PATH. - if (defined $ENV{PATH} and $os ne 'MSWin32') { # no pwd on Windows - my @candidates = split($sep, $ENV{PATH}); - while (!$found_pwd_cmd and @candidates) { - my $candidate = shift @candidates; - $found_pwd_cmd = 1 if -x "$candidate/pwd"; - } - } - - # MacOS has some special magic to make `pwd` work. - if( $os eq 'MacOS' || $found_pwd_cmd ) - { - *cwd = \&_backtick_pwd; - } - else { - *cwd = \&getcwd; - } -} - -if ($^O eq 'cygwin') { - # We need to make sure cwd() is called with no args, because it's - # got an arg-less prototype and will die if args are present. - local $^W = 0; - my $orig_cwd = \&cwd; - *cwd = sub { &$orig_cwd() } -} - - -# set a reasonable (and very safe) default for fastgetcwd, in case it -# isn't redefined later (20001212 rspier) -*fastgetcwd = \&cwd; - -# A non-XS version of getcwd() - also used to bootstrap the perl build -# process, when miniperl is running and no XS loading happens. -sub _perl_getcwd -{ - abs_path('.'); -} - -# By John Bazik -# -# Usage: $cwd = &fastcwd; -# -# This is a faster version of getcwd. It's also more dangerous because -# you might chdir out of a directory that you can't chdir back into. - -sub fastcwd_ { - my($odev, $oino, $cdev, $cino, $tdev, $tino); - my(@path, $path); - local(*DIR); - - my($orig_cdev, $orig_cino) = stat('.'); - ($cdev, $cino) = ($orig_cdev, $orig_cino); - for (;;) { - my $direntry; - ($odev, $oino) = ($cdev, $cino); - CORE::chdir('..') || return undef; - ($cdev, $cino) = stat('.'); - last if $odev == $cdev && $oino == $cino; - opendir(DIR, '.') || return undef; - for (;;) { - $direntry = readdir(DIR); - last unless defined $direntry; - next if $direntry eq '.'; - next if $direntry eq '..'; - - ($tdev, $tino) = lstat($direntry); - last unless $tdev != $odev || $tino != $oino; - } - closedir(DIR); - return undef unless defined $direntry; # should never happen - unshift(@path, $direntry); - } - $path = '/' . join('/', @path); - if ($^O eq 'apollo') { $path = "/".$path; } - # At this point $path may be tainted (if tainting) and chdir would fail. - # Untaint it then check that we landed where we started. - $path =~ /^(.*)\z/s # untaint - && CORE::chdir($1) or return undef; - ($cdev, $cino) = stat('.'); - die "Unstable directory path, current directory changed unexpectedly" - if $cdev != $orig_cdev || $cino != $orig_cino; - $path; -} -if (not defined &fastcwd) { *fastcwd = \&fastcwd_ } - - -# Keeps track of current working directory in PWD environment var -# Usage: -# use Cwd 'chdir'; -# chdir $newdir; - -my $chdir_init = 0; - -sub chdir_init { - if ($ENV{'PWD'} and $^O ne 'os2' and $^O ne 'dos' and $^O ne 'MSWin32') { - my($dd,$di) = stat('.'); - my($pd,$pi) = stat($ENV{'PWD'}); - if (!defined $dd or !defined $pd or $di != $pi or $dd != $pd) { - $ENV{'PWD'} = cwd(); - } - } - else { - my $wd = cwd(); - $wd = Win32::GetFullPathName($wd) if $^O eq 'MSWin32'; - $ENV{'PWD'} = $wd; - } - # Strip an automounter prefix (where /tmp_mnt/foo/bar == /foo/bar) - if ($^O ne 'MSWin32' and $ENV{'PWD'} =~ m|(/[^/]+(/[^/]+/[^/]+))(.*)|s) { - my($pd,$pi) = stat($2); - my($dd,$di) = stat($1); - if (defined $pd and defined $dd and $di == $pi and $dd == $pd) { - $ENV{'PWD'}="$2$3"; - } - } - $chdir_init = 1; -} - -sub chdir { - my $newdir = @_ ? shift : ''; # allow for no arg (chdir to HOME dir) - $newdir =~ s|///*|/|g unless $^O eq 'MSWin32'; - chdir_init() unless $chdir_init; - my $newpwd; - if ($^O eq 'MSWin32') { - # get the full path name *before* the chdir() - $newpwd = Win32::GetFullPathName($newdir); - } - - return 0 unless CORE::chdir $newdir; - - if ($^O eq 'VMS') { - return $ENV{'PWD'} = $ENV{'DEFAULT'} - } - elsif ($^O eq 'MacOS') { - return $ENV{'PWD'} = cwd(); - } - elsif ($^O eq 'MSWin32') { - $ENV{'PWD'} = $newpwd; - return 1; - } - - if (ref $newdir eq 'GLOB') { # in case a file/dir handle is passed in - $ENV{'PWD'} = cwd(); - } elsif ($newdir =~ m#^/#s) { - $ENV{'PWD'} = $newdir; - } else { - my @curdir = split(m#/#,$ENV{'PWD'}); - @curdir = ('') unless @curdir; - my $component; - foreach $component (split(m#/#, $newdir)) { - next if $component eq '.'; - pop(@curdir),next if $component eq '..'; - push(@curdir,$component); - } - $ENV{'PWD'} = join('/',@curdir) || '/'; - } - 1; -} - - -sub _perl_abs_path -{ - my $start = @_ ? shift : '.'; - my($dotdots, $cwd, @pst, @cst, $dir, @tst); - - unless (@cst = stat( $start )) - { - _carp("stat($start): $!"); - return ''; - } - - unless (-d _) { - # Make sure we can be invoked on plain files, not just directories. - # NOTE that this routine assumes that '/' is the only directory separator. - - my ($dir, $file) = $start =~ m{^(.*)/(.+)$} - or return cwd() . '/' . $start; - - # Can't use "-l _" here, because the previous stat was a stat(), not an lstat(). - if (-l $start) { - my $link_target = readlink($start); - die "Can't resolve link $start: $!" unless defined $link_target; - - require File::Spec; - $link_target = $dir . '/' . $link_target - unless File::Spec->file_name_is_absolute($link_target); - - return abs_path($link_target); - } - - return $dir ? abs_path($dir) . "/$file" : "/$file"; - } - - $cwd = ''; - $dotdots = $start; - do - { - $dotdots .= '/..'; - @pst = @cst; - local *PARENT; - unless (opendir(PARENT, $dotdots)) - { - # probably a permissions issue. Try the native command. - require File::Spec; - return File::Spec->rel2abs( $start, _backtick_pwd() ); - } - unless (@cst = stat($dotdots)) - { - _carp("stat($dotdots): $!"); - closedir(PARENT); - return ''; - } - if ($pst[0] == $cst[0] && $pst[1] == $cst[1]) - { - $dir = undef; - } - else - { - do - { - unless (defined ($dir = readdir(PARENT))) - { - _carp("readdir($dotdots): $!"); - closedir(PARENT); - return ''; - } - $tst[0] = $pst[0]+1 unless (@tst = lstat("$dotdots/$dir")) - } - while ($dir eq '.' || $dir eq '..' || $tst[0] != $pst[0] || - $tst[1] != $pst[1]); - } - $cwd = (defined $dir ? "$dir" : "" ) . "/$cwd" ; - closedir(PARENT); - } while (defined $dir); - chop($cwd) unless $cwd eq '/'; # drop the trailing / - $cwd; -} - - -my $Curdir; -sub fast_abs_path { - local $ENV{PWD} = $ENV{PWD} || ''; # Guard against clobberage - my $cwd = getcwd(); - require File::Spec; - my $path = @_ ? shift : ($Curdir ||= File::Spec->curdir); - - # Detaint else we'll explode in taint mode. This is safe because - # we're not doing anything dangerous with it. - ($path) = $path =~ /(.*)/s; - ($cwd) = $cwd =~ /(.*)/s; - - unless (-e $path) { - _croak("$path: No such file or directory"); - } - - unless (-d _) { - # Make sure we can be invoked on plain files, not just directories. - - my ($vol, $dir, $file) = File::Spec->splitpath($path); - return File::Spec->catfile($cwd, $path) unless length $dir; - - if (-l $path) { - my $link_target = readlink($path); - die "Can't resolve link $path: $!" unless defined $link_target; - - $link_target = File::Spec->catpath($vol, $dir, $link_target) - unless File::Spec->file_name_is_absolute($link_target); - - return fast_abs_path($link_target); - } - - return $dir eq File::Spec->rootdir - ? File::Spec->catpath($vol, $dir, $file) - : fast_abs_path(File::Spec->catpath($vol, $dir, '')) . '/' . $file; - } - - if (!CORE::chdir($path)) { - _croak("Cannot chdir to $path: $!"); - } - my $realpath = getcwd(); - if (! ((-d $cwd) && (CORE::chdir($cwd)))) { - _croak("Cannot chdir back to $cwd: $!"); - } - $realpath; -} - -# added function alias to follow principle of least surprise -# based on previous aliasing. --tchrist 27-Jan-00 -*fast_realpath = \&fast_abs_path; - - -# --- PORTING SECTION --- - -# VMS: $ENV{'DEFAULT'} points to default directory at all times -# 06-Mar-1996 Charles Bailey bailey@newman.upenn.edu -# Note: Use of Cwd::chdir() causes the logical name PWD to be defined -# in the process logical name table as the default device and directory -# seen by Perl. This may not be the same as the default device -# and directory seen by DCL after Perl exits, since the effects -# the CRTL chdir() function persist only until Perl exits. - -sub _vms_cwd { - return $ENV{'DEFAULT'}; -} - -sub _vms_abs_path { - return $ENV{'DEFAULT'} unless @_; - my $path = shift; - - my $efs = _vms_efs; - my $unix_rpt = _vms_unix_rpt; - - if (defined &VMS::Filespec::vmsrealpath) { - my $path_unix = 0; - my $path_vms = 0; - - $path_unix = 1 if ($path =~ m#(?<=\^)/#); - $path_unix = 1 if ($path =~ /^\.\.?$/); - $path_vms = 1 if ($path =~ m#[\[<\]]#); - $path_vms = 1 if ($path =~ /^--?$/); - - my $unix_mode = $path_unix; - if ($efs) { - # In case of a tie, the Unix report mode decides. - if ($path_vms == $path_unix) { - $unix_mode = $unix_rpt; - } else { - $unix_mode = 0 if $path_vms; - } - } - - if ($unix_mode) { - # Unix format - return VMS::Filespec::unixrealpath($path); - } - - # VMS format - - my $new_path = VMS::Filespec::vmsrealpath($path); - - # Perl expects directories to be in directory format - $new_path = VMS::Filespec::pathify($new_path) if -d $path; - return $new_path; - } - - # Fallback to older algorithm if correct ones are not - # available. - - if (-l $path) { - my $link_target = readlink($path); - die "Can't resolve link $path: $!" unless defined $link_target; - - return _vms_abs_path($link_target); - } - - # may need to turn foo.dir into [.foo] - my $pathified = VMS::Filespec::pathify($path); - $path = $pathified if defined $pathified; - - return VMS::Filespec::rmsexpand($path); -} - -sub _os2_cwd { - $ENV{'PWD'} = `cmd /c cd`; - chomp $ENV{'PWD'}; - $ENV{'PWD'} =~ s:\\:/:g ; - return $ENV{'PWD'}; -} - -sub _win32_cwd_simple { - $ENV{'PWD'} = `cd`; - chomp $ENV{'PWD'}; - $ENV{'PWD'} =~ s:\\:/:g ; - return $ENV{'PWD'}; -} - -sub _win32_cwd { - # Need to avoid taking any sort of reference to the typeglob or the code in - # the optree, so that this tests the runtime state of things, as the - # ExtUtils::MakeMaker tests for "miniperl" need to be able to fake things at - # runtime by deleting the subroutine. *foo{THING} syntax on a symbol table - # lookup avoids needing a string eval, which has been reported to cause - # problems (for reasons that we haven't been able to get to the bottom of - - # rt.cpan.org #56225) - if (*{$DynaLoader::{boot_DynaLoader}}{CODE}) { - $ENV{'PWD'} = Win32::GetCwd(); - } - else { # miniperl - chomp($ENV{'PWD'} = `cd`); - } - $ENV{'PWD'} =~ s:\\:/:g ; - return $ENV{'PWD'}; -} - -*_NT_cwd = defined &Win32::GetCwd ? \&_win32_cwd : \&_win32_cwd_simple; - -sub _dos_cwd { - if (!defined &Dos::GetCwd) { - $ENV{'PWD'} = `command /c cd`; - chomp $ENV{'PWD'}; - $ENV{'PWD'} =~ s:\\:/:g ; - } else { - $ENV{'PWD'} = Dos::GetCwd(); - } - return $ENV{'PWD'}; -} - -sub _qnx_cwd { - local $ENV{PATH} = ''; - local $ENV{CDPATH} = ''; - local $ENV{ENV} = ''; - $ENV{'PWD'} = `/usr/bin/fullpath -t`; - chomp $ENV{'PWD'}; - return $ENV{'PWD'}; -} - -sub _qnx_abs_path { - local $ENV{PATH} = ''; - local $ENV{CDPATH} = ''; - local $ENV{ENV} = ''; - my $path = @_ ? shift : '.'; - local *REALPATH; - - defined( open(REALPATH, '-|') || exec '/usr/bin/fullpath', '-t', $path ) or - die "Can't open /usr/bin/fullpath: $!"; - my $realpath = ; - close REALPATH; - chomp $realpath; - return $realpath; -} - -sub _epoc_cwd { - $ENV{'PWD'} = EPOC::getcwd(); - return $ENV{'PWD'}; -} - - -# Now that all the base-level functions are set up, alias the -# user-level functions to the right places - -if (exists $METHOD_MAP{$^O}) { - my $map = $METHOD_MAP{$^O}; - foreach my $name (keys %$map) { - local $^W = 0; # assignments trigger 'subroutine redefined' warning - no strict 'refs'; - *{$name} = \&{$map->{$name}}; - } -} - -# In case the XS version doesn't load. -*abs_path = \&_perl_abs_path unless defined &abs_path; -*getcwd = \&_perl_getcwd unless defined &getcwd; - -# added function alias for those of us more -# used to the libc function. --tchrist 27-Jan-00 -*realpath = \&abs_path; - -1; diff --git a/Parallel/ForkManager.pm b/Parallel/ForkManager.pm deleted file mode 100644 index 0de0d20..0000000 --- a/Parallel/ForkManager.pm +++ /dev/null @@ -1,416 +0,0 @@ -=head1 NAME - -Parallel::ForkManager - A simple parallel processing fork manager - -=head1 SYNOPSIS - - use Parallel::ForkManager; - - $pm = new Parallel::ForkManager($MAX_PROCESSES); - - foreach $data (@all_data) { - # Forks and returns the pid for the child: - my $pid = $pm->start and next; - - ... do some work with $data in the child process ... - - $pm->finish; # Terminates the child process - } - -=head1 DESCRIPTION - -This module is intended for use in operations that can be done in parallel -where the number of processes to be forked off should be limited. Typical -use is a downloader which will be retrieving hundreds/thousands of files. - -The code for a downloader would look something like this: - - use LWP::Simple; - use Parallel::ForkManager; - - ... - - @links=( - ["http://www.foo.bar/rulez.data","rulez_data.txt"], - ["http://new.host/more_data.doc","more_data.doc"], - ... - ); - - ... - - # Max 30 processes for parallel download - my $pm = new Parallel::ForkManager(30); - - foreach my $linkarray (@links) { - $pm->start and next; # do the fork - - my ($link,$fn) = @$linkarray; - warn "Cannot get $fn from $link" - if getstore($link,$fn) != RC_OK; - - $pm->finish; # do the exit in the child process - } - $pm->wait_all_children; - -First you need to instantiate the ForkManager with the "new" constructor. -You must specify the maximum number of processes to be created. If you -specify 0, then NO fork will be done; this is good for debugging purposes. - -Next, use $pm->start to do the fork. $pm returns 0 for the child process, -and child pid for the parent process (see also L). -The "and next" skips the internal loop in the parent process. NOTE: -$pm->start dies if the fork fails. - -$pm->finish terminates the child process (assuming a fork was done in the -"start"). - -NOTE: You cannot use $pm->start if you are already in the child process. -If you want to manage another set of subprocesses in the child process, -you must instantiate another Parallel::ForkManager object! - -=head1 METHODS - -=over 5 - -=item new $processes - -Instantiate a new Parallel::ForkManager object. You must specify the maximum -number of children to fork off. If you specify 0 (zero), then no children -will be forked. This is intended for debugging purposes. - -=item start [ $process_identifier ] - -This method does the fork. It returns the pid of the child process for -the parent, and 0 for the child process. If the $processes parameter -for the constructor is 0 then, assuming you're in the child process, -$pm->start simply returns 0. - -An optional $process_identifier can be provided to this method... It is used by -the "run_on_finish" callback (see CALLBACKS) for identifying the finished -process. - -=item finish [ $exit_code ] - -Closes the child process by exiting and accepts an optional exit code -(default exit code is 0) which can be retrieved in the parent via callback. -If you use the program in debug mode ($processes == 0), this method doesn't -do anything. - -=item set_max_procs $processes - -Allows you to set a new maximum number of children to maintain. Returns -the previous setting. - -=item wait_all_children - -You can call this method to wait for all the processes which have been -forked. This is a blocking wait. - -=back - -=head1 CALLBACKS - -You can define callbacks in the code, which are called on events like starting -a process or upon finish. - -The callbacks can be defined with the following methods: - -=over 4 - -=item run_on_finish $code [, $pid ] - -You can define a subroutine which is called when a child is terminated. It is -called in the parent process. - -The paremeters of the $code are the following: - - - pid of the process, which is terminated - - exit code of the program - - identification of the process (if provided in the "start" method) - - exit signal (0-127: signal name) - - core dump (1 if there was core dump at exit) - -=item run_on_start $code - -You can define a subroutine which is called when a child is started. It called -after the successful startup of a child in the parent process. - -The parameters of the $code are the following: - - - pid of the process which has been started - - identification of the process (if provided in the "start" method) - -=item run_on_wait $code, [$period] - -You can define a subroutine which is called when the child process needs to wait -for the startup. If $period is not defined, then one call is done per -child. If $period is defined, then $code is called periodically and the -module waits for $period seconds betwen the two calls. Note, $period can be -fractional number also. The exact "$period seconds" is not guarranteed, -signals can shorten and the process scheduler can make it longer (on busy -systems). - -The $code called in the "start" and the "wait_all_children" method also. - -No parameters are passed to the $code on the call. - -=back - -=head1 EXAMPLE - -=head2 Parallel get - -This small example can be used to get URLs in parallel. - - use Parallel::ForkManager; - use LWP::Simple; - my $pm=new Parallel::ForkManager(10); - for my $link (@ARGV) { - $pm->start and next; - my ($fn)= $link =~ /^.*\/(.*?)$/; - if (!$fn) { - warn "Cannot determine filename from $fn\n"; - } else { - $0.=" ".$fn; - print "Getting $fn from $link\n"; - my $rc=getstore($link,$fn); - print "$link downloaded. response code: $rc\n"; - }; - $pm->finish; - }; - -=head2 Callbacks - -Example of a program using callbacks to get child exit codes: - - use strict; - use Parallel::ForkManager; - - my $max_procs = 5; - my @names = qw( Fred Jim Lily Steve Jessica Bob Dave Christine Rico Sara ); - # hash to resolve PID's back to child specific information - - my $pm = new Parallel::ForkManager($max_procs); - - # Setup a callback for when a child finishes up so we can - # get it's exit code - $pm->run_on_finish( - sub { my ($pid, $exit_code, $ident) = @_; - print "** $ident just got out of the pool ". - "with PID $pid and exit code: $exit_code\n"; - } - ); - - $pm->run_on_start( - sub { my ($pid,$ident)=@_; - print "** $ident started, pid: $pid\n"; - } - ); - - $pm->run_on_wait( - sub { - print "** Have to wait for one children ...\n" - }, - 0.5 - ); - - foreach my $child ( 0 .. $#names ) { - my $pid = $pm->start($names[$child]) and next; - - # This code is the child process - print "This is $names[$child], Child number $child\n"; - sleep ( 2 * $child ); - print "$names[$child], Child $child is about to get out...\n"; - sleep 1; - $pm->finish($child); # pass an exit code to finish - } - - print "Waiting for Children...\n"; - $pm->wait_all_children; - print "Everybody is out of the pool!\n"; - -=head1 BUGS AND LIMITATIONS - -Do not use Parallel::ForkManager in an environment, where other child -processes can affect the run of the main program, so using this module -is not recommended in an environment where fork() / wait() is already used. - -If you want to use more than one copies of the Parallel::ForkManager, then -you have to make sure that all children processes are terminated, before you -use the second object in the main program. - -You are free to use a new copy of Parallel::ForkManager in the child -processes, although I don't think it makes sense. - -=head1 COPYRIGHT - -Copyright (c) 2000 Szabó, Balázs (dLux) - -All right reserved. This program is free software; you can redistribute it -and/or modify it under the same terms as Perl itself. - -=head1 AUTHOR - - dLux (Szabó, Balázs) - -=head1 CREDITS - - Noah Robin (documentation tweaks) - Chuck Hirstius (callback exit status, example) - Grant Hopwood (win32 port) - Mark Southern (bugfix) - -=cut - -package Parallel::ForkManager; -use POSIX ":sys_wait_h"; -use strict; -use vars qw($VERSION); -$VERSION='0.7.5'; - -sub new { my ($c,$processes)=@_; - my $h={ - max_proc => $processes, - processes => {}, - in_child => 0, - }; - return bless($h,ref($c)||$c); -}; - -sub start { my ($s,$identification)=@_; - die "Cannot start another process while you are in the child process" - if $s->{in_child}; - while ($s->{max_proc} && ( keys %{ $s->{processes} } ) >= $s->{max_proc}) { - $s->on_wait; - $s->wait_one_child(defined $s->{on_wait_period} ? &WNOHANG : undef); - }; - $s->wait_children; - if ($s->{max_proc}) { - my $pid=fork(); - die "Cannot fork: $!" if !defined $pid; - if ($pid) { - $s->{processes}->{$pid}=$identification; - $s->on_start($pid,$identification); - } else { - $s->{in_child}=1 if !$pid; - } - return $pid; - } else { - $s->{processes}->{$$}=$identification; - $s->on_start($$,$identification); - return 0; # Simulating the child which returns 0 - } -} - -sub finish { my ($s, $x)=@_; - if ( $s->{in_child} ) { - exit ($x || 0); - } - if ($s->{max_proc} == 0) { # max_proc == 0 - $s->on_finish($$, $x ,$s->{processes}->{$$}, 0, 0); - delete $s->{processes}->{$$}; - } - return 0; -} - -sub wait_children { my ($s)=@_; - return if !keys %{$s->{processes}}; - my $kid; - do { - $kid = $s->wait_one_child(&WNOHANG); - } while $kid > 0 || $kid < -1; # AS 5.6/Win32 returns negative PIDs -}; - -*wait_childs=*wait_children; # compatibility - -sub wait_one_child { my ($s,$par)=@_; - my $kid; - while (1) { - $kid = $s->_waitpid(-1,$par||=0); - last if $kid == 0 || $kid == -1; # AS 5.6/Win32 returns negative PIDs - redo if !exists $s->{processes}->{$kid}; - my $id = delete $s->{processes}->{$kid}; - $s->on_finish( $kid, $? >> 8 , $id, $? & 0x7f, $? & 0x80 ? 1 : 0); - last; - } - $kid; -}; - -sub wait_all_children { my ($s)=@_; - while (keys %{ $s->{processes} }) { - $s->on_wait; - $s->wait_one_child(defined $s->{on_wait_period} ? &WNOHANG : undef); - }; -} - -*wait_all_childs=*wait_all_children; # compatibility; - -sub run_on_finish { my ($s,$code,$pid)=@_; - $s->{on_finish}->{$pid || 0}=$code; -} - -sub on_finish { my ($s,$pid,@par)=@_; - my $code=$s->{on_finish}->{$pid} || $s->{on_finish}->{0} or return 0; - $code->($pid,@par); -}; - -sub run_on_wait { my ($s,$code, $period)=@_; - $s->{on_wait}=$code; - $s->{on_wait_period} = $period; -} - -sub on_wait { my ($s)=@_; - if(ref($s->{on_wait}) eq 'CODE') { - $s->{on_wait}->(); - if (defined $s->{on_wait_period}) { - local $SIG{CHLD} = sub { } if ! defined $SIG{CHLD}; - select undef, undef, undef, $s->{on_wait_period} - }; - }; -}; - -sub run_on_start { my ($s,$code)=@_; - $s->{on_start}=$code; -} - -sub on_start { my ($s,@par)=@_; - $s->{on_start}->(@par) if ref($s->{on_start}) eq 'CODE'; -}; - -sub set_max_procs { my ($s, $mp)=@_; - $s->{max_proc} = $mp; -} - -# OS dependant code follows... - -sub _waitpid { # Call waitpid() in the standard Unix fashion. - return waitpid($_[1],$_[2]); -} - -# On ActiveState Perl 5.6/Win32 build 625, waitpid(-1, &WNOHANG) always -# blocks unless an actual PID other than -1 is given. -sub _NT_waitpid { my ($s, $pid, $par) = @_; - if ($par == &WNOHANG) { # Need to nonblock on each of our PIDs in the pool. - my @pids = keys %{ $s->{processes} }; - # Simulate -1 (no processes awaiting cleanup.) - return -1 unless scalar(@pids); - # Check each PID in the pool. - my $kid; - foreach $pid (@pids) { - $kid = waitpid($pid, $par); - return $kid if $kid != 0; # AS 5.6/Win32 returns negative PIDs. - } - return $kid; - } else { # Normal waitpid() call. - return waitpid($pid, $par); - } -} - -{ - local $^W = 0; - if ($^O eq 'NT' or $^O eq 'MSWin32') { - *_waitpid = \&_NT_waitpid; - } -} - -1; diff --git a/Parallel/ForkManager/callback.pl b/Parallel/ForkManager/callback.pl deleted file mode 100755 index 56a52e2..0000000 --- a/Parallel/ForkManager/callback.pl +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/perl -w -use lib '.'; -use strict; -use Parallel::ForkManager; - -my $max_procs = 5; -my @names = qw( Fred Jim Lily Steve Jessica Bob Dave Christine Rico Sara ); -# hash to resolve PID's back to child specific information - -my $pm = new Parallel::ForkManager($max_procs); - -# Setup a callback for when a child finishes up so we can -# get it's exit code -$pm->run_on_finish( - sub { my ($pid, $exit_code, $ident) = @_; - print "** $ident just got out of the pool ". - "with PID $pid and exit code: $exit_code\n"; - } -); - -$pm->run_on_start( - sub { my ($pid,$ident)=@_; - print "** $ident started, pid: $pid\n"; - } -); - -$pm->run_on_wait( - sub { - print "** Have to wait for one children ...\n" - }, - 0.5, -); - -foreach my $child ( 0 .. $#names ) { - my $pid = $pm->start($names[$child]) and next; - - # This code is the child process - print "This is $names[$child], Child number $child\n"; - sleep ( 2 * $child ); - print "$names[$child], Child $child is about to get out...\n"; - sleep 1; - $pm->finish($child); # pass an exit code to finish -} - -print "Waiting for Children...\n"; -$pm->wait_all_children; -print "Everybody is out of the pool!\n"; - diff --git a/Parallel/ForkManager/parallel_get.pl b/Parallel/ForkManager/parallel_get.pl deleted file mode 100755 index 5c6f3cd..0000000 --- a/Parallel/ForkManager/parallel_get.pl +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/perl -w -use Parallel::ForkManager; -use LWP::Simple; -my $pm=new Parallel::ForkManager(10); -for my $link (@ARGV) { - $pm->start and next; - my ($fn)= $link =~ /^.*\/(.*?)$/; - if (!$fn) { - warn "Cannot determine filename from $fn\n"; - } else { - $0.=" ".$fn; - print "Getting $fn from $link\n"; - my $rc=getstore($link,$fn); - print "$link downloaded. response code: $rc\n"; - }; - $pm->finish; -};