Matrix Science header

repeat_search.pl

Repeating a search from a Mascot results file.

#!/usr/local/bin/perl
##############################################################################
# file: repeat_search.pl                                                     #
# 'msparser' toolkit                                                         #
# Test harness / example code                                                #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2010 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#     $Source: parser/examples/test_perl/repeat_search.pl $ #
#     $Author: villek@matrixscience.com $ #
#       $Date: 2018-07-30 16:23:53 +0100 $ #
#   $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $ #
# $NoKeywords::                                                            $ #
##############################################################################
use strict;
##############################################################################

# This script must be run in the Mascot Server cgi directory.

use lib '../bin';
use msparser;

# A results file name is the one required argument; without it, show the
# usage text and leave with a failure status.
unless (defined $ARGV[0]) {
    usage();
    exit(1);
}

# When this script itself runs as a CGI program, the environment may carry
# a MIME boundary string that differs from the simple boundary written by
# repeatSearch(), which would make nph-mascot.exe fail. Removing both
# variables avoids that.
delete @ENV{ 'CONTENT_TYPE', 'CONTENT_LENGTH' };

repeatSearch($ARGV[0]);


# Repeat the search stored in a Mascot results file and compare the new
# results with the original ones.
#
# The search parameters and the per-query repeat-search strings are read
# from $filename and written, as one MIME form-data part, to the standard
# input of nph-mascot.exe, whose output is redirected to tmp.txt in the
# current directory. tmp.txt is then scanned: the line following one that
# contains SUCCESS is taken as the new results file name and passed to
# compareResults(); when a line contains ERROR, it and the rest of the
# output are printed.
#
# Parameters:
#   $filename - name of the Mascot results file whose search is repeated.
#
# Returns nothing useful. Failures (invalid results file, nph-mascot.exe
# cannot be started, tmp.txt cannot be read, no file name after SUCCESS)
# are reported on standard output and the sub gives up.
sub repeatSearch {
    my ($filename) = @_;

    my $resfile = msparser::ms_mascotresfile->new($filename);

    if (!$resfile->isValid) {
        print "Cannot open results file ", $filename, ": ";
        print $resfile->getLastErrorString(), "\n";
        return;
    }

    # Lines of the MIME form-data request, written out one per line below.
    my @s = (
        "----12345",
        'Content-Disposition: form-data; name="QUE"',
        '',
    );

    # Parameters section. These keys are not copied from the original
    # search; SEARCH and INTERMEDIATE are supplied with new values below.
    # To search against a different database, add DB to this list.
    my %skip = map { $_ => 1 } qw(INTERMEDIATE RULES INTERNALS SEARCH);
    my $section = $msparser::ms_mascotresfile::SEC_PARAMETERS;

    my $count = 1;
    while (my $key = $resfile->enumerateSectionKeys($section, $count)) {
        ++$count;
        next if $skip{$key};

        my $val = $resfile->getSectionValueStr($section, $key);

        # Only empty values are left out. A plain truth test would also
        # drop a parameter whose value is the string "0".
        next unless defined $val and length $val;

        push @s, $key . "=" . $val;
    }

    # To search against a different DB add e.g.
    # push @s, "DB=MY_DB";

    # Most flexible to repeat each search as a 'sequence' search.
    push @s, "SEARCH=SQ";

    # For ms-ms data, tell nph-mascot where to find the ions data
    push @s, "INTERMEDIATE=" . $filename;

    # Now the repeat search data
    for my $q (1 .. $resfile->getNumQueries) {
        push @s, $resfile->getRepeatSearchString($q);
    }

    # Terminating line for MIME format file
    push @s, "----12345--";

    # Start nph-mascot.exe, and redirect the output to tmp.txt
    # (Could use open2 here, but this can be unreliable with nph-mascot.exe)
    # For Unix systems, change nph-mascot.exe to ./nph-mascot.exe
    my $sock;
    if (!open($sock, '|-', 'nph-mascot.exe 4 -commandline > tmp.txt')) {
        print "Cannot start nph-mascot.exe: $!\n";
        return;
    }
    print {$sock} $_, "\n" for @s;

    # The exit status of the command is not examined here; the outcome of
    # the search is read from tmp.txt instead.
    close $sock;

    my $fh;
    if (!open($fh, '<', 'tmp.txt')) {
        print "Cannot read tmp.txt: $!\n";
        return;
    }

    while (my $line = <$fh>) {
        if ($line =~ /SUCCESS/) {
            # Next line contains the results file name
            my $buffer = <$fh>;
            if (!defined $buffer) {
                print "Search succeeded but no results file name was reported\n";
                last;
            }
            chomp $buffer;

            compareResults($resfile, $buffer);
            next;
        }

        if ($line =~ /ERROR/) {
            print "Search failed: ", $line;
            # Print details of error messages (this consumes the rest of
            # the file, so the outer loop ends afterwards).
            while (my $detail = <$fh>) {
                print $detail;
            }
        }
    }

    close $fh;
    return;
}

# Compare the top scores of an original search with those of its repeat and
# print a report to standard output.
#
# When the original search contains PMF data, the scores of protein hit 1
# of both protein summaries are compared; otherwise the ions scores of the
# rank 1 peptide of every query are compared. A line is printed for each
# score that is more than 10 higher in the repeated search than in the
# original one. When no such line was printed, a single "Similar results"
# line is printed instead. Scores that went down are not reported.
#
# Parameters:
#   $originalSearch         - ms_mascotresfile object of the original search.
#   $repeatedSearchFileName - name of the results file of the repeated search.
#
# Returns nothing useful. An invalid repeated results file is reported on
# standard output and ends the comparison.
sub compareResults {
    my ($originalSearch, $repeatedSearchFileName) = @_;

    my $repeatedSearch = msparser::ms_mascotresfile->new($repeatedSearchFileName);

    if (!$repeatedSearch->isValid()) {
        print "Invalid repeat search: ", $repeatedSearch->getLastErrorString(), "\n";
        return;
    }

    # Smallest score increase (repeated minus original) worth reporting.
    my $threshold = 10.0;
    my $originalName = $originalSearch->getFileName();
    my $anyReport = 0;

    if ($originalSearch->anyPMF()) {
        # Use protein summary
        my $originalResults = msparser::ms_proteinsummary->new($originalSearch);
        my $repeatedResults = msparser::ms_proteinsummary->new($repeatedSearch);

        my $originalProt = $originalResults->getHit(1);
        my $repeatedProt = $repeatedResults->getHit(1);

        if ($originalProt and $repeatedProt) {
            my $diff = $repeatedProt->getScore() - $originalProt->getScore();

            if ($diff > $threshold) {
                # $diff is positive when the REPEATED search scored higher,
                # so the repeated file is the one named first.
                print "Protein score is ", $diff, " higher for search ";
                print $repeatedSearchFileName;
                print " than ";
                print $originalName;
                print "\n";

                $anyReport = 1;
            }
        }
    }
    else {
        # Use peptide summary
        my $originalResults = msparser::ms_peptidesummary->new($originalSearch);
        my $repeatedResults = msparser::ms_peptidesummary->new($repeatedSearch);

        # Compare peptide scores
        for my $q (1 .. $originalSearch->getNumQueries) {
            my $pepOriginal = $originalResults->getPeptide($q, 1);
            my $pepRepeated = $repeatedResults->getPeptide($q, 1);

            # Same guard as for the protein hits: compare only when both
            # summaries returned a peptide object for this query.
            next unless $pepOriginal and $pepRepeated;

            my $diff = $pepRepeated->getIonsScore() - $pepOriginal->getIonsScore();

            if ($diff > $threshold) {
                print "Query ", $q, " has score ", $diff;
                print " higher for search ", $repeatedSearchFileName;
                print " than ", $originalName, "\n";

                $anyReport = 1;
            }
        }
    }

    if (!$anyReport) {
        print "Similar results for ", $originalName;
        print " and ", $repeatedSearchFileName, "\n";
    }

    return;
}


# Print the command line help text to the currently selected output handle.
sub usage {
    my @help = (
        'Usage: repeat_search.pl <results file>',
        '',
        'Given an mascot results file name, repeat the search against the same data.',
        '',
        'The program must be run from the mascot cgi directory.',
    );

    # The trailing empty element gives the last line its newline as well.
    print join("\n", @help, '');
}

Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29