Matrix Science header

repeat_search.cpp

Repeating a search from a Mascot results file.

/*
##############################################################################
# file: repeat_search.cpp                                                    #
# 'msparser' toolkit                                                         #
# Test harness / example code                                                #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2005 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#    $Source: parser/examples/test_cxx/repeat_search.cpp $ #
#     $Author: villek@matrixscience.com $ #
#       $Date: 2018-07-30 16:23:53 +0100 $ #
#   $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $ #
# $NoKeywords::                                                            $ #
##############################################################################
*/

#include "msparser.hpp"
#include <stdio.h>  // For 'popen'
#include <string.h> // For 'strstr' and 'strlen'
#include <iostream>
#include <sstream>
#include <string>

#ifdef _WIN32
#define popen  _popen
#define pclose _pclose
#endif

using namespace matrix_science;

// forward declarations
static bool repeatSearch(const char * filename);
static int  usage(const char * progName);
static void compareResults(ms_mascotresfile & originalSearch, 
                           const char * repeatedSearchFileName);


// Program entry point.
//
// Expects exactly one argument: the path of a Mascot results file whose
// search should be repeated.  With any other argument count the usage text
// is printed instead.
//
// Returns 0 when the search was repeated successfully, and 1 when it failed
// or when the usage text was shown.
int main(int argc, char * argv[])
{
    if (argc != 2)
        return usage(argv[0]);

    // repeatSearch() reports success as 'true'.  Returning the bool directly
    // would give exit status 1 on success, so map it to the conventional
    // 0 = success / non-zero = failure.
    return repeatSearch(argv[1]) ? 0 : 1;
}


static bool repeatSearch(const char * filename)
{
    bool success = false;

    ms_mascotresfile file(filename);
    if (file.isValid())
    {
        std::ostringstream s;     // Build up a MIME format string with all parameters
        s << "----12345\n"
          << "Content-Disposition: form-data; name=\"QUE\""
          << std::endl << std::endl;

        // Parameters section
        int count = 1;
        std::string key = file.enumerateSectionKeys(ms_mascotresfile::SEC_PARAMETERS,
                                                    count);
        while (!key.empty())
        {
            std::string val = file.getSectionValueStr(ms_mascotresfile::SEC_PARAMETERS, 
                                         key.c_str());
            // To search against a different database, add && key != "DB"
            if (!val.empty() && key != "INTERMEDIATE" && key != "RULES" && key != "SEARCH")
                s << key << "=" << val << std::endl;
            key = file.enumerateSectionKeys(ms_mascotresfile::SEC_PARAMETERS, ++count);
        }
        // To search against a different DB add: s << "DB=MY_DB" << std::endl;

        // Most flexible to repeat each search as a 'sequence' search.
        s << "SEARCH=SQ" << std::endl;

        // For ms-ms data, tell nph-mascot where to find the ions data
        s << "INTERMEDIATE=" << filename << std::endl;

        // Now the repeat search data
        for (int q=1; q <= file.getNumQueries(); q++)
            s << file.getRepeatSearchString(q) << std::endl;

        s << "----12345--\n"; // terminating line for MIME format file

        // Start nph-mascot.exe, and redirect the output to tmp.txt
        // Note that for Unix, you may need to use ./nph-mascot.exe
        FILE * f = popen("./nph-mascot.exe 4 -commandline > tmp.txt", "w"); // 4 = repeat search
        if (f)
        {
            fwrite(s.str().c_str(), s.str().length(), 1, f);
            pclose(f);

            if ((f = fopen("tmp.txt", "r")) != 0)
            {
                char buf[1000];
                while (fgets(buf, sizeof(buf), f))
                {
                    if (strstr(buf, "SUCCESS") && fgets(buf, sizeof(buf), f))
                    {
                        if (buf[strlen(buf)-1] == '\n')
                            buf[strlen(buf)-1] = '\0';
                        compareResults(file, buf);
                        success = true;
                    }
                    else if (strstr(buf, "ERROR"))
                    {
                        std::cout << "Search failed: " << buf;
                        while (fgets(buf, sizeof(buf), f))
                        {
                            std::cout << buf;
                        }
                    }
                }
                fclose(f);
            }
            else
            {
                std::cout << "Can't open tmp file. \n"; 
            }
        }
        else
        {
            std::cout << "Cannot open pipe to nph-mascot.exe to repeat search for file " 
                      << filename << std::endl;
        }
    }
    else
    {
        std::cout << "Cannot open results file " << filename 
                  << " " << file.getLastErrorString() << std::endl;
    }
    return success;
}


// Compare the scores of a repeated search with those of the original search
// and print a short report to stdout.
//
//   originalSearch          the already opened original results file
//   repeatedSearchFileName  name of the results file of the repeated search
//
// If the original search contains PMF data, the scores of protein hit 1 of
// the two protein summaries are compared.  Otherwise the ions scores of the
// rank 1 peptide of each query are compared.  A line is printed for every
// score that is more than 10 higher in the repeated search; if no such line
// was printed, a single "Similar results" line is written.
//
// NOTE(review): scores that are *lower* in the repeated search are not
// reported and therefore also end up as "Similar results".
static void compareResults(ms_mascotresfile & originalSearch, 
                           const char * repeatedSearchFileName)
{
    // Minimum score increase that is worth reporting
    const double scoreThreshold = 10.0;

    ms_mascotresfile repeatedSearch(repeatedSearchFileName);
    if (!repeatedSearch.isValid())
    {
        std::cout << "Invalid repeat search " 
                  << repeatedSearch.getLastErrorString()
                  << std::endl;
        return;
    }

    bool anyReport = false;

    if (originalSearch.anyPMF())
    {
        // Use protein summary
        ms_proteinsummary originalResults(originalSearch);
        ms_proteinsummary repeatedResults(repeatedSearch);

        ms_protein * originalProt = originalResults.getHit(1);
        ms_protein * repeatedProt = repeatedResults.getHit(1);
        if (originalProt && repeatedProt)
        {
            // Positive when the repeated search scored higher
            const double diff = repeatedProt->getScore() - originalProt->getScore();
            if (diff > scoreThreshold)
            {
                std::cout << "Protein score is "
                          << diff
                          << " higher for search "
                          << repeatedSearchFileName
                          << " than "
                          << originalSearch.getFileName()
                          << std::endl;
                anyReport = true;
            }
        }
    }
    else
    {
        // Use peptide summary
        ms_peptidesummary originalResults(originalSearch);
        ms_peptidesummary repeatedResults(repeatedSearch);

        // Compare peptide scores
        const int numQueries = originalSearch.getNumQueries();
        for (int q = 1; q <= numQueries; q++)
        {
            ms_peptide * pepOriginal = 0;
            ms_peptide * pepRepeated = 0;
            if (originalResults.getPeptide(q, 1, pepOriginal)
            &&  repeatedResults.getPeptide(q, 1, pepRepeated)
            &&  pepOriginal && pepRepeated)
            {
                // Positive when the repeated search scored higher
                const double diff = pepRepeated->getIonsScore() 
                                  - pepOriginal->getIonsScore();
                if (diff > scoreThreshold)
                {
                    std::cout << "Query " << q 
                              << " has score " << diff
                              << " higher for search "
                              << repeatedSearchFileName
                              << " than "
                              << originalSearch.getFileName()
                              << std::endl;

                    anyReport = true;
                }
            }
        }
    }

    if (!anyReport)
        std::cout << "Similar results for "
                  << originalSearch.getFileName()
                  << " and "
                  << repeatedSearchFileName
                  << std::endl;
}


// Print the command line help for this program to stdout.
//
//   progName  name the program was started with (normally argv[0])
//
// Always returns 1, so the result can be used directly as a failure
// exit status.
static int usage(const char * progName)
{
    std::ostream & out = std::cout;

    out << progName << " results_file " << std::endl;
    out << "Given an mascot results file name, repeat the search against the same data"
        << std::endl;
    out << "   results_file is a full path to a results file" << std::endl;
    out << "The program must be run from the mascot cgi directory" << std::endl;

    return 1;
}



Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29