Matrix Science header

resfile_ms2quant_to_custom.pl

Example program converting ms_ms2quantitation to ms_customquantitation.

#!/usr/local/bin/perl
##############################################################################
# file: resfile_ms2quant_to_custom.pl                                        #
# Mascot Parser toolkit example code                                         #
##############################################################################
# COPYRIGHT NOTICE                                                           #
# Copyright 1998-2013 Matrix Science Limited  All Rights Reserved.           #
#                                                                            #
##############################################################################
#     $Source: parser/examples/test_perl/resfile_ms2quant_to_custom.pl $
#     $Author: villek@matrixscience.com $
#       $Date: 2018-07-30 16:23:53 +0100 $
#   $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_2_8_1-0-gea32989045 $ 
##############################################################################
use strict;
use warnings;
##############################################################################

use msparser;

if (@ARGV != 4) {
    print STDERR "Usage: $0 <quantitation schema 1 filepath> <quantitation schema 2 filepath> <unimod schema filepath> <results file>\n";
    print STDERR "The location of schema files should be e.g.\n";
    print STDERR "  ../html/xmlns/schema/quantitation_1/quantitation_1.xsd\n";
    print STDERR "  ../html/xmlns/schema/quantitation_2/quantitation_2.xsd\n";
    print STDERR "  ../html/xmlns/schema/unimod_2/unimod_2.xsd\n";
    print STDERR "if running from the Mascot cgi directory.\n";
    exit 1;
}

my $quant_schema1_filepath = $ARGV[0];
my $quant_schema2_filepath = $ARGV[1];
my $unimod_schema_filepath = $ARGV[2];

my $resfile = msparser::ms_mascotresfile->new($ARGV[3], 1);

if (not $resfile->isValid) {
    print STDERR $resfile->getLastErrorString(), "\n";
    exit 1;
}

# And print out related warnings, if any (we know these are only warnings,
# because isValid() returned true earlier):
dump_warnings($resfile->getErrorHandler());
$resfile->clearAllErrors();

my $qmethod = msparser::ms_quant_method->new();

$resfile->setXMLschemaFilePath(
    $msparser::ms_mascotresfile::XML_SCHEMA_QUANTITATION, 
    "http://www.matrixscience.com/xmlns/schema/quantitation_2 $quant_schema2_filepath http://www.matrixscience.com/xmlns/schema/quantitation_1 $quant_schema1_filepath"
);

$resfile->setXMLschemaFilePath(
    $msparser::ms_mascotresfile::XML_SCHEMA_UNIMOD, 
    "http://www.unimod.org/xmlns/schema/unimod_2 $unimod_schema_filepath",
);

my $qmethod = msparser::ms_quant_method->new();

if (not $resfile->getQuantitationMethod($qmethod)) {
    print STDERR $resfile->getLastErrorString(), "\n";
    exit 1;
}

my $pepsum;

do {
    my $datfile = msparser::ms_datfile->new();

    my (undef, $flags, $minprob, $maxhits, $iisb, $minpeplen, $use_pepsum, $flags2)
        = $resfile->get_ms_mascotresults_params($datfile->getMascotOptions());

    if (not $use_pepsum) {
        print STDERR "Results file cannot be opened as a peptide summary\n";
        exit 1;
    }

    $pepsum = msparser::ms_peptidesummary->new(
        $resfile, $flags, $minprob, $maxhits, '', $iisb, $minpeplen, '', $flags2
    );

    if (not $resfile->isValid) {
        print STDERR $resfile->getLastErrorString, "\n";
        exit 1;
    }

    dump_warnings($resfile->getErrorHandler());
    $resfile->clearAllErrors();
};

# First quantitate the file using ms_ms2quantitation:

my $ms2quant = msparser::ms_ms2quantitation->new($pepsum, $qmethod);

if (not $ms2quant->isValid) {
    print STDERR $ms2quant->getLastErrorString(), "\n";
    exit 1;
}

if ($ms2quant->hasIntensityNormalisation()) {
    $ms2quant->normaliseIntensities();
} elsif ($ms2quant->hasPeptideRatioNormalisation()) {
    $ms2quant->normalisePeptideRatios();
}

dump_warnings($ms2quant->getErrorHandler());
$ms2quant->clearAllErrors();

# Then import the data into ms_customquantitation:
    
my $customquant = msparser::ms_customquantitation->new($ms2quant);

if (not $customquant->isValid) {
    print STDERR $customquant->getLastErrorString(), "\n";
    exit 1;
}

do {
    my $err = $customquant->getErrorHandler();

    # And print out related warnings, if any (we know these are only warnings,
    # because isValid() returned true earlier):
    for my $i (1 .. $err->getNumberOfErrors()) {
        print 'Warning: ', $err->getErrorString($i), "\n";
    }
};

$customquant->clearAllErrors();

# Refresh the quantitation method object, in case $customquant altered
# some of the parameters during conversion:

$qmethod = $customquant->getQuantitationMethod();

# Now we can inspect peptide and protein data in $customquant.
# But first, dump quantitation method parameters to see what we have:

do {
    my @comps = ();

    for my $i (0 .. $qmethod->getNumberOfComponents()-1) {
        my $comp = $qmethod->getComponentByNumber($i);
        $comps[$i] = $comp->getName();
    }

    printf "Components: [%s]\n", join(', ', @comps);
};

my @rationames = ();

for my $i (0 .. $qmethod->getNumberOfReportRatios()-1) {
    push @rationames, $qmethod->getReportRatioByNumber($i)->getName();
}

printf "Report ratios: [%s]\n", join(', ', @rationames);

printf "Protein ratio type = %s\n", $qmethod->getProteinRatioType();
printf "Min. number of peptides = %d\n", $qmethod->getMinNumPeptides();

if ($qmethod->haveQuality()) {
    my $q = $qmethod->getQuality();
    printf "Quality: min. precursor charge = %s\n", $q->getMinPrecursorCharge();
    printf "Quality: pep. threshold type = %s\n", $q->getPepThresholdType();
    printf "Quality: pep. threshold value = %s\n", $q->getPepThresholdValue();
} else {
    printf "Quality: no restrictions\n";
}

if ($qmethod->haveNormalisation()) {
    my $q = $qmethod->getNormalisation();
    printf "Normalisation = %s\n", $q->getMethod();

    if ($q->haveProteins()) {
        my $p = $q->getProteins();
        printf " - Accessions = %s\n", join(', ', 
            map { $p->getProtein($_)->getAccession() } 0 .. $p->getNumberOfProteins()-1
        );
    }

    if ($q->havePeptides()) {
        my $p = $q->getPeptides();
        printf "  - Sequences = %s\n", join(', ', 
            map { $p->getPeptide($_)->getSequence() } 0 .. $p->getNumberOfPeptides()-1
        );
    }

    my @component_bases = ();

    for (1 .. $qmethod->getNumberOfComponents()) {
        my $base = $ms2quant->getIntensityNormalisationBase($qmethod->getComponentByNumber($_-1)->getName());
        push @component_bases, $base;
    }

    printf "Intensity normalisation constants: [%s]\n", join(', ', @component_bases);
    printf "Ratio normalisation constants: [%s]\n", join(', ',
        map { $ms2quant->getNormalisationBase($_) } @rationames
    );
} else {
    printf "Normalisation: none\n";
}

if ($qmethod->haveOutliers()) {
    my $q = $qmethod->getOutliers();
    printf "Outliers = %s\n", $q->getMethod();
} else {
    printf "Outliers: none\n";
}

print "\n";

# Collect proteins we're interested in:
my @proteins = ();

for my $i (1 .. $pepsum->getNumberOfHits()) {
    my $hit = $pepsum->getHit($i);
    push @proteins, $hit;

    my $j = 0;
    while (my $protein = $pepsum->getNextFamilyProtein($i, ++$j)) {
        push @proteins, $protein;
    }
}

for my $protein (@proteins) {
    printf "%d::%s:\n", $protein->getDB(), $protein->getAccession();

    for (@rationames) {
        my $ratio = $customquant->getProteinRatio($protein->getAccession(), $protein->getDB(), $_);

        if ($ratio->isMissing()) {
            printf "%10s: ---\n", $_;
        } else {
            printf "%10s = %10g, stderr = %10g, N = %d, normal-p = %6g, hypo-p = %6g\n",
                $_, $ratio->getValue(), $ratio->getStandardError(),
                $ratio->getSampleSize(), $ratio->getNormalityPvalue(),
                $ratio->getHypothesisPvalue();
        }
    }

    print "\n";
    print join(' ', map { sprintf("%10s", $_) } 'Peptide', @rationames), "\n";

    for my $i (1 .. $protein->getNumPeptides()) {
        next if $protein->getPeptideDuplicate($i) == $msparser::ms_protein::DUPE_DuplicateSameQuery;

        my $q = $protein->getPeptideQuery($i);
        my $p = $protein->getPeptideP($i);

        my $peptide = $pepsum->getPeptide($q, $p);

        next if not $peptide;

        my @values = ();

        for (@rationames) {
            # Note that the q,p arguments to ms_peptide_quant_key must be
            # integers. Otherwise the wrong constructor is chosen. Here they
            # are integers, as they are return values from Parser functions
            # that return ints.
            my $ratio = $customquant->getPeptideRatio(msparser::ms_peptide_quant_key->new($q, $p), $_);

            if ($ratio->isMissing()) {
                push @values, sprintf("%10s", '---');
                next;
            }

            if ($ratio->isInfinite()) {
                push @values, sprintf("%10s", '###');
            } else {
                push @values, sprintf("%10g", $ratio->getValue());
            }
        }

        printf "%10s %s\n", sprintf("q%d_p%d", $q, $p), join(' ', @values);
    }

    print "\n";
}

sub dump_warnings {
    my ($err) = @_;

    for my $i (1 .. $err->getNumberOfErrors()) {
        print 'Warning: ', $err->getErrorString($i), "\n";
    }
}


Copyright © 2022 Matrix Science Ltd.  All Rights Reserved. Generated on Thu Mar 31 2022 01:12:29