【问题标题】:hp model protein folding(HP 模型蛋白质折叠)
【发布时间】:2017-03-22 01:18:20
【问题描述】:

K. A. Dill 于 1985 年提出了 HP 模型 [Dil85]。在该模型中,氨基酸分为两类:疏水性 (H) 和亲水性 (P)。最优折叠是使相邻但非共价结合的 H-H 对数量最大的折叠,以此体现疏水效应。下面是一个例子:

字符串HPHHHHPPHPPH的折叠如下所示。此折叠的得分为 4(见红色虚线邻接)。

               H - P   P - P
                   |   |   |
                   H   H   H - P
                   |   |       |
                   H - H   H - P

我正在尝试在 C++ 中创建一个算法,以最大化分数并返回可能的最佳配置,但我的代码并不能很好地工作。所以我请求你的帮助。

这是我的代码:

#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <math.h>
using namespace std;

// Marker stored in every grid cell that holds no residue.
string EMPTY = " ";

// Exhaustive backtracking search over self-avoiding folds of an HP chain on a
// square lattice.
//
// On entry the residue protein[sequenceIndex - 1] is expected to sit at
// (posX, posY). The function steps one cell in `direction`, places
// protein[sequenceIndex] there, and then tries every direction for the rest
// of the chain.
//
// Parameters:
//   protein        chain of 'H' / 'P' residues.
//   bestScore      best score found so far by the caller.
//   sequenceIndex  index of the residue this call places.
//   currentGrid    lattice; cells equal to EMPTY are free. It is modified
//                  while the search runs and restored before returning.
//   posX, posY     row / column of the previously placed residue.
//   direction      'R', 'D', 'L' or 'U': the move to make from (posX, posY).
//   score          H-H contacts accumulated by the residues placed so far.
//   pDirection     moves of the fold currently being explored; one entry is
//                  pushed for this call and popped again before returning.
//
// Returns the larger of bestScore and the best score of any complete fold
// reachable through this move. A move that leaves the grid or hits an
// occupied cell is a dead end and returns bestScore unchanged.
int solve(string protein,int bestScore,unsigned int sequenceIndex, vector<vector<string> > & currentGrid, int posX, int posY, char direction,int score,vector<char> & pDirection)
{
    // Nothing left to place: the fold handed in by the caller is complete.
    if (sequenceIndex >= protein.size())
    {
        return (score > bestScore) ? score : bestScore;
    }

    // The four lattice moves with their row / column deltas, in trial order.
    const char allDirections[4] = {'R','D','L','U'};
    const int rowStep[4] = {0, 1, 0, -1};
    const int colStep[4] = {1, 0, -1, 0};

    int moveRow = 0;
    int moveCol = 0;
    bool knownDirection = false;
    for (int i = 0; i < 4; i++)
    {
        if (direction == allDirections[i])
        {
            moveRow = rowStep[i];
            moveCol = colStep[i];
            knownDirection = true;
        }
    }
    if (!knownDirection)
    {
        return bestScore;
    }

    posX += moveRow;
    posY += moveCol;

    // Dead end: the step leaves the lattice or lands on an occupied cell.
    const int rowCount = static_cast<int>(currentGrid.size());
    if (posX < 0 || posX >= rowCount ||
        posY < 0 || posY >= static_cast<int>(currentGrid[posX].size()) ||
        currentGrid[posX][posY] != EMPTY)
    {
        return bestScore;
    }

    const char residue = protein[sequenceIndex];

    if (residue == 'H')
    {
        // Count every H neighbour of the new cell except the cell we came
        // from: that one holds the chain predecessor, which is covalently
        // bonded and therefore not a contact. Later residues are not on the
        // grid yet, so each contact is counted exactly once.
        for (int i = 0; i < 4; i++)
        {
            if (rowStep[i] == -moveRow && colStep[i] == -moveCol)
            {
                continue;
            }
            const int r = posX + rowStep[i];
            const int c = posY + colStep[i];
            if (r < 0 || r >= rowCount)
            {
                continue;
            }
            if (c < 0 || c >= static_cast<int>(currentGrid[r].size()))
            {
                continue;
            }
            if (currentGrid[r][c] == "H")
            {
                score += 1;
            }
        }
    }

    // Place the residue in the shared grid; it is removed again below, so no
    // per-call copy of the grid is needed.
    currentGrid[posX][posY] = string(1, residue);
    pDirection.push_back(direction);

    if (sequenceIndex + 1 == protein.size())
    {
        // Whole chain placed: this is a complete fold.
        if (score > bestScore)
        {
            bestScore = score;
        }
    }
    else
    {
        for (int i = 0; i < 4; i++)
        {
            bestScore = solve(protein, bestScore, sequenceIndex + 1, currentGrid, posX, posY, allDirections[i], score, pDirection);
        }
    }

    // Backtrack: undo this call's placement and move.
    pDirection.pop_back();
    currentGrid[posX][posY] = EMPTY;

    return bestScore;
}

// Folds a hard-coded HP chain and prints the chain, its length and the best
// score returned by solve().
int main()
{
    const string protein = "HPPHPPH";

    cout<<protein<<endl;
    cout<<protein.size()<<endl;

    // A chain of fewer than two residues has no fold to search and no
    // possible H-H contact.
    if (protein.size() < 2)
    {
        cout<<0<<endl;
        return 0;
    }

    // The first residue sits at (centre, centre) and the chain can reach at
    // most size-1 cells away from it. A side of 2*size+1 keeps every residue
    // and every neighbouring cell that is inspected inside the grid.
    const int centre = static_cast<int>(protein.size());
    const int gridSize = 2 * centre + 1;
    vector<vector<string> > currentGrid(gridSize, vector<string>(gridSize, EMPTY));

    // Place only the first residue (not the whole chain) in the middle.
    currentGrid[centre][centre] = protein.substr(0, 1);

    // Residue 0 is already on the grid, so the search starts by placing
    // residue 1. Fixing the first move to 'R' loses nothing: any fold can be
    // rotated so that its first step points right without changing its score.
    const unsigned int sequenceIndex = 1;
    const int score = 0;
    const int bestScore = 0;
    vector<char> pDirection;

    cout<<solve(protein,bestScore,sequenceIndex, currentGrid,centre,centre,'R',score,pDirection)<<endl;
    return 0;
}

【问题讨论】:

  • “效果不太好”。这并不能清楚地解释您需要帮助解决什么问题。请说明您想要回答的具体问题、错误行为或问题。另外,请注意 C 和 C++ 是不同的语言 - 仅使用相关标签。
  • 嗨 kaylum,我需要的是一个示例方法,用来计算得分,即蛋白质中相邻但未共价结合的 H-H 对的数量(使该数量最大化,以体现疏水效应),因为我正在使用回溯法验证所有可能的配置,并返回其中的最佳得分。
  • 以下是一些例子。蛋白质序列:HPPH,序列长度:4,最佳得分:1,可能的配置:[ H H | | P - P ];蛋白质序列:HHPPHH,序列长度:6,最佳得分:2,可能的配置:[ P - H - H | P - H - H ];蛋白质序列:HHHHHHH,序列长度:7,最佳得分:2,可能的配置:[ H - H | H - H | H - H | H ]。

标签: c++ algorithm vector


【解决方案1】:

网络搜索证实了我在媒体上听到的说法:就一般情形而言,蛋白质折叠问题是很难的。http://www.brown.edu/Research/Istrail_Lab/papers/10.1.1.110.3139.pdf 描述了与您提出的问题类似的问题的近似解决方案。

【讨论】:

    【解决方案2】:

    对于那些偶然发现此问题的人,我已修复此代码。虽然它非常慢,因为它正在尝试所有可能的结构。无论如何,给你:

    #include <iostream>
    #include <string>
    #include <fstream>
    #include <vector>
    #include <math.h>
    #include <unistd.h>
    
    using namespace std;
    
    // Marker stored in every grid cell that holds no residue.
    string EMPTY = "_";
    // Number of complete folds seen so far; solve() increments it each time
    // the whole chain has been placed.
    int numfolds = 0;
    
    // Forward declarations; both functions are defined further down.
    void printGrid(vector<vector<string> > grid);
    int solve(string protein,int bestScore,unsigned int sequenceIndex, vector<vector<string> > & currentGrid, int posX, int posY, char direction,int score,vector<char> & pDirection);
    
    // Writes the grid to standard output, one text line per row, with the
    // cells of a row concatenated without separators.
    //
    // Each row is printed using its own length, so a grid whose rows differ
    // in length (or that is not square) is printed without reading past the
    // end of a row. The parameter stays by-value to match the forward
    // declaration of this function.
    void printGrid(vector<vector<string> > grid)
    {
        for (size_t row = 0; row < grid.size(); row++)
        {
            for (size_t col = 0; col < grid[row].size(); col++)
            {
                cout << grid[row][col];
            }
            // endl (not '\n') so each row shows up immediately during a
            // long-running search.
            cout << endl;
        }
    }
    
    
    // Exhaustive backtracking search over self-avoiding folds of an HP chain
    // on a square lattice.
    //
    // On entry the residue protein[sequenceIndex - 1] is expected to sit at
    // (posX, posY). The function steps one cell in `direction`, places
    // protein[sequenceIndex] there, and then tries every direction for the
    // rest of the chain. Each complete fold increments numfolds; a fold that
    // beats bestScore is reported on standard output together with its grid.
    //
    // Parameters:
    //   protein        chain of 'H' / 'P' residues.
    //   bestScore      best score found so far by the caller.
    //   sequenceIndex  index of the residue this call places.
    //   currentGrid    lattice; cells equal to EMPTY are free. It is modified
    //                  while the search runs and restored before returning.
    //   posX, posY     row / column of the previously placed residue.
    //   direction      'R', 'D', 'L' or 'U': the move to make.
    //   score          H-H contacts accumulated by the residues placed so far.
    //   pDirection     moves of the fold currently being explored; one entry
    //                  is pushed for this call and popped before returning.
    //
    // Returns the larger of bestScore and the best score of any complete fold
    // reachable through this move. A move that leaves the grid or hits an
    // occupied cell is a dead end and returns bestScore unchanged.
    int solve(string protein,int bestScore,unsigned int sequenceIndex, vector<vector<string> > & currentGrid, int posX, int posY, char direction,int score,vector<char> & pDirection)
    {
        // Nothing left to place: the fold handed in by the caller is complete.
        if (sequenceIndex >= protein.size())
        {
            return (score > bestScore) ? score : bestScore;
        }

        // The four lattice moves with their row / column deltas, in trial order.
        const char allDirections[4] = {'R','D','L','U'};
        const int rowStep[4] = {0, 1, 0, -1};
        const int colStep[4] = {1, 0, -1, 0};

        int moveRow = 0;
        int moveCol = 0;
        bool knownDirection = false;
        for (int i = 0; i < 4; i++)
        {
            if (direction == allDirections[i])
            {
                moveRow = rowStep[i];
                moveCol = colStep[i];
                knownDirection = true;
            }
        }
        if (!knownDirection)
        {
            return bestScore;
        }

        posX += moveRow;
        posY += moveCol;

        // Dead end: the step leaves the lattice or lands on an occupied cell.
        const int rowCount = static_cast<int>(currentGrid.size());
        if (posX < 0 || posX >= rowCount ||
            posY < 0 || posY >= static_cast<int>(currentGrid[posX].size()) ||
            currentGrid[posX][posY] != EMPTY)
        {
            return bestScore;
        }

        const char residue = protein[sequenceIndex];

        if (residue == 'H')
        {
            // Count every H neighbour of the new cell except the cell we came
            // from: that one holds the chain predecessor, which is covalently
            // bonded and therefore not a contact. Later residues are not on
            // the grid yet, so each contact is counted exactly once.
            for (int i = 0; i < 4; i++)
            {
                if (rowStep[i] == -moveRow && colStep[i] == -moveCol)
                {
                    continue;
                }
                const int r = posX + rowStep[i];
                const int c = posY + colStep[i];
                if (r < 0 || r >= rowCount)
                {
                    continue;
                }
                if (c < 0 || c >= static_cast<int>(currentGrid[r].size()))
                {
                    continue;
                }
                if (currentGrid[r][c] == "H")
                {
                    score += 1;
                }
            }
        }

        // Place the residue in the shared grid; it is removed again below, so
        // no per-call copy of the grid is needed.
        currentGrid[posX][posY] = string(1, residue);
        pDirection.push_back(direction);

        if (sequenceIndex + 1 < protein.size())
        {
            // Recursively repeat for all directions.
            for (int i = 0; i < 4; i++)
            {
                bestScore = solve(protein, bestScore, sequenceIndex + 1, currentGrid, posX, posY, allDirections[i], score, pDirection);
            }
        }
        else
        {
            // Whole chain placed: this is a complete fold.
            numfolds++;
            if (score > bestScore)
            {
                cout << "Improved fold with score: " << score << "\t(" << numfolds << "th fold)" << endl;
                printGrid(currentGrid);
                bestScore = score;
            }
        }

        // Backtrack: undo this call's placement and move.
        pDirection.pop_back();
        currentGrid[posX][posY] = EMPTY;

        return bestScore;
    }
    
    
    // Folds a hard-coded HP chain: prints the chain, runs the exhaustive
    // search, then prints the best score and the number of folds examined.
    int main()
    {
        const string protein = "HHHPPHPPPPHPHP";

        cout << "Protein to fold " << protein << endl;

        // Square lattice whose side is twice the chain length, all cells EMPTY.
        const int centre = protein.size();
        const int side = centre * 2;
        vector<vector<string> > currentGrid(side, vector<string>(side, EMPTY));

        // Residue 0 is anchored at (centre, centre).
        currentGrid[centre][centre] = protein.at(0);

        // The search places residue 1 first, one step to the right of the
        // anchor, with no contacts scored yet.
        vector<char> pDirection;
        const int best = solve(protein, 0, 1, currentGrid, centre, centre, 'R', 0, pDirection);

        cout << best << endl;
        cout << numfolds << endl;
    }
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2022-07-14
      • 2020-07-01
      • 1970-01-01
      • 2012-06-27
      • 2013-01-16
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多