'Number of items to replace is not a multiple of replacement length when using a for loop
First, I parsed the AAChange.refGene
column from the variant_calls
dataframe and then extract the Refseq ID
, cDNA level change
, and Protein level change
information.
library(tidyr)
variant_calls = read.delim("variant_calls.txt", sep="\t")
info = tidyr::separate(variant_calls["AAChange.refGene"], AAChange.refGene, c("Refseq ID", "cDNA level change", "Protein level change"), ":")
df = cbind(df["Gene.refGene"],info)
Then, I extract the peptide sequence from hg19 (by refseq accession) and construct a 9mer peptide, where the mutation is in the middle (i.e., 4AAs on each side of the mutation).
library(biomaRt)
ensembl <- useMart(biomart="ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="https://grch37.ensembl.org", path="/biomart/martservice")
for(i in 1:length(df$`Refseq ID`)){
seq <- getSequence(id=df$`Refseq ID`[i], type="refseq_mrna", seqType="peptide", mart=ensembl)
seq <- sapply(seq$peptide, nchar)
seq <- sort(seq, decreasing=TRUE)
pep[i] <- seq
}
Warning:
1: In pep[i] <- seq:
number of items to replace is not a multiple of replacement length
Dataframe
> dput(df)
structure(list(Gene.refGene = c("PRELP", "TRIM17", "CTNNB1",
"C3orf30", "CLIP2", "TRIM21", "STARD10", "SYTL2", "SIRT4", "AGBL1",
"PRSS8", "CDH11", "RAD51C", "ARHGEF1", "POU3F4", "PRELP", "TRIM17",
"CTNNB1", "C3orf30", "CLIP2", "TRIM21", "STARD10", "SYTL2", "SIRT4",
"AGBL1", "PRSS8", "CDH11", "RAD51C", "ARHGEF1", "POU3F4", "PRELP",
"TRIM17", "CTNNB1", "C3orf30", "CLIP2", "TRIM21", "STARD10",
"SYTL2", "SIRT4", "AGBL1", "PRSS8", "CDH11", "RAD51C", "ARHGEF1",
"POU3F4"), `Refseq ID` = c("NM_002725", "NM_001024940", "NM_001098209",
"NM_152539", "NM_032421", "NM_003141", "NM_006645", "NM_206927",
"NM_012240", "NM_152336", "NM_002773", "NM_001797", "NM_058216",
"NM_198977", "NM_000307", "NM_002725", "NM_001024940", "NM_001098209",
"NM_152539", "NM_032421", "NM_003141", "NM_006645", "NM_206927",
"NM_012240", "NM_152336", "NM_002773", "NM_001797", "NM_058216",
"NM_198977", "NM_000307", "NM_002725", "NM_001024940", "NM_001098209",
"NM_152539", "NM_032421", "NM_003141", "NM_006645", "NM_206927",
"NM_012240", "NM_152336", "NM_002773", "NM_001797", "NM_058216",
"NM_198977", "NM_000307"), `cDNA level change` = c("c.C301T",
"c.T1054A", "c.T109C", "c.G955A", "c.A2422G", "c.G431A", "c.C749T",
"c.C778A", "c.G209A", "c.A382C", "c.G750C", "c.C2125T", "c.C797A",
"c.C1543T", "c.C356T", "c.C301T", "c.T1054A", "c.T109C", "c.G955A",
"c.A2422G", "c.G431A", "c.C749T", "c.C778A", "c.G209A", "c.A382C",
"c.G750C", "c.C2125T", "c.C797A", "c.C1543T", "c.C356T", "c.C301T",
"c.T1054A", "c.T109C", "c.G955A", "c.A2422G", "c.G431A", "c.C749T",
"c.C778A", "c.G209A", "c.A382C", "c.G750C", "c.C2125T", "c.C797A",
"c.C1543T", "c.C356T"), `Protein level change` = c("p.P101S",
"p.Y352N", "p.S37P", "p.E319K", "p.T808A", "p.G144E", "p.S250L",
"p.P260T", "p.G70E", "p.K128Q", "p.W250C", "p.R709W", "p.A266D",
"p.R515W", "p.A119V", "p.P101S", "p.Y352N", "p.S37P", "p.E319K",
"p.T808A", "p.G144E", "p.S250L", "p.P260T", "p.G70E", "p.K128Q",
"p.W250C", "p.R709W", "p.A266D", "p.R515W", "p.A119V", "p.P101S",
"p.Y352N", "p.S37P", "p.E319K", "p.T808A", "p.G144E", "p.S250L",
"p.P260T", "p.G70E", "p.K128Q", "p.W250C", "p.R709W", "p.A266D",
"p.R515W", "p.A119V")), class = "data.frame", row.names = c(NA,
-45L))
Solution 1:[1]
You need to declare pep
prior to the for
loop, and it is good practice to also declare it's length:
pep <- numeric(length(df$`Refseq ID`))
for(i in 1:length(df$`Refseq ID`)){
seq <- getSequence(id=df$`Refseq ID`[i], type="refseq_mrna", seqType="peptide", mart=ensembl)
seq <- sapply(seq$peptide, nchar)
seq <- sort(seq, decreasing=TRUE)
pep[i] <- seq
}
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | Robert Long |