seekia/utilities/importLocusMetadata/importLocusMetadata.go

127 lines
4.1 KiB
Go

// importLocusMetadata.go provides a function to import locus metadata from raw genome files.
// It uses a 23andMe raw genome file to look up the chromosome and position of each new rsID.
// The 23andMe file only contains chromosome and position information, so gene information is recorded as unknown for every imported locus.
// This utility creates a NewLocusMetadata.gob file, which must be renamed to LocusMetadata.gob and placed in the /resources/geneticReferences/locusMetadata folder.
// TODO: Instead of using 23andMe files, use a better full-genome reference which has gene names.
package main
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/resources/geneticReferences/modifyLocusMetadata"
import "seekia/internal/genetics/readRawGenomes"
import "seekia/internal/helpers"
import "seekia/internal/localFilesystem"
import "errors"
import "strings"
import "bytes"
import "log"
// main imports locus metadata for a hand-maintained list of rsIDs.
//
// It reads ./23andMeRawGenome.txt, looks up the chromosome and position of every
// rsID in lociToAddList, passes the resulting metadata objects to
// modifyLocusMetadata.AddLocusMetadata, and writes the bytes that call returns to
// ./NewLocusMetadata.gob. It then logs how many loci were imported and which
// rsIDs were not found in the genome file.
//
// If any step fails, the error is logged and the process exits with a non-zero
// status so that a calling shell or script can detect the failure.
func main() {

	importLocusMetadataFunction := func() error {

		fileExists, fileBytes, err := localFilesystem.GetFileContents("./23andMeRawGenome.txt")
		if err != nil {
			return err
		}
		if !fileExists {
			// The file name in this message must match the path read above exactly,
			// because the lookup is case-sensitive on most filesystems.
			return errors.New("Error: 23andMeRawGenome.txt does not exist. You must add a 23andMe raw genome file to the importLocusMetadata folder so we can retrieve locus metadata from the file.")
		}

		fileReader := bytes.NewReader(fileBytes)

		ableToReadFile, locusLocationsMap, err := readRawGenomes.ReadRawGenomeFileLocusLocations(fileReader)
		if err != nil {
			return err
		}
		if !ableToReadFile {
			return errors.New("Unable to read 23andMe file.")
		}

		// This is a list of rsIDs whose metadata we should add to the locus metadata.
		// It is filled in by hand before running the utility.
		lociToAddList := []int64{}

		// Only the boolean result is needed here; the second return value is not used.
		containsDuplicates, _ := helpers.CheckIfListContainsDuplicates(lociToAddList)
		if containsDuplicates {
			return errors.New("lociToAddList contains duplicates.")
		}

		// This list will store the loci for which no metadata existed
		missingLociList := make([]int64, 0)

		// This is a list of locus metadata objects to add
		locusMetadatasToAddList := make([]locusMetadata.LocusMetadata, 0, len(lociToAddList))

		for _, rsID := range lociToAddList {

			locusLocationObject, exists := locusLocationsMap[rsID]
			if !exists {
				// The 23andMe file does not contain metadata for this locus
				missingLociList = append(missingLociList, rsID)
				continue
			}

			// The genome file supplies only a chromosome and a position, so all
			// gene-related fields are created empty and marked as unknown.
			newLocusMetadataObject := locusMetadata.LocusMetadata{
				RSIDsList:       []int64{rsID},
				Chromosome:      locusLocationObject.Chromosome,
				Position:        locusLocationObject.Position,
				GeneInfoIsKnown: false,
				GeneExists:      false,
				GeneNamesList:   make([]string, 0),
				CompanyAliases:  make(map[locusMetadata.GeneticsCompany][]string),
				References:      make(map[string]string),
			}

			locusMetadatasToAddList = append(locusMetadatasToAddList, newLocusMetadataObject)
		}

		quantityOfAddedLoci, newLocusMetadataFileBytes, err := modifyLocusMetadata.AddLocusMetadata(locusMetadatasToAddList)
		if err != nil {
			return err
		}

		// The output is written next to the utility under a "New" name; it is not
		// placed into the resources folder by this program.
		err = localFilesystem.CreateOrOverwriteFile(newLocusMetadataFileBytes, "./", "NewLocusMetadata.gob")
		if err != nil {
			return err
		}

		totalLociToAddString := helpers.ConvertIntToString(len(lociToAddList))
		numberOfImportedLociString := helpers.ConvertIntToString(quantityOfAddedLoci)

		log.Println("Successfully imported " + numberOfImportedLociString + "/" + totalLociToAddString + " locus metadatas!")

		numberOfMissingLociString := helpers.ConvertIntToString(len(missingLociList))

		log.Println(numberOfMissingLociString + " loci contained no metadata in the 23andMe genome file.")

		if len(missingLociList) > 0 {

			// Print the missing rsIDs so they can be looked up elsewhere.
			missingLociStringsList := make([]string, 0, len(missingLociList))

			for _, rsID := range missingLociList {
				missingLociStringsList = append(missingLociStringsList, helpers.ConvertInt64ToString(rsID))
			}

			log.Println("Missing loci list: " + strings.Join(missingLociStringsList, ", "))
		}

		return nil
	}

	err := importLocusMetadataFunction()
	if err != nil {
		// log.Fatalln prints the same line log.Println would, then exits with status 1.
		log.Fatalln("ERROR: " + err.Error())
	}
}