2024-04-11 15:51:56 +02:00
// importLocusMetadata.go provides a function to import locus metadata from raw genome files.
// It uses a 23andMe raw genome file to find the chromosomes and positions for new rsIDs.
2024-08-05 09:11:10 +02:00
// The 23andMe file only contains Chromosome and Position information.
// This utility writes a NewLocusMetadata.gob file to the current directory. That file must be renamed to LocusMetadata.gob and placed in the /resources/geneticReferences/locusMetadata folder.
2024-04-11 15:51:56 +02:00
// TODO: Instead of using 23andMe files, use a better full-genome reference which has gene names.
2024-08-05 09:11:10 +02:00
2024-04-11 15:51:56 +02:00
package main
import "seekia/resources/geneticReferences/locusMetadata"
2024-08-05 09:11:10 +02:00
import "seekia/resources/geneticReferences/modifyLocusMetadata"
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
import "seekia/internal/genetics/readRawGenomes"
2024-04-11 15:51:56 +02:00
import "seekia/internal/helpers"
import "seekia/internal/localFilesystem"
2024-08-05 09:11:10 +02:00
import "errors"
2024-04-11 15:51:56 +02:00
import "strings"
import "bytes"
import "log"
2024-08-05 09:11:10 +02:00
func main ( ) {
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
importLocusMetadataFunction := func ( ) error {
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
fileExists , fileBytes , err := localFilesystem . GetFileContents ( "./23andMeRawGenome.txt" )
if ( err != nil ) { return err }
if ( fileExists == false ) {
return errors . New ( "Error: 23AndMeRawGenome.txt does not exist. You must add a 23andMe raw genome file to the importLocusMetadata folder so we can retrieve locus metadata from the file." )
2024-04-11 15:51:56 +02:00
}
2024-08-05 09:11:10 +02:00
fileReader := bytes . NewReader ( fileBytes )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
ableToReadFile , locusLocationsMap , err := readRawGenomes . ReadRawGenomeFileLocusLocations ( fileReader )
if ( err != nil ) { return err }
if ( ableToReadFile == false ) {
return errors . New ( "Unable to read 23andMe file." )
2024-04-11 15:51:56 +02:00
}
2024-08-05 09:11:10 +02:00
// This is a list of rsIDs whose metadata we should add to the locus metadata
lociToAddList := [ ] int64 { }
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
containsDuplicates , _ := helpers . CheckIfListContainsDuplicates ( lociToAddList )
if ( containsDuplicates == true ) {
return errors . New ( "lociToAddList contains duplicates." )
2024-04-11 15:51:56 +02:00
}
2024-08-05 09:11:10 +02:00
// This list will store the loci for which no metadata existed
missingLociList := make ( [ ] int64 , 0 )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
// This is a list of locus metadata objects to add
locusMetadatasToAddList := make ( [ ] locusMetadata . LocusMetadata , 0 )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
for _ , rsID := range lociToAddList {
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
locusLocationObject , exists := locusLocationsMap [ rsID ]
if ( exists == false ) {
// The 23andMe file does not contain metadata for this locus
missingLociList = append ( missingLociList , rsID )
continue
2024-04-11 15:51:56 +02:00
}
2024-08-05 09:11:10 +02:00
locusChromosome := locusLocationObject . Chromosome
locusPosition := locusLocationObject . Position
newLocusMetadataObject := locusMetadata . LocusMetadata {
RSIDsList : [ ] int64 { rsID } ,
Chromosome : locusChromosome ,
Position : locusPosition ,
GeneInfoIsKnown : false ,
GeneExists : false ,
GeneNamesList : make ( [ ] string , 0 ) ,
CompanyAliases : make ( map [ locusMetadata . GeneticsCompany ] [ ] string ) ,
References : make ( map [ string ] string ) ,
}
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
locusMetadatasToAddList = append ( locusMetadatasToAddList , newLocusMetadataObject )
2024-04-11 15:51:56 +02:00
}
2024-08-05 09:11:10 +02:00
quantityOfAddedLoci , newLocusMetadataFileBytes , err := modifyLocusMetadata . AddLocusMetadata ( locusMetadatasToAddList )
if ( err != nil ) { return err }
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
err = localFilesystem . CreateOrOverwriteFile ( newLocusMetadataFileBytes , "./" , "NewLocusMetadata.gob" )
if ( err != nil ) { return err }
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
totalLociToAdd := len ( lociToAddList )
totalLociToAddString := helpers . ConvertIntToString ( totalLociToAdd )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
numberOfImportedLociString := helpers . ConvertIntToString ( quantityOfAddedLoci )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
log . Println ( "Successfully imported " + numberOfImportedLociString + "/" + totalLociToAddString + " locus metadatas!" )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
numberOfMissingLoci := len ( missingLociList )
numberOfMissingLociString := helpers . ConvertIntToString ( numberOfMissingLoci )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
log . Println ( numberOfMissingLociString + " loci contained no metadata in the 23andMe genome file." )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
if ( len ( missingLociList ) > 0 ) {
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
missingLociStringsList := make ( [ ] string , 0 , len ( missingLociList ) )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
for _ , rsID := range missingLociList {
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
rsIDString := helpers . ConvertInt64ToString ( rsID )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
missingLociStringsList = append ( missingLociStringsList , rsIDString )
}
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
missingLociListFormatted := strings . Join ( missingLociStringsList , ", " )
2024-04-11 15:51:56 +02:00
2024-08-05 09:11:10 +02:00
log . Println ( "Missing loci list: " + missingLociListFormatted )
2024-04-11 15:51:56 +02:00
}
2024-08-05 09:11:10 +02:00
return nil
2024-04-11 15:51:56 +02:00
}
2024-08-05 09:11:10 +02:00
err := importLocusMetadataFunction ( )
if ( err != nil ) {
log . Println ( "ERROR: " + err . Error ( ) )
2024-04-11 15:51:56 +02:00
return
}
}