2024-04-11 15:51:56 +02:00
|
|
|
|
|
|
|
// readRawGenomes provides functions to read raw genome files from companies like 23andMe and AncestryDNA.
|
|
|
|
// These are used to create genetic analyses.
|
|
|
|
|
|
|
|
package readRawGenomes
|
|
|
|
|
|
|
|
import "seekia/resources/geneticReferences/locusMetadata"
|
|
|
|
|
|
|
|
import "seekia/internal/helpers"
|
|
|
|
|
|
|
|
import "bufio"
|
|
|
|
import "strings"
|
|
|
|
import "errors"
|
|
|
|
import "io"
|
|
|
|
import "time"
|
|
|
|
|
|
|
|
|
|
|
|
// Returns the import version performed by the current version of Seekia
|
|
|
|
// Seekia will continue to improve the ability to scan raw genome files
|
|
|
|
// This includes building a database of the rs identifiers for company-specific snp identifiers
|
|
|
|
// The raw genome metadata within myGenomes.go must be updated whenever the user updates Seekia and a newer import version exists
|
|
|
|
func GetCurrentCompanyImportVersion(companyName string)(int, error){
|
|
|
|
|
|
|
|
if (companyName == "23andMe"){
|
|
|
|
return 1, nil
|
|
|
|
}
|
|
|
|
if (companyName == "AncestryDNA"){
|
|
|
|
return 1, nil
|
|
|
|
}
|
|
|
|
return 0, errors.New("GetCurrentCompanyImportVersion called with invalid company name: " + companyName)
|
|
|
|
}
|
|
|
|
|
|
|
|
type RawGenomeLocusValue struct{
|
|
|
|
|
|
|
|
// This is one of the following: "C", "A", "T", "G", "I", "D"
|
|
|
|
Allele1 string
|
|
|
|
|
|
|
|
// This stores if a second base exists
|
|
|
|
// Base2 will not exist for some chromosomes (Example: Y chromosome)
|
|
|
|
Allele2Exists bool
|
|
|
|
|
|
|
|
// This is one of the following: "C", "A", "T", "G", "I", "D"
|
|
|
|
Allele2 string
|
|
|
|
}
|
|
|
|
|
|
|
|
// This function will always adjust all locus values to represent the Plus strand
|
|
|
|
// We only import genes into the genomeMap which are used for the analysis
|
|
|
|
//
|
|
|
|
// TODO: The ability to process genomes with a different strand will be added
|
|
|
|
// A database to know which SNPs are Plus/Minus for different HG reference genomes needs to be added
|
|
|
|
//Outputs:
|
|
|
|
// -string: Company name ("23andMe", "AncestryDNA")
|
|
|
|
// -int: ImportVersion (This will help when Seekia has a new way of importing the files. Each company has its own importVersion)
|
|
|
|
// -int64: Time file was generated
|
|
|
|
// -int64: Total number of loci (we will only read a tiny fraction of these into the genome map)
|
|
|
|
// -This does not count loci which have no value (Example: "--")
|
|
|
|
// -bool: IsPhased (allele order corresponds to haplotype)
|
|
|
|
// -map[int64]RawGenomeLocusValue: RSID -> Locus allele value(s)
|
|
|
|
// -error (file not readable)
|
|
|
|
func ReadRawGenomeFile(fileReader io.Reader) (string, int, int64, int64, bool, map[int64]RawGenomeLocusValue, error) {
|
|
|
|
|
|
|
|
validBasesList := []string{"C", "A", "T", "G", "I", "D"}
|
|
|
|
|
|
|
|
verifyBase := func(inputBase string)bool{
|
|
|
|
|
|
|
|
for _, baseString := range validBasesList{
|
|
|
|
if (inputBase == baseString){
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
//Outputs:
|
|
|
|
// -bool: Able to read
|
|
|
|
// -int64: RSID int64
|
|
|
|
readRSIDString := func(inputRSID string)(bool, int64){
|
|
|
|
|
|
|
|
stringWithoutPrefix, prefixExists := strings.CutPrefix(inputRSID, "rs")
|
|
|
|
if (prefixExists == false){
|
|
|
|
return false, 0
|
|
|
|
}
|
|
|
|
|
|
|
|
result, err := helpers.ConvertStringToInt64(stringWithoutPrefix)
|
|
|
|
if (err != nil){
|
|
|
|
return false, 0
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, result
|
|
|
|
}
|
|
|
|
|
|
|
|
fileBufioReader := bufio.NewReader(fileReader)
|
|
|
|
|
|
|
|
firstLine, err := fileBufioReader.ReadString('\n')
|
|
|
|
if (err != nil){
|
|
|
|
// File does not have another line
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed genome file: Too short.")
|
|
|
|
}
|
|
|
|
|
|
|
|
fileIsAncestryDNA := strings.HasPrefix(firstLine, "#AncestryDNA raw data download")
|
|
|
|
if (fileIsAncestryDNA == true){
|
|
|
|
|
|
|
|
// AncestryDNA represents all locus values on the Plus strand. (allegedly)
|
|
|
|
|
|
|
|
secondLine, err := fileBufioReader.ReadString('\n')
|
|
|
|
if (err != nil){
|
|
|
|
// File does not have another line
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed genome file: Too short.")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Second line contains time of creation
|
|
|
|
// Example:
|
|
|
|
// "#This file was generated by AncestryDNA at: 02/22/2022 10:10:10 UTC"
|
|
|
|
// Time is Month -> Day -> Year
|
|
|
|
timeOfCreationString := strings.TrimPrefix(secondLine, "#This file was generated by AncestryDNA at: ")
|
|
|
|
|
|
|
|
monthString, remainingString, found := strings.Cut(timeOfCreationString, "/")
|
|
|
|
if (found == false){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed AncestryDNA genome file: Missing Month")
|
|
|
|
}
|
|
|
|
dayString, remainingString, found := strings.Cut(remainingString, "/")
|
|
|
|
if (found == false){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed AncestryDNA genome file: Missing Day")
|
|
|
|
}
|
|
|
|
yearString, _, found := strings.Cut(remainingString, " ")
|
|
|
|
if (found == false){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed AncestryDNA genome file: Missing Year")
|
|
|
|
}
|
|
|
|
|
|
|
|
getMonthObject := func()(time.Month, error){
|
2024-07-19 19:16:28 +02:00
|
|
|
|
|
|
|
switch monthString{
|
|
|
|
case "01":{
|
|
|
|
return time.January, nil
|
|
|
|
}
|
|
|
|
case "02":{
|
|
|
|
return time.February, nil
|
|
|
|
}
|
|
|
|
case "03":{
|
|
|
|
return time.March, nil
|
|
|
|
}
|
|
|
|
case "04":{
|
|
|
|
return time.April, nil
|
|
|
|
}
|
|
|
|
case "05":{
|
|
|
|
return time.May, nil
|
|
|
|
}
|
|
|
|
case "06":{
|
|
|
|
return time.June, nil
|
|
|
|
}
|
|
|
|
case "07":{
|
|
|
|
return time.July, nil
|
|
|
|
}
|
|
|
|
case "08":{
|
|
|
|
return time.August, nil
|
|
|
|
}
|
|
|
|
case "09":{
|
|
|
|
return time.September, nil
|
|
|
|
}
|
|
|
|
case "10":{
|
|
|
|
return time.October, nil
|
|
|
|
}
|
|
|
|
case "11":{
|
|
|
|
return time.November, nil
|
|
|
|
}
|
|
|
|
case "12":{
|
|
|
|
return time.December, nil
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
return time.January, errors.New("Malformed AncestryDNA genome file: Invalid month: " + monthString)
|
|
|
|
}
|
|
|
|
|
|
|
|
monthObject, err := getMonthObject()
|
|
|
|
if (err != nil) { return "", 0, 0, 0, false, nil, err }
|
|
|
|
|
|
|
|
yearInt, err := helpers.ConvertStringToInt(yearString)
|
|
|
|
if (err != nil) {
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed AncestryDNA genome file: Invalid year: " + yearString)
|
|
|
|
}
|
|
|
|
|
|
|
|
dayInt, err := helpers.ConvertStringToInt(dayString)
|
|
|
|
if (err != nil) {
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed AncestryDNA genome file: Invalid day: " + dayString)
|
|
|
|
}
|
|
|
|
|
|
|
|
fileTimeObject := time.Date(yearInt, monthObject, dayInt, 0, 0, 0, 0, time.UTC)
|
|
|
|
|
|
|
|
fileTimeUnix := fileTimeObject.Unix()
|
|
|
|
|
|
|
|
// Now we advance bufio reader to the SNP rows
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
|
|
|
fileLineString, err := fileBufioReader.ReadString('\n')
|
|
|
|
if (err != nil){
|
|
|
|
// File does not have another line
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed genome file: Too short.")
|
|
|
|
}
|
|
|
|
|
|
|
|
// All SNP rows comes after this line:
|
|
|
|
// "rsid chromosome position allele1 allele2"
|
|
|
|
lineReached := strings.HasPrefix(fileLineString, "rsid")
|
|
|
|
if (lineReached == true){
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
numberOfLoci := int64(0)
|
|
|
|
|
|
|
|
genomeMap := make(map[int64]RawGenomeLocusValue)
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
|
|
|
fileLineString, err := fileBufioReader.ReadString('\n')
|
|
|
|
if (err != nil){
|
|
|
|
// File does not have another line
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if (fileLineString == "\n"){
|
|
|
|
// This is the final line
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
numberOfLoci += 1
|
|
|
|
|
|
|
|
fileLineWithoutNewline := strings.TrimSuffix(fileLineString, "\n")
|
|
|
|
|
|
|
|
// Example row (tab delimited)
|
|
|
|
// "rs9777703 1 928836 T T"
|
|
|
|
|
|
|
|
rowColumnsSlice := strings.Split(fileLineWithoutNewline, "\t")
|
|
|
|
if (len(rowColumnsSlice) != 5){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed AncestryDNA genome file: Invalid row detected: " + fileLineString)
|
|
|
|
}
|
|
|
|
|
|
|
|
snpIdentifier := rowColumnsSlice[0]
|
|
|
|
|
|
|
|
isValid, locusRSID := readRSIDString(snpIdentifier)
|
|
|
|
if (isValid == false){
|
|
|
|
// This must be a custom identifier, not an RSID
|
|
|
|
// TODO: Create database to convert them to RSIDs, like the one we have for 23andMe
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// We check to see if this rsID is used in any part of the genetic analysis, and skip it if it is not.
|
|
|
|
|
|
|
|
locusFound, _, err := locusMetadata.GetLocusMetadata(locusRSID)
|
|
|
|
if (err != nil) { return "", 0, 0, 0, false, nil, err }
|
|
|
|
if (locusFound == false){
|
|
|
|
// This locus is not used for any part of the genetic analysis
|
|
|
|
// We don't need to include it in our genome map
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
alleleABase := rowColumnsSlice[3]
|
|
|
|
alleleBBaseRaw := rowColumnsSlice[4]
|
|
|
|
|
|
|
|
// Allele B has a control character suffix
|
|
|
|
// We will remove it
|
|
|
|
alleleBBase := string(alleleBBaseRaw[0])
|
|
|
|
|
|
|
|
if (alleleABase == "0" || alleleBBase == "0"){
|
|
|
|
// The data was not successfully recorded
|
|
|
|
// We will skip this line
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
aIsValid := verifyBase(alleleABase)
|
|
|
|
if (aIsValid == false){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed AncestryDNA genome file: Invalid base detected: " + alleleABase)
|
|
|
|
}
|
|
|
|
bIsValid := verifyBase(alleleBBase)
|
|
|
|
if (bIsValid == false){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed AncestryDNA genome file: Invalid base detected: " + alleleBBase)
|
|
|
|
}
|
|
|
|
|
|
|
|
locusValueObject := RawGenomeLocusValue{
|
|
|
|
|
|
|
|
Allele1: alleleABase,
|
|
|
|
Allele2Exists: true,
|
|
|
|
Allele2: alleleBBase,
|
|
|
|
}
|
|
|
|
|
|
|
|
genomeMap[locusRSID] = locusValueObject
|
|
|
|
}
|
|
|
|
|
|
|
|
// Each file usually has 500,000+ SNPs
|
|
|
|
if (numberOfLoci < 1000){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed AncestryDNA genome file: Not enough bases.")
|
|
|
|
}
|
|
|
|
|
|
|
|
return "AncestryDNA", 1, fileTimeUnix, numberOfLoci, false, genomeMap, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// 23andMe files have the following first line format:
|
|
|
|
// "# This data file generated by 23andMe at: Mon Jan 01 01:00:00 2022"
|
|
|
|
fileIs23andMe := strings.HasPrefix(firstLine, "# This data file generated by 23andMe at:")
|
|
|
|
if (fileIs23andMe == true){
|
|
|
|
|
|
|
|
// 23andMe represents all locus values on the Plus strand.
|
|
|
|
|
|
|
|
timeOfGenerationString := strings.TrimPrefix(firstLine, "# This data file generated by 23andMe at: ")
|
|
|
|
|
|
|
|
timeSlice := strings.Split(timeOfGenerationString, " ")
|
|
|
|
if (len(timeSlice) != 5 && len(timeSlice) != 6){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed 23andMe genome file: Invalid time: " + timeOfGenerationString)
|
|
|
|
}
|
|
|
|
|
|
|
|
monthString := timeSlice[1]
|
|
|
|
|
|
|
|
getDayAndYearString := func()(string, string){
|
|
|
|
|
|
|
|
if (len(timeSlice) == 5){
|
|
|
|
|
|
|
|
dayString := timeSlice[2]
|
|
|
|
yearString := timeSlice[4]
|
|
|
|
|
|
|
|
return dayString, yearString
|
|
|
|
}
|
|
|
|
|
|
|
|
// len(timeSlice) == 6
|
|
|
|
|
|
|
|
dayString := timeSlice[3]
|
|
|
|
yearString := timeSlice[5]
|
|
|
|
|
|
|
|
return dayString, yearString
|
|
|
|
}
|
|
|
|
|
|
|
|
dayString, yearString := getDayAndYearString()
|
|
|
|
|
|
|
|
getMonthObject := func()(time.Month, error){
|
|
|
|
|
|
|
|
switch monthString{
|
|
|
|
case "Jan":{
|
|
|
|
return time.January, nil
|
|
|
|
}
|
|
|
|
case "Feb":{
|
|
|
|
return time.February, nil
|
|
|
|
}
|
|
|
|
case "Mar":{
|
|
|
|
return time.March, nil
|
|
|
|
}
|
|
|
|
case "Apr":{
|
|
|
|
return time.April, nil
|
|
|
|
}
|
|
|
|
case "May":{
|
|
|
|
return time.May, nil
|
|
|
|
}
|
|
|
|
case "Jun":{
|
|
|
|
return time.June, nil
|
|
|
|
}
|
|
|
|
case "Jul":{
|
|
|
|
return time.July, nil
|
|
|
|
}
|
|
|
|
case "Aug":{
|
|
|
|
return time.August, nil
|
|
|
|
}
|
|
|
|
case "Sep":{
|
|
|
|
return time.September, nil
|
|
|
|
}
|
|
|
|
case "Oct":{
|
|
|
|
return time.October, nil
|
|
|
|
}
|
|
|
|
case "Nov":{
|
|
|
|
return time.November, nil
|
|
|
|
}
|
|
|
|
case "Dec":{
|
|
|
|
return time.December, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return time.January, errors.New("Malformed 23andMe genome file: Invalid month: " + monthString)
|
|
|
|
}
|
|
|
|
|
|
|
|
monthObject, err := getMonthObject()
|
|
|
|
if (err != nil) { return "", 0, 0, 0, false, nil, err }
|
|
|
|
|
|
|
|
// We have to remove control character and newline suffix
|
|
|
|
yearExtractedString := string(yearString[:4])
|
|
|
|
|
|
|
|
yearInt, err := helpers.ConvertStringToInt(yearExtractedString)
|
|
|
|
if (err != nil) {
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed 23andMe genome file: Invalid year: " + yearExtractedString)
|
|
|
|
}
|
|
|
|
|
|
|
|
dayInt, err := helpers.ConvertStringToInt(dayString)
|
|
|
|
if (err != nil) {
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed 23andMe genome file: Invalid day: " + dayString)
|
|
|
|
}
|
|
|
|
|
|
|
|
fileTimeObject := time.Date(yearInt, monthObject, dayInt, 0, 0, 0, 0, time.UTC)
|
|
|
|
|
|
|
|
fileTimeUnix := fileTimeObject.Unix()
|
|
|
|
|
|
|
|
// Now we advance bufio reader to the snp rows
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
|
|
|
fileLineString, err := fileBufioReader.ReadString('\n')
|
|
|
|
if (err != nil){
|
|
|
|
// File does not have another line
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed genome file: Too short.")
|
|
|
|
}
|
|
|
|
|
|
|
|
// All SNP rows come after this line:
|
|
|
|
// "# rsid chromosome position genotype"
|
|
|
|
lineReached := strings.HasPrefix(fileLineString, "# rsid")
|
|
|
|
if (lineReached == true){
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
numberOfLoci := int64(0)
|
|
|
|
|
|
|
|
genomeMap := make(map[int64]RawGenomeLocusValue)
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
|
|
|
fileLineString, err := fileBufioReader.ReadString('\n')
|
|
|
|
if (err != nil){
|
|
|
|
// File does not have another line
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if (fileLineString == "\n"){
|
|
|
|
// This is the final line
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
fileLineWithoutNewline := strings.TrimSuffix(fileLineString, "\n")
|
|
|
|
|
|
|
|
// Rows look like this
|
|
|
|
// "rs4477212 1 82154 GG"
|
|
|
|
// "rs571313759 1 1181945 --" (-- means no entry)
|
|
|
|
// "i3001920 MT 16470 G" (one base is possible)
|
|
|
|
|
|
|
|
rowSlice := strings.Split(fileLineWithoutNewline, "\t")
|
|
|
|
|
|
|
|
if (len(rowSlice) != 4){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed 23andMe genome data: Invalid SNP row: " + fileLineString)
|
|
|
|
}
|
|
|
|
|
|
|
|
snpIdentifier := rowSlice[0]
|
|
|
|
snpValueRaw := rowSlice[3]
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
if (len(snpValueRaw) < 2){
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Malformed 23andMe genome data: Invalid SNP row snp value: " + fileLineString)
|
|
|
|
}
|
|
|
|
|
|
|
|
if (snpValueRaw[0] == '-'){
|
|
|
|
// Locus value is "--"
|
|
|
|
// Locus value does not exist
|
|
|
|
continue
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
numberOfLoci += 1
|
|
|
|
|
2024-04-11 15:51:56 +02:00
|
|
|
//Outputs:
|
|
|
|
// -bool: rsID found
|
|
|
|
// -int64: rsID value
|
2024-07-19 19:16:28 +02:00
|
|
|
// -error
|
2024-04-11 15:51:56 +02:00
|
|
|
getRSIDIdentifier := func()(bool, int64, error){
|
|
|
|
|
|
|
|
isRSID, rsidInt64 := readRSIDString(snpIdentifier)
|
|
|
|
if (isRSID == true){
|
|
|
|
return true, rsidInt64, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// We see if it is a custom locus alias
|
|
|
|
|
|
|
|
aliasFound, rsidInt64, err := locusMetadata.GetCompanyAliasRSID("23andMe", snpIdentifier)
|
|
|
|
if (err != nil) { return false, 0, err }
|
|
|
|
if (aliasFound == true){
|
|
|
|
return true, rsidInt64, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return false, 0, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
rsidFound, locusRSID, err := getRSIDIdentifier()
|
|
|
|
if (err != nil) { return "", 0, 0, 0, false, nil, err }
|
|
|
|
if (rsidFound == false){
|
|
|
|
// RSID is unknown.
|
|
|
|
// It is probably a custom identifier that represents a locus we don't use for the analysis.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
locusFound, _, err := locusMetadata.GetLocusMetadata(locusRSID)
|
|
|
|
if (err != nil) { return "", 0, 0, 0, false, nil, err }
|
|
|
|
if (locusFound == false){
|
|
|
|
// This locus is not used for any part of the genetic analysis
|
|
|
|
// We don't need to include it in our genome map
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
getLocusBasesList := func()([]rune, error){
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
locusBasesList := make([]rune, 0)
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
finalIndex := len(snpValueRaw) - 1
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
for index, character := range snpValueRaw{
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
baseIsValid := verifyBase(string(character))
|
|
|
|
if (baseIsValid == false){
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
if (index == finalIndex){
|
|
|
|
// The final index of snpValueRaw is sometimes a control character
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
return locusBasesList, nil
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
return nil, errors.New("Malformed 23andMe genome file: Invalid SNP base: " + string(character))
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
locusBasesList = append(locusBasesList, character)
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
return locusBasesList, nil
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
locusBasesList, err := getLocusBasesList()
|
|
|
|
if (err != nil){ return "", 0, 0, 0, false, nil, err }
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
allele1 := string(locusBasesList[0])
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
locusValueObject := RawGenomeLocusValue{
|
|
|
|
Allele1: allele1,
|
|
|
|
}
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
if (len(locusBasesList) > 1){
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
allele2 := string(locusBasesList[1])
|
2024-04-11 15:51:56 +02:00
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
locusValueObject.Allele2Exists = true
|
|
|
|
locusValueObject.Allele2 = allele2
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
|
2024-07-19 19:16:28 +02:00
|
|
|
genomeMap[locusRSID] = locusValueObject
|
2024-04-11 15:51:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return "23andMe", 1, fileTimeUnix, numberOfLoci, false, genomeMap, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return "", 0, 0, 0, false, nil, errors.New("Cannot read genome file: File format not known.")
|
|
|
|
}
|
|
|
|
|
|
|
|
|