// createRawGenomes provides functions to create fake raw genome files // This package's functions are only used to test the readRawGenomes, createPersonGeneticAnalysis, and createCoupleGeneticAnalysis packages. package createRawGenomes import "seekia/internal/genetics/readRawGenomes" import "seekia/internal/helpers" import "seekia/internal/unixTime" import "time" import "errors" import "strings" // Only use this function for tests // Outputs: // -string: Fake raw genome file string // -int64: Time of fake file generation // -int64: Number of loci // -This does not include loci which have no base pair value in the file (Denoted by: "--") // -map[int64]readRawGenomes.RawGenomeLocusValue: Raw genome map (rsID -> Locus base pair value) // -error func CreateFakeRawGenome_23andMe()(string, int64, int64, map[int64]readRawGenomes.RawGenomeLocusValue, error){ yearUnix := unixTime.GetYearUnix() maximumTime := time.Now().Unix() minimumTime := maximumTime - (yearUnix*20) randomUnixTime := helpers.GetRandomInt64WithinRange(minimumTime, maximumTime) randomTimeObject := time.Unix(randomUnixTime, 0) timeMonthObject := randomTimeObject.Month() timeDayInt := randomTimeObject.Day() timeYearInt := randomTimeObject.Year() fileCreationTimeObject := time.Date(timeYearInt, timeMonthObject, timeDayInt, 0, 0, 0, 0, time.UTC) fileCreationTimeUnix := fileCreationTimeObject.Unix() timeWeekdayString := randomTimeObject.Weekday().String() timeWeekdayTrimmed := timeWeekdayString[:3] timeMonthString := timeMonthObject.String() timeMonthTrimmed := timeMonthString[:3] timeYearString := helpers.ConvertIntToString(timeYearInt) getTimeDayFormatted := func()string{ timeDayString := helpers.ConvertIntToString(timeDayInt) if (len(timeDayString) == 2){ return timeDayString } // We have to add 0 prefix result := "0" + timeDayString return result } timeDayFormatted := getTimeDayFormatted() fileTimeString := timeWeekdayTrimmed + " " + timeMonthTrimmed + " " + timeDayFormatted + " 12:34:56 " + timeYearString // We use this builder to create the file string var fileContentsBuilder strings.Builder fileHeader := `# This data file generated by 23andMe at: ` + fileTimeString + ` # # This file contains raw genotype data, including data that is not used in 23andMe reports. # This data has undergone a general quality review however only a subset of markers have been # individually validated for accuracy. As such, this data is suitable only for research, # educational, and informational use and not for medical or other use. # # Below is a text version of your data. Fields are TAB-separated # Each line corresponds to a single SNP. For each SNP, we provide its identifier # (an rsid or an internal id), its location on the reference human genome, and the # genotype call oriented with respect to the plus strand on the human reference sequence. # We are using reference human assembly build 37 (also known as Annotation Release 104). # Note that it is possible that data downloaded at different times may be different due to ongoing # improvements in our ability to call genotypes. More information about these changes can be found at: # https://you.23andme.com/p//tools/data/download/ # # More information on reference human assembly builds: # https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.13/ # # rsid chromosome position genotype ` _, err := fileContentsBuilder.WriteString(fileHeader) if (err != nil){ return "", 0, 0, nil, errors.New("Failed to WriteString to string builder: " + err.Error()) } numberOfLociToAdd := helpers.GetRandomInt64WithinRange(500000, 600000) numberOfAddedLoci := int64(0) // We use this map to avoid adding duplicate rsIDs // Map Structure: rsID -> Nothing addedRSIDsMap := make(map[int64]struct{}) // We use this map to return the contents of the map so we can verify reading it correctly // Map Structure: rsID -> Locus value object (Example: G,G, I,D) fileRSIDsMap := make(map[int64]readRawGenomes.RawGenomeLocusValue) // We use this map to avoid adding duplicate positions addedPositionsMap := make(map[int]struct{}) allelePossibilities := []string{"G", "C", "A", "T", "I", "D"} for numberOfAddedLoci < numberOfLociToAdd{ locusRSID := helpers.GetRandomInt64WithinRange(1, 10000000) _, exists := addedRSIDsMap[locusRSID] if (exists == true){ // We try again to get a unique rsid continue } locusChromosome := helpers.GetRandomIntWithinRange(1, 26) locusPosition := helpers.GetRandomIntWithinRange(1, 10000000) _, exists = addedPositionsMap[locusPosition] if (exists == true){ // We try again to get a unique position continue } locusRSIDString := helpers.ConvertInt64ToString(locusRSID) locusChromosomeString := helpers.ConvertIntToString(locusChromosome) locusPositionString := helpers.ConvertIntToString(locusPosition) // Outputs: // -string: Base pair for file // -bool: Base pair exists // -string: Allele A for rsidsMap // -string: Allele B for rsidsMap // -error getBasePair := func()(string, bool, string, string, error){ randomInt := helpers.GetRandomIntWithinRange(1, 1000) if (randomInt == 1){ // ~1/1000 loci will be unknown return "--", false, "", "", nil } alleleA, err := helpers.GetRandomItemFromList(allelePossibilities) if (err != nil){ return "", false, "", "", err } alleleB, err := helpers.GetRandomItemFromList(allelePossibilities) if (err != nil){ return "", false, "", "", err } basePairForFile := alleleA + alleleB return basePairForFile, true, alleleA, alleleB, nil } basePairForFile, basePairExists, alleleA, alleleB, err := getBasePair() if (err != nil){ return "", 0, 0, nil, errors.New("getBasePair failed: " + err.Error()) } newLine := "rs" + locusRSIDString + "\t" + locusChromosomeString + "\t" + locusPositionString + "\t" + basePairForFile + string(byte(13)) + "\n" _, err = fileContentsBuilder.WriteString(newLine) if (err != nil){ return "", 0, 0, nil, errors.New("Failed to WriteString to string builder: " + err.Error()) } addedRSIDsMap[locusRSID] = struct{}{} addedPositionsMap[locusPosition] = struct{}{} if (basePairExists == false){ continue } numberOfAddedLoci += 1 locusValueObject := readRawGenomes.RawGenomeLocusValue{ Allele1: alleleA, Allele2Exists: true, Allele2: alleleB, } fileRSIDsMap[locusRSID] = locusValueObject } fileString := fileContentsBuilder.String() return fileString, fileCreationTimeUnix, numberOfAddedLoci, fileRSIDsMap, nil } // Only use this function for tests // Outputs: // -string: Fake raw genome file string // -int64: File creation time // -int64: Number of loci in file // -map[int64]readRawGenomes.RawGenomeLocusValue: Raw genome map (rsID -> Locus base pair value) // -error func CreateFakeRawGenome_AncestryDNA()(string, int64, int64, map[int64]readRawGenomes.RawGenomeLocusValue, error){ yearUnix := unixTime.GetYearUnix() maximumTime := time.Now().Unix() minimumTime := maximumTime - (yearUnix*20) randomUnixTime := helpers.GetRandomInt64WithinRange(minimumTime, maximumTime) randomTimeObject := time.Unix(randomUnixTime, 0) timeMonthInt := randomTimeObject.Month() timeDayInt := randomTimeObject.Day() timeYearInt := randomTimeObject.Year() fileCreationTimeObject := time.Date(timeYearInt, timeMonthInt, timeDayInt, 0, 0, 0, 0, time.UTC) fileCreationTimeUnix := fileCreationTimeObject.Unix() timeDayString := helpers.ConvertIntToString(timeDayInt) timeYearString := helpers.ConvertIntToString(timeYearInt) getTimeMonthFormatted := func()string{ timeMonthString := helpers.ConvertIntToString(int(timeMonthInt)) if (len(timeMonthString) == 2){ return timeMonthString } // We have to add 0 prefix result := "0" + timeMonthString return result } timeMonthFormatted := getTimeMonthFormatted() fileTimeString := timeMonthFormatted + "/" + timeDayString + "/" + timeYearString // We use this builder to create the file string var fileContentsBuilder strings.Builder fileHeader := `#AncestryDNA raw data download #This file was generated by AncestryDNA at: ` + fileTimeString + ` 10:00:00 UTC #Data was collected using AncestryDNA array version: V2.0 #Data is formatted using AncestryDNA converter version: V1.0 #Below is a text version of your DNA file from Ancestry.com DNA, LLC. THIS #INFORMATION IS FOR YOUR PERSONAL USE AND IS INTENDED FOR GENEALOGICAL RESEARCH #ONLY. IT IS NOT INTENDED FOR MEDICAL, DIAGNOSTIC, OR HEALTH PURPOSES. THE EXPORTED DATA IS #SUBJECT TO THE AncestryDNA TERMS AND CONDITIONS, BUT PLEASE BE AWARE THAT THE #DOWNLOADED DATA WILL NO LONGER BE PROTECTED BY OUR SECURITY MEASURES. #WHEN YOU DOWNLOAD YOUR RAW DNA DATA, YOU ASSUME ALL RISK OF STORING, #SECURING AND PROTECTING YOUR DATA. FOR MORE INFORMATION, SEE ANCESTRYDNA FAQS. # #Genetic data is provided below as five TAB delimited columns. Each line #corresponds to a SNP. Column one provides the SNP identifier (rsID where #possible). Columns two and three contain the chromosome and basepair position #of the SNP using human reference build 37.1 coordinates. Columns four and five #contain the two alleles observed at this SNP (genotype). The genotype is reported #on the forward (+) strand with respect to the human reference. rsid chromosome position allele1 allele2 ` _, err := fileContentsBuilder.WriteString(fileHeader) if (err != nil){ return "", 0, 0, nil, errors.New("Failed to WriteString to string builder: " + err.Error()) } numberOfLociToAdd := helpers.GetRandomInt64WithinRange(500000, 600000) numberOfAddedLoci := int64(0) // We use this map to avoid adding duplicate rsIDs and to verify results of file read // Map Structure: rsID -> Base pair (Example: "G,G", "I,D") fileRSIDsMap := make(map[int64]readRawGenomes.RawGenomeLocusValue) // We use this map to avoid adding duplicate positions addedPositionsMap := make(map[int]struct{}) allelePossibilities := []string{"0", "G", "C", "A", "T", "I", "D"} for numberOfAddedLoci < numberOfLociToAdd{ locusRSID := helpers.GetRandomInt64WithinRange(1, 10000000) _, exists := fileRSIDsMap[locusRSID] if (exists == true){ // We try again to get a unique rsid continue } locusChromosome := helpers.GetRandomIntWithinRange(1, 26) locusPosition := helpers.GetRandomIntWithinRange(1, 10000000) _, exists = addedPositionsMap[locusPosition] if (exists == true){ // We try again to get a unique position continue } locusRSIDString := helpers.ConvertInt64ToString(locusRSID) locusChromosomeString := helpers.ConvertIntToString(locusChromosome) locusPositionString := helpers.ConvertIntToString(locusPosition) alleleA, err := helpers.GetRandomItemFromList(allelePossibilities) if (err != nil){ return "", 0, 0, nil, errors.New("GetRandomItemFromList failed: " + err.Error()) } alleleB, err := helpers.GetRandomItemFromList(allelePossibilities) if (err != nil){ return "", 0, 0, nil, errors.New("GetRandomItemFromList failed: " + err.Error()) } newLine := "rs" + locusRSIDString + "\t" + locusChromosomeString + "\t" + locusPositionString + "\t" + alleleA + "\t" + alleleB + "\n" _, err = fileContentsBuilder.WriteString(newLine) if (err != nil){ return "", 0, 0, nil, errors.New("Failed to WriteString to string builder: " + err.Error()) } locusValueObject := readRawGenomes.RawGenomeLocusValue{ Allele1: alleleA, Allele2Exists: true, Allele2: alleleB, } fileRSIDsMap[locusRSID] = locusValueObject addedPositionsMap[locusPosition] = struct{}{} numberOfAddedLoci += 1 } fileString := fileContentsBuilder.String() return fileString, fileCreationTimeUnix, numberOfAddedLoci, fileRSIDsMap, nil }