GET /api/scoresets/
HTTP 200 OK
Allow: GET
Content-Type: application/json
Vary: Accept

[
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000040-a-4",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effect of variants in yeast HSP90 under different combinations of temperature (30C or 36C) and presence/absence of salt (0.5 M NaCl). The results explore the adaptive potential of this essential gene.",
        "method_text": "Sequencing reads were filtered based on a minimum Phred quality score of 20 across all 36 bases. For each time point, the log2 ratio of each variant's count to the wild type count was calculated. The score of each variant was calculated as the slope of these log ratios to time in wild type generations. Scores of -0.5 are considered null-like.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae at 36C with 0.5 M NaCl.",
        "title": "Deep mutational scan of HSP90, 36C with salt",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "24299404",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/24299404",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000040-a-4",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000040-a-4",
        "variant_count": 189,
        "experiment": "urn:mavedb:00000040-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-08",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000041-a-1",
        "publish_date": "2019-08-08",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study infers the activity of variants in Src kinases's catalytic domain by measuring their effects on yeast growth, and therefore phosphotransferase activity. The resulting dataset includes variants classified as gain of function, loss of function. or neutral.",
        "method_text": "Variant scores were calculated using Enrich2 weighted least squares regression and wild-type normalization. The `activity_score` is `-1 * score`, because negative scores indicate depletion in the population, which is associated with higher kinase activity.\r\n\r\nVariants were classified as \"gain of function\" \"neutral\" or \"loss of function\" based on whether they were within +/- 2 standard deviations of the mean score of synonymous variants (variants with wild type amino acid sequence). These categories are denoted by 1, 0, or -1 in the table respectively.",
        "short_description": "Amino acid scores for deep mutational scan of the Src kinase catalytic domain.",
        "title": "Amino acid scores for Src CD",
        "keywords": [
            {
                "text": "subassembly"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "kinase"
            },
            {
                "text": "NNK mutagenesis"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "regression"
            },
            {
                "text": "Enrich2"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "30956043",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/30956043",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Src catalytic domain",
            "reference_sequence": {
                "sequence": "CTGCGGCTGGAGGTCAAGCTGGGCCAGGGCTGCTTTGGCGAGGTGTGGATGGGGACCTGGAACGGTACCACCAGGGTGGCCATCAAAACCCTGAAGCCTGGCACGATGTCTCCAGAGGCCTTCCTGCAGGAGGCCCAGGTCATGAAGAAGCTGAGGCATGAGAAGCTGGTGCAGTTGTATGCTGTGGTTTCAGAGGAGCCCATTTACATCGTCACGGAGTACATGAGCAAGGGGAGTTTGCTGGACTTTCTCAAGGGGGAGACAGGCAAGTACCTGCGGCTGCCTCAGCTGGTGGACATGGCTGCTCAGATCGCCTCAGGCATGGCGTACGTGGAGCGGATGAACTACGTCCACCGGGACCTTCGTGCAGCCAACATCCTGGTGGGAGAGAACCTGGTGTGCAAAGTGGCCGACTTTGGGCTGGCTCGGCTCATTGAAGACAATGAGTACACGGCGCGGCAAGGTGCCAAATTCCCCATCAAGTGGACGGCTCCAGAAGCTGCCCTCTATGGCCGCTTCACCATCAAGTCGGACGTGTGGTCCTTCGGGATCCTGCTGACTGAGCTCACCACAAAGGGACGGGTGCCCTACCCTGGGATGGTGAACCGCGAGGTGCTGGACCAGGTGGAGCGGGGCTACCGGATGCCCTGCCCGCCGGAGTGTCCCGAGTCCCTGCACGACCTCATGTGCCAGTGCTGGCGGAAGGAGCCTGAGGAGCGGCCCACCTTCGAGTACCTGCAGGCCTTCCTG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 269,
                "identifier": "P12931",
                "url": "http://purl.uniprot.org/uniprot/P12931",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000041-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "std",
            "epsilon",
            "activity_score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000041-a-1",
        "variant_count": 3506,
        "experiment": "urn:mavedb:00000041-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-11-11",
        "modification_date": "2020-11-11",
        "urn": "urn:mavedb:00000048-a-1",
        "publish_date": "2020-11-11",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment utilised site-saturation mutagenesis (SSM) to measure the functional consequences of mutations in the human chemokine receptor, CXCR4 and to map ligand interaction sites. Cells were selected for CXCR4 surface expression.",
        "method_text": "Data obtained from sorting cells for anti-myc staining was analysed using Enrich (version unspecified). Log2 enrichment ratios were calculated and normalised by subtracting the frequency of the WT sequence.  Log2 enrichment ratios for four replicates (two replicates each for two tags) were averaged to obtain variant scores. Note that the scores here were not reported in the manuscript tables, but were calculated from the replicate enrichment ratios that were reported.",
        "short_description": "Deep mutational scan selecting for cell surface expression of CXCR4 in Expi293F cells.",
        "title": "CXCR4 surface expression",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29678950",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29678950",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23827138",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23827138",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0001-6681-7994"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CXCR4",
            "reference_sequence": {
                "sequence": "GAGGGGATCAGTATATACACTTCAGATAACTACACCGAGGAAATGGGCTCAGGGGACTATGACTCCATGAAGGAACCCTGTTTCCGTGAAGAAAATGCTAATTTCAATAAAATCTTCCTGCCCACCATCTACTCCATCATCTTCTTAACTGGCATTGTGGGCAATGGATTGGTCATCCTGGTCATGGGTTACCAGAAGAAACTGAGAAGCATGACGGACAAGTACAGGCTGCACCTGTCAGTGGCCGACCTCCTCTTTGTCATCACGCTTCCCTTCTGGGCAGTTGATGCCGTGGCAAACTGGTACTTTGGGAACTTCCTATGCAAGGCAGTCCATGTCATCTACACAGTCAACCTCTACAGCAGTGTCCTCATCCTGGCCTTCATCAGTCTGGACCGCTACCTGGCCATCGTCCACGCCACCAACAGTCAGAGGCCAAGGAAGCTGTTGGCTGAAAAGGTGGTCTATGTTGGCGTCTGGATCCCTGCCCTCCTGCTGACTATTCCCGACTTCATCTTTGCCAACGTCAGTGAGGCAGATGACAGATATATCTGTGACCGCTTCTACCCCAATGACTTGTGGGTGGTTGTGTTCCAGTTTCAGCACATCATGGTTGGCCTTATCCTGCCTGGTATTGTCATCCTGTCCTGCTATTGCATTATCATCTCCAAGCTGTCACACTCCAAGGGCCACCAGAAGCGCAAGGCCCTCAAGACCACAGTCATCCTCATCCTGGCTTTCTTCGCCTGTTGGCTGCCTTACTACATTGGGATCAGCATCGACTCCTTCATCCTCCTGGAAATCATCAAGCAAGGGTGTGAGTTTGAGAACACTGTGCACAAGTGGATTTCCATCACCGAGGCCCTAGCTTTCTTCCACTGTTGTCTGAACCCCATCCTCTATGCTTTCCTTGGAGCCAAATTTAAAACCTCTGCCCAGCACGCACTCACCTCTGTGAGCAGAGGGTCCAGCCTCAAGATCCTCTCCAAAGGAAAGCGAGGTGGACATTCATCTGTTTCCACTGAGTCTGAGTCTTCAAGTTTTCACTCCAGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P61073",
                "url": "http://purl.uniprot.org/uniprot/P61073",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000048-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "rep1_anti-myc-FITC",
            "rep2_anti-myc-FITC",
            "rep1_anti-myc-Alexa",
            "rep2_anti-myc-Alexa"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000048-a-1",
        "variant_count": 7021,
        "experiment": "urn:mavedb:00000048-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000023-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "19",
            "end": 11089548,
            "start": 11089231,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of LDLR promoter in HepG2 cells. Biological replicate 1 of 2.",
        "title": "Saturation mutagenesis MPRA of LDLR promoter, replicate 1",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "LDLR promoter",
            "reference_sequence": {
                "sequence": "AGCTCTTCACCGGAGACCCAAATACAACAAATCAAGTCGCCTGCCCTGGCGACACTTTCGAAGGACTGGAGTGGGAATCAGAGCTTCACGGGTTAAAAAGCCGATGTCACATCGGCCGTTCGAAACTCCTCCTCTTGCAGTGAGGTGAAGACATTTGAAAATCACCCCACTGCAAACTCCTCCCCCTGCTAGAAACCTCACATTGAAATGCTGTAAATGACGTGGGCCCCGAGTGCAATCGCGGGAAGCCAGGGTTTCCAGCTAGGACACAGCAGGTCGTGATCCGGGTCGGGACACTGCCTGGCAGAGGCTGCGAGC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000023-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000023-a-1",
        "variant_count": 1083,
        "experiment": "urn:mavedb:00000023-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-c-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast Treated with Miconazole",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1101/2020.05.01.072884",
                "url": "https://doi.org/10.1101/2020.05.01.072884",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-c-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-c-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-c",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-27",
        "modification_date": "2020-11-20",
        "urn": "urn:mavedb:00000049-a-2",
        "publish_date": "2020-11-20",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human MTHFR via functional complementation in yeast at 25ug/ml folate in A222V background",
        "title": "MTHFR at 25ug/ml folate in A222V background",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390",
            "0000-0002-9219-4310",
            "0000-0002-2550-2141",
            "0000-0001-6465-5776"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MTHFR",
            "reference_sequence": {
                "sequence": "ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P42898",
                "url": "http://purl.uniprot.org/uniprot/P42898",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000177000",
                "url": "http://www.ensembl.org/id/ENSG00000177000",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 230,
                "identifier": "NM_005957",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_005957",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000049-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000049-a-2",
        "variant_count": 13690,
        "experiment": "urn:mavedb:00000049-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2021-01-12",
        "modification_date": "2021-01-12",
        "urn": "urn:mavedb:00000056-a-1",
        "publish_date": "2021-01-12",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment identified candidate pharmacogenomic variants in NUDT15 that could contribute to thiopurine toxicity. Two functional assays were performed on the same variant library, one for NUDT15 activity and one for protein stability.",
        "method_text": "Barcode-variant counts were used as input for the ABSSeq RNA-sequencing analysis pipeline (Yang et al., 2016). Variant scores are based on the fold change in variant frequency between drug treated and untreated populations.\r\n\r\nReported are the non-normalized average score and standard deviation for each missense change, but the nature of the averaging (e.g. distinct codons with synonymous consequences or replicate assays) was not specified.",
        "short_description": "FLAGGED FOR REMOVAL",
        "title": "FLAGGED FOR REMOVAL",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "32094176",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32094176",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "27488180",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/27488180",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "NUDT15",
            "reference_sequence": {
                "sequence": "ATGACGGCCAGCGCACAGCCGCGCGGGCGGCGGCCAGGAGTCGGAGTCGGAGTCGTGGTGACCAGCTGCAAGCATCCGCGTTGCGTCCTCCTGGGGAAGAGGAAAGGCTCGGTTGGAGCTGGCAGTTTCCAACTCCCTGGAGGTCATCTGGAGTTCGGTGAAACCTGGGAAGAATGTGCTCAAAGGGAAACCTGGGAAGAAGCAGCTCTTCACCTGAAAAATGTTCACTTTGCCTCAGTTGTGAATTCTTTCATTGAGAAGGAGAATTACCATTATGTTACTATATTAATGAAAGGAGAAGTGGATGTGACTCATGATTCAGAACCAAAGAATGTAGAGCCTGAAAAAAATGAAAGTTGGGAGTGGGTTCCTTGGGAAGAACTACCTCCCCTGGACCAGCTTTTCTGGGGACTGCGTTGTTTAAAAGAACAAGGCTATGATCCATTTAAAGAAGATCTGAACCATCTGGTGGGATACAAAGGAAATCATCTCTAG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "Q9NV35",
                "url": "http://purl.uniprot.org/uniprot/Q9NV35",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000056-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000056-a-1",
        "variant_count": 3100,
        "experiment": "urn:mavedb:00000056-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2021-03-10",
        "modification_date": "2021-04-15",
        "urn": "urn:mavedb:00000062-b-1",
        "publish_date": "2021-04-15",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study tested the impact of CYP2C19 variants on the abundance of this protein in HEK293T. The authors fused the target protein with green fluorescent protein (GFP) and used the intensity of GPF as the indicator of target protein abundance.",
        "method_text": "After 5 days' growth, the cells were sorted to 4 bins by FACS according to the relative intensity of GFP and mCherry. The gate for each bin was set by the values of wildtype and known target protein variants, for example bin 1 represented protein expression less than 25%. Sorted cells for each bin were sequenced by Illumina and the frequency of each variant in the bin was determined. The protein abundance score for a variant was calculated as: the sum of weighted variant frequency in each bin (weight: 0.25, 0.5, 0.75 & 1 for bin1 to bin4) and divided by the sum of the un-weighted variant frequency in each bin.\r\nThe experiment was repeated 4 times and the final score was the mean of them.",
        "short_description": "This study tested the protein abundance of CYP2C19 variants by fusing green fluorescent protein.",
        "title": "Abundance of CYP2C19 variants",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1111/cts.12758",
                "url": "https://doi.org/10.1111/cts.12758",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "32004414",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32004414",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CYP2C19",
            "reference_sequence": {
                "sequence": "CTCTGTCTCTCATGTTTGCTTCTCCTTTCAATCTGGAGACAGAGCTCTGGGAGAGGAAAACTCCCTCCTGGCCCCACTCCTCTCCCAGTGATTGGAAATATCCTACAGATAGATATTAAGGATGTCAGCAAATCCTTAACCAATCTCTCAAAAATCTATGGCCCTGTGTTCACTCTGTATTTTGGCCTGGAACGCATGGTGGTGCTGCATGGATATGAAGTGGTGAAGGAAGCCCTGATTGATCTTGGAGAGGAGTTTTCTGGAAGAGGCCATTTCCCACTGGCTGAAAGAGCTAACAGAGGATTTGGAATCGTTTTCAGCAATGGAAAGAGATGGAAGGAGATCCGGCGTTTCTCCCTCATGACGCTGCGGAATTTTGGGATGGGGAAGAGGAGCATTGAGGACCGTGTTCAAGAGGAAGCCCGCTGCCTTGTGGAGGAGTTGAGAAAAACCAAGGCTTCACCCTGTGATCCCACTTTCATCCTGGGCTGTGCTCCCTGCAATGTGATCTGCTCCATTATTTTCCAGAAACGTTTCGATTATAAAGATCAGCAATTTCTTAACTTGATGGAAAAATTGAATGAAAACATCAGGATTGTAAGCACCCCCTGGATCCAGATATGCAATAATTTTCCCACTATCATTGATTATTTCCCGGGAACCCATAACAAATTACTTAAAAACCTTGCTTTTATGGAAAGTGATATTTTGGAGAAAGTAAAAGAACACCAAGAATCGATGGACATCAACAACCCTCGGGACTTTATTGATTGCTTCCTGATCAAAATGGAGAAGGAAAAGCAAAACCAACAGTCTGAATTCACTATTGAAAACTTGGTAATCACTGCAGCTGACTTACTTGGAGCTGGGACAGAGACAACAAGCACAACCCTGAGATATGCTCTCCTTCTCCTGCTGAAGCACCCAGAGGTCACAGCTAAAGTCCAGGAAGAGATTGAACGTGTCATTGGCAGAAACCGGAGCCCCTGCATGCAGGACAGGGGCCACATGCCCTACACAGATGCTGTGGTGCACGAGGTCCAGAGATACATCGACCTCATCCCCACCAGCCTGCCCCATGCAGTGACCTGTGACGTTAAATTCAGAAACTACCTCATTCCCAAGGGCACAACCATATTAACTTCCCTCACTTCTGTGCTACATGACAACAAAGAATTTCCCAACCCAGAGATGTTTGACCCTCGTCACTTTCTGGATGAAGGTGGAAATTTTAAGAAAAGTAACTACTTCATGCCTTTCTCAGCAGGAAAACGGATTTGTGTGGGAGAGGGCCTGGCCCGCATGGAGCTGTTTTTATTCCTGACCTTCATTTTACAGAACTTTAACCTGAAATCTCTGATTGACCCAAAGGACCTTGACACAACTCCTGTTGTCAATGGATTTGCTTCTGTCCCGCCCTTCTATCAGCTGTGCTTCATTCCT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 8,
                "identifier": "P33261",
                "url": "http://purl.uniprot.org/uniprot/P33261",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000062-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "e1_score",
            "e2_score",
            "e3_score",
            "e4_score",
            "sd",
            "se",
            "lower_ci",
            "upper_ci"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000062-b-1",
        "variant_count": 121,
        "experiment": "urn:mavedb:00000062-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-01-17",
        "urn": "urn:mavedb:00000057-c-1",
        "publish_date": "2021-01-17",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors used saturation mutagenesis to study the variant effect of Ras, with the regulation of GTPase activating protein (GAP) and guanine nucleotide exchange factor (GEF). The variants were selected by bacterial two-hybrid strategy.",
        "method_text": "The frequency of each mutant was determined through sequencing. The natural logarithm of the frequency ratio for each mutant after and before selection was calculated. These values were then normalized by dividing the wild type values which were calculated the same way.",
        "short_description": "In the presence of a GAP and a GEF, selection result on the mutated H-Ras.",
        "title": "Regulated-Ras",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28686159",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28686159",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Ras",
            "reference_sequence": {
                "sequence": "ACGGAATATAAGCTGGTGGTGGTGGGCGCCGGCGGTGTGGGCAAGAGTGCGCTGACCATCCAGCTGATCCAGAACCATTTTGTGGACGAATACGACCCCACTATAGAGGATTCCTACCGGAAGCAGGTGGTCATTGATGGGGAGACGTGCCTGTTGGACATCCTGGATACCGCCGGCCAGGAGGAGTACAGCGCCATGCGGGACCAGTACATGCGCACCGGGGAGGGCTTCCTGTGTGTGTTTGCCATCAACAACACCAAGTCTTTTGAGGACATCCACCAGTACAGGGAGCAGATCAAACGGGTGAAGGACTCGGATGACGTGCCCATGGTGCTGGTGGGGAACAAGTGTGACCTGGCTGCACGCACTGTGGAATCTCGGCAGGCTCAGGACCTCGCCCGAAGCTACGGCATCCCCTACATCGAGACCTCGGCCAAGACCCGGCAGGGAGTGGAGGATGCCTTCTACACGTTGGTGCGTGAGATCCGGCAGCAC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P01112",
                "url": "http://purl.uniprot.org/uniprot/P01112",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000057-c-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000057-c-1",
        "variant_count": 3300,
        "experiment": "urn:mavedb:00000057-c",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000018-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "11",
            "end": 5227208,
            "start": 5227022,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of HBB promoter in HEL 92.1.7 cells.",
        "title": "Saturation mutagenesis MPRA of HBB promoter",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HBB promoter",
            "reference_sequence": {
                "sequence": "GGTGTCTGTTTGAGGTTGCTAGTGAACACAGTTGTGTCAGAAGCAAATGTAAGCAATAGATGGCTCTGCCCTGACTTTTATGCCCAGCCCTGGCTCCTGCCCTCCCTGCTCCTGGGAGTAGATTGGCCAACCCTAGGGTGTGGCTCCACAGGGTGAGGTCTAAGTGATGACAGCCGTACCTGTCCTT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000018-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000018-a-1",
        "variant_count": 623,
        "experiment": "urn:mavedb:00000018-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-04-14",
        "urn": "urn:mavedb:00000061-f-1",
        "publish_date": "2021-04-14",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors generated a library of RAF variant and use the PACS system to test KRAS4b/RAF protein-protein interaction (PPI). The experimental data revealed positions along the binding interface as well as which substitutions are tolerated at each position.",
        "method_text": "Samples are collected after 12h and sequenced by Illumina. The counts for each variant is first added by 1, then divided by total sequence counts at this time point to calculate variant frequency. The functional score equals the division of a variant frequency at this time point and its frequency in initial library. Further normalizing the functional scores by wild type scores will give the relative enrichment values. The score data includes scores from three replicates which are suffixed by: _rep1, _rep2 & _rep3. The final score is the median of them.",
        "short_description": "Measuring the interaction of mutated RAF to RAS by a new phage-assisted continuous selection (PACS) system.",
        "title": "RAF variant selected after 12h",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1021/acschembio.9b00669",
                "url": "https://doi.org/10.1021/acschembio.9b00669",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31808666",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31808666",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "RAF",
            "reference_sequence": {
                "sequence": "TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 51,
                "identifier": "P04049",
                "url": "http://purl.uniprot.org/uniprot/P04049",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000061-f-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "score_rep1",
            "score_rep2",
            "score_rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000061-f-1",
        "variant_count": 298,
        "experiment": "urn:mavedb:00000061-f",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-07",
        "urn": "urn:mavedb:00000039-a-5",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effects of yeast HSP90 variants under the control of different promoters to explore the relationship between protein sequence and expression level. The results showed that reduced expression level (compared to wild-type expression) revealed new partial loss of function mutations.",
        "method_text": "Growth rates were calculated for each variant and converted into selection coefficients. The selection coefficient for each variant under control of this promoter/UTR combination is reported as the score. For variants with multiple synonymous codons, the reported coefficient is the average of all synonymous variant's selection coefficients.\r\n\r\nVariants annotated as \"null-like\" have a score of -1.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae under the control of the GPD promoter with CYC 3'UTR.",
        "title": "Deep mutational scan of HSP90, GPD construct",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "promoter"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "23825969",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23825969",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000039-a-5",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000039-a-5",
        "variant_count": 182,
        "experiment": "urn:mavedb:00000039-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-06-29",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000001-c-1",
        "publish_date": "2018-06-29",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Although we now routinely sequence human genomes, we can confidently identify only a fraction of the sequence variants that have a functional impact. Here, we developed a deep mutational scanning framework that produces exhaustive maps for human missense variants by combining random codon mutagenesis and multiplexed functional variation assays with computational imputation and refinement. We applied this framework to four proteins corresponding to six human genes: UBE2I (encoding SUMO E2 conjugase), SUMO1 (small ubiquitin-like modifier), TPK1 (thiamin pyrophosphokinase), and CALM1/2/3 (three genes encoding the protein calmodulin). The resulting maps recapitulate known protein features and confidently identify pathogenic variation. Assays potentially amenable to deep mutational scanning are already available for 57% of human disease genes, suggesting that DMS could ultimately map functional variation for all human disease genes. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957)",
        "method_text": "##Scoring procedure:\r\nDMS-TileSeq reads were processed using the [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) software. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). log ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957) for more details.",
        "short_description": "A Deep Mutational Scan of human Calmodulin using functional complementation in yeast via DMS-TileSeq.",
        "title": "Human Calmodulin DMS-TileSeq",
        "keywords": [
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29269382",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29269382",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CALM1",
            "reference_sequence": {
                "sequence": "ATGGCTGATCAGCTGACCGAAGAACAGATTGCTGAATTCAAGGAAGCCTTCTCCCTATTTGATAAAGATGGCGATGGCACCATCACAACAAAGGAACTTGGAACTGTCATGAGGTCACTGGGTCAGAACCCAACAGAAGCTGAATTGCAGGATATGATCAATGAAGTGGATGCTGATGGTAATGGCACCATTGACTTCCCCGAATTTTTGACTATGATGGCTAGAAAAATGAAAGATACAGATAGTGAAGAAGAAATCCGTGAGGCATTCCGAGTCTTTGACAAGGATGGCAATGGTTATATCAGTGCAGCAGAACTACGTCACGTCATGACAAACTTAGGAGAAAAACTAACAGATGAAGAAGTAGATGAAATGATCAGAGAAGCAGATATTGATGGAGACGGACAAGTCAACTATGAAGAATTCGTACAGATGATGACTGCAAAATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P0DP23",
                "url": "http://purl.uniprot.org/uniprot/P0DP23",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000198668",
                "url": "http://www.ensembl.org/id/ENSG00000198668",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 833,
                "identifier": "NM_001363670.1",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_001363670.1",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000001-c-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000001-c-1",
        "variant_count": 5957,
        "experiment": "urn:mavedb:00000001-c",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-27",
        "modification_date": "2020-11-20",
        "urn": "urn:mavedb:00000049-a-3",
        "publish_date": "2020-11-20",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human MTHFR via functional complementation in yeast at 100ug/ml folate in A222V background",
        "title": "MTHFR at 100ug/ml folate in A222V background",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390",
            "0000-0002-9219-4310",
            "0000-0002-2550-2141",
            "0000-0001-6465-5776"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MTHFR",
            "reference_sequence": {
                "sequence": "ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P42898",
                "url": "http://purl.uniprot.org/uniprot/P42898",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000177000",
                "url": "http://www.ensembl.org/id/ENSG00000177000",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 230,
                "identifier": "NM_005957",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_005957",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000049-a-3",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000049-a-3",
        "variant_count": 13690,
        "experiment": "urn:mavedb:00000049-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-07",
        "urn": "urn:mavedb:00000039-a-4",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effects of yeast HSP90 variants under the control of different promoters to explore the relationship between protein sequence and expression level. The results showed that reduced expression level (compared to wild-type expression) revealed new partial loss of function mutations.",
        "method_text": "Growth rates were calculated for each variant and converted into selection coefficients. The selection coefficient for each variant under control of this promoter/UTR combination is reported as the score. For variants with multiple synonymous codons, the reported coefficient is the average of all synonymous variant's selection coefficients.\r\n\r\nVariants annotated as \"null-like\" have a score of -1.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae under the control of the CYC promoter, no 3'UTR added.",
        "title": "Deep mutational scan of HSP90, CYCdter construct",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "promoter"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "23825969",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23825969",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000039-a-4",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000039-a-4",
        "variant_count": 189,
        "experiment": "urn:mavedb:00000039-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-06-26",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000001-a-2",
        "publish_date": "2018-06-29",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Although we now routinely sequence human genomes, we can confidently identify only a fraction of the sequence variants that have a functional impact. Here, we developed a deep mutational scanning framework that produces exhaustive maps for human missense variants by combining random codon mutagenesis and multiplexed functional variation assays with computational imputation and refinement. We applied this framework to four proteins corresponding to six human genes: UBE2I (encoding SUMO E2 conjugase), SUMO1 (small ubiquitin-like modifier), TPK1 (thiamin pyrophosphokinase), and CALM1/2/3 (three genes encoding the protein calmodulin). The resulting maps recapitulate known protein features and confidently identify pathogenic variation. Assays potentially amenable to deep mutational scanning are already available for 57% of human disease genes, suggesting that DMS could ultimately map functional variation for all human disease genes. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957)",
        "method_text": "##Scoring procedure:\r\nDMS-BarSeq and reads were processed using the [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) software. Briefly, Barseq read counts were used to establish relative frequencies of each strain at each timepoint and converted to estimates of absolute frequencies using OD measurement data. Absolute counts were used to establish growth curves from which fitness parameters were estimated and then normalized to 0-1 scale where 0 corresponds to null controls and 1 corresponds to WT controls. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957) for more details.",
        "short_description": "A Deep Mutational Scan of the human SUMO E2 conjugase UBE2I using functional complementation in yeast via DMS-BarSeq",
        "title": "UBE2I DMS-BarSeq",
        "keywords": [
            {
                "text": "DMS-BarSeq"
            },
            {
                "text": "E2"
            },
            {
                "text": "sumoylation"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29269382",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29269382",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "UBE2I",
            "reference_sequence": {
                "sequence": "ATGTCGGGGATCGCCCTCAGCAGACTCGCCCAGGAGAGGAAAGCATGGAGGAAAGACCACCCATTTGGTTTCGTGGCTGTCCCAACAAAAAATCCCGATGGCACGATGAACCTCATGAACTGGGAGTGCGCCATTCCAGGAAAGAAAGGGACTCCGTGGGAAGGAGGCTTGTTTAAACTACGGATGCTTTTCAAAGATGATTATCCATCTTCGCCACCAAAATGTAAATTCGAACCACCATTATTTCACCCGAATGTGTACCCTTCGGGGACAGTGTGCCTGTCCATCTTAGAGGAGGACAAGGACTGGAGGCCAGCCATCACAATCAAACAGATCCTATTAGGAATACAGGAACTTCTAAATGAACCAAATATCCAAGACCCAGCTCAAGCAGAGGCCTACACGATTTACTGCCAAAACAGAGTGGAGTACGAGAAAAGGGTCCGAGCACAAGCCAAGAAGTTTGCGCCCTCATAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P63279",
                "url": "http://purl.uniprot.org/uniprot/P63279",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000103275",
                "url": "http://www.ensembl.org/id/ENSG00000103275",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 159,
                "identifier": "NM_003345",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_003345",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000001-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000001-a-2",
        "variant_count": 3418,
        "experiment": "urn:mavedb:00000001-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-10-16",
        "modification_date": "2020-11-25",
        "urn": "urn:mavedb:00000050-a-1",
        "publish_date": "2020-11-25",
        "created_by": "0000-0002-6145-882X",
        "modified_by": "0000-0002-6145-882X",
        "extra_metadata": {},
        "abstract_text": "Loss-of-function scores for MSH2 variants in human MSH2- HAP1 cells.",
        "method_text": "Scores represent the log2-scaled enrichment of each MSH2 variant after selection with 6-TG compared with mock selection, averaged over three replicates.  Positive scores correspond to loss-of-function and negative scores correspond to functionally neutral variants.",
        "short_description": "MSH2 loss-of-function (LOF) scores in human HAP1 cells.",
        "title": "MSH2 LOF scores (HAP1)",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-6145-882X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MSH2",
            "reference_sequence": {
                "sequence": "ATGGCGGTGCAGCCGAAGGAGACGCTGCAGTTGGAGAGCGCGGCCGAGGTCGGCTTCGTGCGCTTCTTTCAGGGCATGCCGGAGAAGCCGACCACCACAGTGCGCCTTTTCGACCGGGGCGACTTCTATACGGCGCACGGCGAGGACGCGCTGCTGGCCGCCCGGGAGGTGTTCAAGACCCAGGGGGTGATCAAGTACATGGGGCCGGCAGGAGCAAAGAATCTGCAGAGTGTTGTGCTTAGTAAAATGAATTTTGAATCTTTTGTAAAAGATCTTCTTCTGGTTCGTCAGTATAGAGTTGAAGTTTATAAGAATAGAGCTGGAAATAAGGCATCCAAGGAGAATGATTGGTATTTGGCATATAAGGCTTCTCCTGGCAATCTCTCTCAGTTTGAAGACATTCTCTTTGGTAACAATGATATGTCAGCTTCCATTGGTGTTGTGGGTGTTAAAATGTCCGCAGTTGATGGCCAGAGACAGGTTGGAGTTGGGTATGTGGATTCCATACAGAGGAAACTAGGACTGTGTGAATTCCCTGATAATGATCAGTTCTCCAATCTTGAGGCTCTCCTCATCCAGATTGGACCAAAGGAATGTGTTTTACCCGGAGGAGAGACTGCTGGAGACATGGGGAAACTGAGACAGATAATTCAAAGAGGAGGAATTCTGATCACAGAAAGAAAAAAAGCTGACTTTTCCACAAAAGACATTTATCAGGACCTCAACCGGTTGTTGAAAGGCAAAAAGGGAGAGCAGATGAATAGTGCTGTATTGCCAGAAATGGAGAATCAGGTTGCAGTTTCATCACTGTCTGCGGTAATCAAGTTTTTAGAACTCTTATCAGATGATTCCAACTTTGGACAGTTTGAACTGACTACTTTTGACTTCAGCCAGTATATGAAATTGGATATTGCAGCAGTCAGAGCCCTTAACCTTTTTCAGGGTTCTGTTGAAGATACCACTGGCTCTCAGTCTCTGGCTGCCTTGCTGAATAAGTGTAAAACCCCTCAAGGACAAAGACTTGTTAACCAGTGGATTAAGCAGCCTCTCATGGATAAGAACAGAATAGAGGAGAGATTGAATTTAGTGGAAGCTTTTGTAGAAGATGCAGAATTGAGGCAGACTTTACAAGAAGATTTACTTCGTCGATTCCCAGATCTTAACCGACTTGCCAAGAAGTTTCAAAGACAAGCAGCAAACTTACAAGATTGTTACCGACTCTATCAGGGTATAAATCAACTACCTAATGTTATACAGGCTCTGGAAAAACATGAAGGAAAACACCAGAAATTATTGTTGGCAGTTTTTGTGACTCCTCTTACTGATCTTCGTTCTGACTTCTCCAAGTTTCAGGAAATGATAGAAACAACTTTAGATATGGATCAGGTGGAAAACCATGAATTCCTTGTAAAACCTTCATTTGATCCTAATCTCAGTGAATTAAGAGAAATAATGAATGACTTGGAAAAGAAGATGCAGTCAACATTAATAAGTGCAGCCAGAGATCTTGGCTTGGACCCTGGCAAACAGATTAAACTGGATTCCAGTGCACAGTTTGGATATTACTTTCGTGTAACCTGTAAGGAAGAAAAAGTCCTTCGTAACAATAAAAACTTTAGTACTGTAGATATCCAGAAGAATGGTGTTAAATTTACCAACAGCAAATTGACTTCTTTAAATGAAGAGTATACCAAAAATAAAACAGAATATGAAGAAGCCCAGGATGCCATTGTTAAAGAAATTGTCAATATTTCTTCAGGCTATGTAGAACCAATGCAGACACTCAATGATGTGTTAGCTCAGCTAGATGCTGTTGTCAGCTTTGCTCACGTGTCAAATGGAGCACCTGTTCCATATGTACGACCAGCCATTTTGGAGAAAGGACAAGGAAGAATTATATTAAAAGCATCCAGGCATGCTTGTGTTGAAGTTCAAGATGAAATTGCATTTATTCCTAATGACGTATACTTTGAAAAAGATAAACAGATGTTCCACATCATTACTGGCCCCAATATGGGAGGTAAATCAACATATATTCGACAAACTGGGGTGATAGTACTCATGGCCCAAATTGGGTGTTTTGTGCCATGTGAGTCAGCAGAAGTGTCCATTGTGGACTGCATCTTAGCCCGAGTAGGGGCTGGTGACAGTCAATTGAAAGGAGTCTCCACGTTCATGGCTGAAATGTTGGAAACTGCTTCTATCCTCAGGTCTGCAACCAAAGATTCATTAATAATCATAGATGAATTGGGAAGAGGAACTTCTACCTACGATGGATTTGGGTTAGCATGGGCTATATCAGAATACATTGCAACAAAGATTGGTGCTTTTTGCATGTTTGCAACCCATTTTCATGAACTTACTGCCTTGGCCAATCAGATACCAACTGTTAATAATCTACATGTCACAGCACTCACCACTGAAGAGACCTTAACTATGCTTTATCAGGTGAAGAAAGGTGTCTGTGATCAAAGTTTTGGGATTCATGTTGCAGAGCTTGCTAATTTCCCTAAGCATGTAATAGAGTGTGCTAAACAGAAAGCCCTGGAACTTGAGGAGTTTCAGTATATTGGAGAATCGCAAGGATATGATATCATGGAACCAGCAGCAAAGAAGTGCTATCTGGAAAGAGAGCAAGGTGAAAAAATTATTCAGGAGTTCCTGTCCAAGGTGAAACAAATGCCCTTTACTGAAATGTCAGAAGAAAACATCACAATAAAGTTAAAACAGCTAAAAGCTGAAGTAATAGCAAAGAATAATAGCTTTGTAAATGAAATCATTTCACGAATAAAAGTTACTACGTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P43246",
                "url": "http://purl.uniprot.org/uniprot/P43246",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": {
                "offset": 0,
                "identifier": "NP_000242.1",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NP_000242.1",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000050-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000050-a-1",
        "variant_count": 17746,
        "experiment": "urn:mavedb:00000050-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-04-14",
        "urn": "urn:mavedb:00000061-i-1",
        "publish_date": "2021-04-14",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors generated a library of RAF variant and use the PACS system to test KRAS4b/RAF protein-protein interaction (PPI). The experimental data revealed positions along the binding interface as well as which substitutions are tolerated at each position.",
        "method_text": "Samples are collected after 72h and sequenced by Illumina. The counts for each variant is first added by 1, then divided by total sequence counts at this time point to calculate variant frequency. The functional score equals the division of a variant frequency at this time point and its frequency in initial library. Further normalizing the functional scores by wild type scores will give the relative enrichment values. The score data includes scores from three replicates which are suffixed by: _rep1, _rep2 & _rep3. The final score is the median of them.",
        "short_description": "Measuring the interaction of mutated RAF to RAS by a new phage-assisted continuous selection (PACS) system.",
        "title": "RAF variant selected after 72h",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1021/acschembio.9b00669",
                "url": "https://doi.org/10.1021/acschembio.9b00669",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31808666",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31808666",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "RAF",
            "reference_sequence": {
                "sequence": "TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 51,
                "identifier": "P04049",
                "url": "http://purl.uniprot.org/uniprot/P04049",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000061-i-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "score_rep1",
            "score_rep2",
            "score_rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000061-i-1",
        "variant_count": 298,
        "experiment": "urn:mavedb:00000061-i",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-07-10",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000003-b-2",
        "publish_date": "2018-07-10",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "These experiments measured the functional consequences of mutations in the BRCA1 RING domain, where most clinically-relevant single nucleotide variants reside. One goal of the study was to create a \"look-up table\" of single nucleotide variants for clinical use, by prospectively measuring the impact of all possible variants that are likely to appear in patients. The study combines two different assays - one based on E3 ubiquitin ligase activity and one based on BRCA1-BARD1 heterodimer formation - and makes one of the first attempts to combine data from different MAVEs on the same target.\r\n\r\nThis entry contains scores from the yeast two-hybrid assay, which tested the BRCA1-BARD1 heterodimer formation in BRCA1 variants.\r\n\r\nNote that this score set does not describe the scores presented in the original publication. It is a reanalysis of the raw data that was produced as part of testing and development for Enrich2.",
        "method_text": "Scores were calculated using the Enrich2 weighted least squares regression scoring model. Replicate scores were combined using the Enrich2 random-effects model. Counts for each variant were calculated as the sum of counts for all barcodes associated with a variant with the same amino acid sequence.\r\n\r\nThe scores and standard errors calculated for each of replicate appear as additional columns.\r\n\r\nCount columns are named using the format `<replicate>_c_<timepoint>`. The 0 time point is the input (unselected). Time points are given in hours.",
        "short_description": "Amino acid variant scores for deep mutational scan of the BRCA1 RING domain using yeast two-hybrid calculated by Enrich2.",
        "title": "Enrich2 amino acid variant scores for BRCA1 Y2H",
        "keywords": [
            {
                "text": "Yeast two-hybrid"
            },
            {
                "text": "ubiquitin"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28784151",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28784151",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "BRCA1 RING domain",
            "reference_sequence": {
                "sequence": "GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGCCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTATGCAAACAGCTATAATTTTGCAAAAAAGGAAAATAACTCTCCTGAACATCTAAAAGATGAAGTTTCTATCATCCAAAGTATGGGCTACAGAAACCGTGCCAAAAGACTTCTACAGAGTGAACCCGAAAATCCTTCCTTGCAGGAAACCAGTCTCAGTGTCCAACTCTCTAACCTTGGAACTGTGAGAACTCTGAGGACAAAGCAGCGGATACAACCTCAAAGGACGTCTGTCTACATTGAATTGGGATCTGATTCTTCTGAAGATACCGTTAATAAGGCAACTTATTGCAGTGTGGGAGATCAAGAATTGTTACAAATCACCCCTCAAGGAACCAGGGATGAAATCAGTTTGGATTCTGCAAAAAAGGCTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAGTTC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000003-b-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_Y2H_1_Rep1",
            "score_Y2H_1_Rep1",
            "SE_Y2H_1_Rep2",
            "score_Y2H_1_Rep2",
            "SE_Y2H_1_Rep3",
            "score_Y2H_1_Rep3",
            "SE_Y2H_2_Rep1",
            "score_Y2H_2_Rep1",
            "SE_Y2H_2_Rep2",
            "score_Y2H_2_Rep2",
            "SE_Y2H_2_Rep3",
            "score_Y2H_2_Rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "PlusE2NewRep3_c_0",
            "PlusE2NewRep3_c_1",
            "PlusE2NewRep3_c_2",
            "PlusE2NewRep3_c_3",
            "PlusE2NewRep3_c_4",
            "PlusE2NewRep3_c_5",
            "PlusE2NewRep4_c_0",
            "PlusE2NewRep4_c_1",
            "PlusE2NewRep4_c_2",
            "PlusE2NewRep4_c_3",
            "PlusE2NewRep4_c_4",
            "PlusE2NewRep4_c_5",
            "PlusE2NewRep5_c_0",
            "PlusE2NewRep5_c_1",
            "PlusE2NewRep5_c_2",
            "PlusE2NewRep5_c_3",
            "PlusE2NewRep5_c_4",
            "PlusE2NewRep5_c_5",
            "PlusE2Rep3_c_0",
            "PlusE2Rep3_c_1",
            "PlusE2Rep3_c_2",
            "PlusE2Rep3_c_3",
            "PlusE2Rep3_c_4",
            "PlusE2Rep3_c_5",
            "PlusE2Rep4_c_0",
            "PlusE2Rep4_c_1",
            "PlusE2Rep4_c_2",
            "PlusE2Rep4_c_3",
            "PlusE2Rep4_c_4",
            "PlusE2Rep4_c_5",
            "PlusE2Rep5_c_0",
            "PlusE2Rep5_c_1",
            "PlusE2Rep5_c_2",
            "PlusE2Rep5_c_3",
            "PlusE2Rep5_c_4",
            "PlusE2Rep5_c_5",
            "Y2H_1_Rep1_c_0",
            "Y2H_1_Rep1_c_18",
            "Y2H_1_Rep1_c_37",
            "Y2H_1_Rep1_c_45",
            "Y2H_1_Rep2_c_0",
            "Y2H_1_Rep2_c_18",
            "Y2H_1_Rep2_c_37",
            "Y2H_1_Rep2_c_45",
            "Y2H_1_Rep3_c_0",
            "Y2H_1_Rep3_c_18",
            "Y2H_1_Rep3_c_37",
            "Y2H_1_Rep3_c_45",
            "Y2H_2_Rep1_c_0",
            "Y2H_2_Rep1_c_16",
            "Y2H_2_Rep1_c_41",
            "Y2H_2_Rep1_c_64",
            "Y2H_2_Rep2_c_0",
            "Y2H_2_Rep2_c_16",
            "Y2H_2_Rep2_c_41",
            "Y2H_2_Rep2_c_64",
            "Y2H_2_Rep3_c_0",
            "Y2H_2_Rep3_c_16",
            "Y2H_2_Rep3_c_41",
            "Y2H_2_Rep3_c_64"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000003-b-2",
        "variant_count": 12316,
        "experiment": "urn:mavedb:00000003-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000027-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "1",
            "end": 155301864,
            "start": 155301395,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of PKLR promoter, 24h post-transfection in K562 cells.",
        "title": "Saturation mutagenesis MPRA of PKLR promoter, 24h",
        "keywords": [
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "PKLR promoter",
            "reference_sequence": {
                "sequence": "TGCTTTCAGTGTGGGCCTGGGGCTGCGGGACCATGGAATGAGAGGGAGAGGATGACAAAACTGCTGGTCTTATCTAAGGGAGACAGAGAAGAGAAAAGGGGCACACCCAGTAGGCCACCCTGTCCCCACAGAATCCCTCCCCCAGAACGGCCTGCTCTCTGCCCTCATCTCCTGGCATTTCCTCTCATCCTTTTTTCCTGATAAATTTTCAATCCATTCATACTATCTGGTCATCCACGTGAATAGATATTTTTTTTTTGGCCAGTCATATGGCCCCATTTTCTTTGTACTTTACTGAAGTTAGCTCTAGTGAATCCAGGGAGCAGGGGCTGTAGGGTGGGGCTGGAGCCTGAAGAAAGACAAAAGGGATCACTGTGATAATATGGTGGGGGGAGGGTTACCCAGTTCTGACCACTTTTTTTCTCTGTCTCAACCAAGAAATGCAGAGTGCCTTCACCACTCTGTAACCT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000027-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000027-a-1",
        "variant_count": 1776,
        "experiment": "urn:mavedb:00000027-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2021-03-10",
        "modification_date": "2021-04-15",
        "urn": "urn:mavedb:00000062-a-1",
        "publish_date": "2021-04-15",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study tested the impact of CYP2C9 variants on the abundance of this protein in HEK293T. The authors fused the target protein with green fluorescent protein (GFP) and used the intensity of GPF as the indicator of target protein abundance.",
        "method_text": "After 5 days' growth, the cells were sorted to 4 bins by FACS according to the relative intensity of GFP and mCherry. The gate for each bin was set by the values of wildtype and known target protein variants, for example bin 1 represented protein expression less than 25%. Sorted cells for each bin were sequenced by Illumina and the frequency of each variant in the bin was determined. The protein abundance score for a variant was calculated as: the sum of weighted variant frequency in each bin (weight: 0.25, 0.5, 0.75 & 1 for bin1 to bin4) and divided by the sum of the un-weighted variant frequency in each bin.\r\nThe experiment was repeated 4 times and the final score was the mean of them.",
        "short_description": "This study tested the protein abundance of CYP2C9 variants by fusing green fluorescent protein.",
        "title": "Abundance of CYP2C9 variants",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1111/cts.12758",
                "url": "https://doi.org/10.1111/cts.12758",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "32004414",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32004414",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CYP2C9",
            "reference_sequence": {
                "sequence": "TCTCTTGTGGTCCTTGTGCTCTGTCTCTCATGTTTGCTTCTCCTTTCACTCTGGAGACAGAGCTCTGGGAGAGGAAAACTCCCTCCTGGCCCCACTCCTCTCCCAGTGATTGGAAATATCCTACAGATAGGTATTAAGGACATCAGCAAATCCTTAACCAATCTCTCAAAGGTCTATGGCCCTGTGTTCACTCTGTATTTTGGCCTGAAACCCATAGTGGTGCTGCATGGATATGAAGCAGTGAAGGAAGCCCTGATTGATCTTGGAGAGGAGTTTTCTGGAAGAGGCATTTTCCCACTGGCTGAAAGAGCTAACAGAGGATTTGGAATTGTTTTCAGCAATGGAAAGAAATGGAAGGAGATCCGGCGTTTCTCCCTCATGACGCTGCGGAATTTTGGGATGGGGAAGAGGAGCATTGAGGACCGTGTTCAAGAGGAAGCCCGCTGCCTTGTGGAGGAGTTGAGAAAAACCAAGGCCTCACCCTGTGATCCCACTTTCATCCTGGGCTGTGCTCCCTGCAATGTGATCTGCTCCATTATTTTCCATAAACGTTTTGATTATAAAGATCAGCAATTTCTTAACTTAATGGAAAAGTTGAATGAAAACATCAAGATTTTGAGCAGCCCCTGGATCCAGATCTGCAATAATTTTTCTCCTATCATTGATTACTTCCCGGGAACTCACAACAAATTACTTAAAAACGTTGCTTTTATGAAAAGTTATATTTTGGAAAAAGTAAAAGAACACCAAGAATCAATGGACATGAACAACCCTCAGGACTTTATTGATTGCTTCCTGATGAAAATGGAGAAGGAAAAGCACAACCAACCATCTGAATTTACTATTGAAAGCTTGGAAAACACTGCAGTTGACTTGTTTGGAGCTGGGACAGAGACGACAAGCACAACCCTGAGATATGCTCTCCTTCTCCTGCTGAAGCACCCAGAGGTCACAGCTAAAGTCCAGGAAGAGATTGAACGTGTGATTGGCAGAAACCGGAGCCCCTGCATGCAAGACAGGAGCCACATGCCCTACACAGATGCTGTGGTGCACGAGGTCCAGAGATACATTGACCTTCTCCCCACCAGCCTGCCCCATGCAGTGACCTGTGACATTAAATTCAGAAACTATCTCATTCCCAAGGGCACAACCATATTAATTTCCCTGACTTCTGTGCTACATGACAACAAAGAATTTCCCAACCCAGAGATGTTTGACCCTCATCACTTTCTGGATGAAGGTGGCAATTTTAAGAAAAGTAAATACTTCATGCCTTTCTCAGCAGGAAAACGGATTTGTGTGGGAGAAGCCCTGGCCGGCATGGAGCTGTTTTTATTCCTGACCTCCATTTTACAGAACTTTAACCTGAAATCTCTGGTTGACCCAAAGAACCTTGACACCACTCCAGTTGTCAATGGATTTGCCTCTGTGCCGCCCTTCTACCAGCTGTGCTTCATTCCT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 2,
                "identifier": "P11712",
                "url": "http://purl.uniprot.org/uniprot/P11712",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000062-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "e1_score",
            "e2_score",
            "e3_score",
            "e4_score",
            "sd",
            "se",
            "lower_ci",
            "upper_ci"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000062-a-1",
        "variant_count": 109,
        "experiment": "urn:mavedb:00000062-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-10-04",
        "modification_date": "2019-10-22",
        "urn": "urn:mavedb:00000005-a-5",
        "publish_date": "2019-10-22",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Success in precision medicine depends on our ability to determine which rare human genetic variants have functional effects. Classical homocystinuria - characterized by elevated homocyst(e)ine in plasma and urine - is caused by primarily-rare variants in the cystathionine beta-synthase (CBS) gene. About half of patients respond to vitamin B6 therapy. With early detection in newborns, existing therapies are highly effective. Functional CBS variants, especially those that respond to vitamin B6, can be detected based on their ability to restore growth in yeast cells lacking CYS4 (the yeast ortholog of CBS). This assay has previously been carried out only reactively after first observation of a variant in patients. Here we describe a proactive comprehensive missense variant effect map for human CBS. Together, saturation codon-replacement mutagenesis, en masse growth selection at different vitamin B6 levels, and sequencing yielded a look-up table for CBS missense variant function and vitamin B6-remediability in yeast. The CBS variant effect map identified disease variants and predicted both disease severity (r = 0.82) and human clinical response to vitamin B6 (r = 0.89). Thus, highly-multiplexed cell-based assays can yield proactive maps of variant function and patient response to therapy, even for rare variants not previously seen in the clinic.\r\n\r\nSee Sun et al 2018",
        "method_text": "Scoring procedure:\r\nDMS-TileSeq reads were processed using the tileseq_package and tileseqMave softwares. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). Log-ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. \r\n\r\nSee Sun et al 2018 for more details.",
        "short_description": "A Deep Mutational Scan of the human cystathionine-beta-synthase (CBS) using functional complementation in yeast via DMS-TileSeq at low levels of Vitamin B6.",
        "title": "CBS low-B6",
        "keywords": [
            {
                "text": "Vitamin B6"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CBS",
            "reference_sequence": {
                "sequence": "ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P35520",
                "url": "http://purl.uniprot.org/uniprot/P35520",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000160200",
                "url": "http://www.ensembl.org/id/ENSG00000160200",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000005-a-5",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "nonselect1",
            "nonselect2",
            "nonselect3",
            "nonselect4",
            "nonselect5",
            "nonselect6",
            "nonselect7",
            "nonselect8",
            "select1",
            "select2",
            "select3",
            "select4",
            "select5",
            "select6",
            "select7",
            "select8",
            "controlNS1",
            "controlNS2",
            "controlNS3",
            "controlNS4",
            "controlNS5",
            "controlNS6",
            "controlNS7",
            "controlNS8",
            "controlS1",
            "controlS2",
            "controlS3",
            "controlS4",
            "controlS5",
            "controlS6",
            "controlS7",
            "controlS8"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000005-a-5",
        "variant_count": 11478,
        "experiment": "urn:mavedb:00000005-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-20",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000033-a-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "6",
            "end": 37808077,
            "start": 37807499,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of ZFAND3 enhancer in MIN6 cells.",
        "title": "Saturation mutagenesis MPRA of ZFAND3 enhancer",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "ZFAND3 enhancer",
            "reference_sequence": {
                "sequence": "GTTCATGTTTCCCCCGTATGTGCGTGCGCATGCACATATATAAACACATCTAGCTTCACACCTGTGTACTCACACCTGTGTGTACACACTCCTCCTGTACATGTGTATACACGCACACCCTCCCACCAGCATGAGCAGTATGTGGATTTTTTCTTCCAGCAGCCCTTTCAAGAACATACACCAGATGGGATTGGGTCTGCTACATGACTGAGCAGCCCCCACATCAGAAAGTCATCATCATTGAATCCATCATGTGGTGCAGCCTTGGCAAGGGCACAGCTCCTCGTCCTGCCCAATGGGCCGTGGAGGAATGCCTCCTCCTCCAGAGCAGAGAGCAGATATGGGCAGGCAGTTGCCAGGCAGTTGAGCCGGGAGAACAGATGGCAGCTGCACAGCCTCCTTCTAGCCCAGTTTCCTGGCTGCCGGGGAAGGAGTTGCAGGCGGCAAGGATCCTGCAGAAACCGCCCTCCCTATAACAGGCCATCTTGCCCTCTATCCTCTTACTGGGCTTTGGAGGTCCCAATCCTGCCCTGGTTCACCACCTGTGTGGCTTACCAAGCTGTGCAACTTGGGGCAGGA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000033-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000033-a-1",
        "variant_count": 2012,
        "experiment": "urn:mavedb:00000033-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-01-17",
        "urn": "urn:mavedb:00000057-d-1",
        "publish_date": "2021-01-17",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors used saturation mutagenesis to study the variant effect of Ras, on the oncogenic G12V background, without the regulation of GTPase activating protein (GAP) or guanine nucleotide exchange factor (GEF). The variants were selected by bacterial two-hybrid strategy.",
        "method_text": "The frequency of each mutant was determined through sequencing. The natural logarithm of the frequency ratio for each mutant after and before selection was calculated. These values were then normalized by dividing the wild type values which were calculated the same way. Although this is a single amino acid mutagenesis study in the background of G12V, the variants were annotated as double mutants with respect to wild type Ras.",
        "short_description": "Selection on the oncogenic G12V mutated type of Ras",
        "title": "G12V mutated Ras",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28686159",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28686159",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Ras",
            "reference_sequence": {
                "sequence": "ACGGAATATAAGCTGGTGGTGGTGGGCGCCGGCGGTGTGGGCAAGAGTGCGCTGACCATCCAGCTGATCCAGAACCATTTTGTGGACGAATACGACCCCACTATAGAGGATTCCTACCGGAAGCAGGTGGTCATTGATGGGGAGACGTGCCTGTTGGACATCCTGGATACCGCCGGCCAGGAGGAGTACAGCGCCATGCGGGACCAGTACATGCGCACCGGGGAGGGCTTCCTGTGTGTGTTTGCCATCAACAACACCAAGTCTTTTGAGGACATCCACCAGTACAGGGAGCAGATCAAACGGGTGAAGGACTCGGATGACGTGCCCATGGTGCTGGTGGGGAACAAGTGTGACCTGGCTGCACGCACTGTGGAATCTCGGCAGGCTCAGGACCTCGCCCGAAGCTACGGCATCCCCTACATCGAGACCTCGGCCAAGACCCGGCAGGGAGTGGAGGATGCCTTCTACACGTTGGTGCGTGAGATCCGGCAGCAC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P01112",
                "url": "http://purl.uniprot.org/uniprot/P01112",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000057-d-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000057-d-1",
        "variant_count": 3300,
        "experiment": "urn:mavedb:00000057-d",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-17",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000009-a-1",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the impact of nearly all possible point mutations in the *SUL1* promoter, which is frequently amplified under sulfate-limited growth conditions. The results demonstrated that the optimal set of observed point mutations were able to increase organismal fitness by 11%, which is far below the fitness increases previously measured for amplification of *SUL1* (35% or higher). These experiments also revealed the fitness effects of creating new transcription factor binding sites in the existing promoter sequence.",
        "method_text": "Scores were calculated by converting barcode frequencies at each time point to log ratios between that round's frequency and the input frequency. The fitness score for each barcode was calculated as the slope of the ordinary least-squares regression for these ratios on the number of generations elapsed for the sample. The fitness scores were normalized by subtracting the wild type fitness score from each measurement. Read count cutoff for each variant was set at 50, which was heuristically determined.",
        "short_description": "Comprehensive analysis of the SUL1 promoter under sulfate limited conditions.",
        "title": "SUL1 promoter under sulfate limited conditions",
        "keywords": [
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "26936925",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/26936925",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "SUL1 promoter",
            "reference_sequence": {
                "sequence": "CGCCACCTCGAGTGCACTTTTTTTAATAAAGATCTCGTGTAATTGTCCAAATCTGACTTTTTCTTATAGTCTCGCTGGAACCACAGTGCGGCTTTGCAATTTTGCAAATCGGAATTTGAGTCACAGATCCCAGAAAAACTCCACACCTTCCCCACGCAGCAAGCGATAACGAACAAGTTGTCAAATTAGACCCATAATAATTTTGAACACTTCTACCTGTTCATGTCTTTTCTCGAACACTGTCATTTGAAATTATGCACTGTGAAAAAAAGAAACAAAGACCAAAAGAATAATATAAATAGTGAAGTAAAATGTGTTGTAATGCACATGGATCTTGTACTGCTCAAACTTAATATTTCTATTGTAGAAAAATTTTCGATTTAAAATTGTGAAACCGATTATATAAAAGTATATTAGCTGACATTAACGTCTCAAAACCAGGTCAATAGCTTTAAAAATAAAAATAAATCCCTGCAGAATACTCGGAAAGAAT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000009-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "r_sq",
            "slope",
            "barcode.count",
            "scored.unique.barcodes"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "count.0",
            "count.6",
            "count.10",
            "count.12",
            "count.17",
            "count.23",
            "count.37",
            "count.40"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000009-a-1",
        "variant_count": 125032,
        "experiment": "urn:mavedb:00000009-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-27",
        "modification_date": "2020-11-20",
        "urn": "urn:mavedb:00000049-a-6",
        "publish_date": "2020-11-20",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human MTHFR via functional complementation in yeast at 25ug/ml folate in WT background",
        "title": "MTHFR at 25ug/ml folate in WT background",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390",
            "0000-0002-9219-4310",
            "0000-0002-2550-2141",
            "0000-0001-6465-5776"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MTHFR",
            "reference_sequence": {
                "sequence": "ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P42898",
                "url": "http://purl.uniprot.org/uniprot/P42898",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000177000",
                "url": "http://www.ensembl.org/id/ENSG00000177000",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 230,
                "identifier": "NM_005957",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_005957",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000049-a-6",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000049-a-6",
        "variant_count": 13704,
        "experiment": "urn:mavedb:00000049-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-09-30",
        "modification_date": "2019-10-08",
        "urn": "urn:mavedb:00000043-a-2",
        "publish_date": "2019-10-08",
        "created_by": "0000-0001-7684-5841",
        "modified_by": "0000-0001-7684-5841",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "The DMS library was encoded in the retroviral expression system pMX-GW-PGK-PuroR-GFP. Each codon for amino acid positions 488-516 of the human TpoR protein, encompassing the TM and partial JM regions, was independently randomised using degenerate (NNN) primers (IDT) to encode all 64 possible codons. Plasmid preparations for all positions (488-516) were pooled together in equal amounts to obtain a library containing all 1,856 possible DNA variants. The frequencies of variants in each library were determined by Illumina sequencing, which confirmed 100% representation across the randomised region.\r\n\r\nThe library was transfected into HEK293T cells along with retroviral packaging vectors using calcium phosphate transfection. After 48 hrs, supernatants containing virus were harvested and sterile filtered for transduction into Ba/F3 cells.  The pooled retroviral library was used to transduce 10^6 Ba/F3 cells at a multiplicity of infection ~0.1, generating ~10^5 transductants for each of six biological replicates. These six cultures were treated with 5 μg/ml puromycin (on IL-3) for 48 hours to yield pure virus-positive cells. Cells for each replicate were split in half and cultured in the continued presence of IL-3, or were washed to remove IL-3, and subjected to another 48 hours culture. mRNA was prepared from the live cells that remained at the end of this procedure.\r\n\r\ncDNA was prepared from 1 μg of total RNA using the TpoR-specific reverse transcription primer containing a 16 bp unique molecular identifier (UMI) and an Illumina adapter. cDNA was amplified to add illumina adapters and indexes for sequencing using an Illumina NextSeq kit with 140 cycles in the forward direction and 160 cycles in the reverse direction.\r\n\r\nThe paired-end reads from Illumina sequencing runs were separated into samples based on Illumina index sequences using Cutadapt v1.157. De-duplication based on the UMI was performed using UMI Tools v1.15 after sample separation. Reads were trimmed to the region of interest and filtered for length using Cutadapt prior to analysis with Enrich2 v1.2.0. \r\n\r\nTo determine mutations that confer increased activity on the S505N background six replicates of library-containing BaF3 cells grown in IL-3 were compared to six replicates of library-containing BaF3 cells grown without growth factors.  Log ratio enrichment scores were calculated using “wild-type” (all synonymous DNA sequences encoding the WT amino acid sequence) count normalisation.",
        "short_description": "This screen was designed to identify all single-amino-acid substitutions in the human TpoR transmembrane (TM) and partial juxtamembrane (JM) regions (488-516) that enhance the known S505N TpoR transmembrane domain mutation that causes receptor constitutive activity.",
        "title": "Novel Modifiers of MPL-dependent Oncogenic Transformation",
        "keywords": [
            {
                "text": "Thrombopoietin Receptor"
            },
            {
                "text": "Transmembrane"
            },
            {
                "text": "Enrich2"
            },
            {
                "text": "mRNA"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0001-7684-5841"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "S505N MPL",
            "reference_sequence": {
                "sequence": "ACCGAGACCGCCTGGATCTCCTTGGTGACCGCTCTGCATCTAGTGCTGGGCCTCAACGCCGTCCTGGGCCTGCTGCTGCTGAGGTGGCAGTTT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 486,
                "identifier": "P40238",
                "url": "http://purl.uniprot.org/uniprot/P40238",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000043-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_Replicate_1",
            "score_Replicate_1",
            "SE_Replicate_2",
            "score_Replicate_2",
            "SE_Replicate_3",
            "score_Replicate_3",
            "SE_Replicate_4",
            "score_Replicate_4",
            "SE_Replicate_5",
            "score_Replicate_5",
            "SE_Replicate_6",
            "score_Replicate_6"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "Replicate_1_c_0",
            "Replicate_1_c_1",
            "Replicate_2_c_0",
            "Replicate_2_c_1",
            "Replicate_3_c_0",
            "Replicate_3_c_1",
            "Replicate_4_c_0",
            "Replicate_4_c_1",
            "Replicate_5_c_0",
            "Replicate_5_c_1",
            "Replicate_6_c_0",
            "Replicate_6_c_1"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000043-a-2",
        "variant_count": 1966,
        "experiment": "urn:mavedb:00000043-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-10-04",
        "modification_date": "2019-10-22",
        "urn": "urn:mavedb:00000005-a-6",
        "publish_date": "2019-10-22",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Success in precision medicine depends on our ability to determine which rare human genetic variants have functional effects. Classical homocystinuria - characterized by elevated homocyst(e)ine in plasma and urine - is caused by primarily-rare variants in the cystathionine beta-synthase (CBS) gene. About half of patients respond to vitamin B6 therapy. With early detection in newborns, existing therapies are highly effective. Functional CBS variants, especially those that respond to vitamin B6, can be detected based on their ability to restore growth in yeast cells lacking CYS4 (the yeast ortholog of CBS). This assay has previously been carried out only reactively after first observation of a variant in patients. Here we describe a proactive comprehensive missense variant effect map for human CBS. Together, saturation codon-replacement mutagenesis, en masse growth selection at different vitamin B6 levels, and sequencing yielded a look-up table for CBS missense variant function and vitamin B6-remediability in yeast. The CBS variant effect map identified disease variants and predicted both disease severity (r = 0.82) and human clinical response to vitamin B6 (r = 0.89). Thus, highly-multiplexed cell-based assays can yield proactive maps of variant function and patient response to therapy, even for rare variants not previously seen in the clinic.\r\n\r\nSee Sun et al 2018",
        "method_text": "Scoring procedure:\r\nDMS-TileSeq reads were processed using the tileseq_package and tileseqMave softwares. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). Log-ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. \r\n\r\nSee Sun et al 2018 for more details.",
        "short_description": "A Deep Mutational Scan of the human cystathionine-beta-synthase (CBS) using functional complementation in yeast via DMS-TileSeq at high levels of Vitamin B6.",
        "title": "CBS high-B6",
        "keywords": [
            {
                "text": "Vitamin B6"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CBS",
            "reference_sequence": {
                "sequence": "ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P35520",
                "url": "http://purl.uniprot.org/uniprot/P35520",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000160200",
                "url": "http://www.ensembl.org/id/ENSG00000160200",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000005-a-6",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "nonselect1",
            "nonselect2",
            "nonselect3",
            "nonselect4",
            "select1",
            "select2",
            "select3",
            "select4",
            "controlNS1",
            "controlNS2",
            "controlNS3",
            "controlNS4",
            "controlS1",
            "controlS2",
            "controlS3",
            "controlS4"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000005-a-6",
        "variant_count": 10802,
        "experiment": "urn:mavedb:00000005-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-11-25",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000004-a-3",
        "publish_date": "2018-12-03",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study identified multiple gain-of-function mutations in the ubiquitination factor E4B U-box domain by measuring auto-ubiquitination in a phage display system. E4B is an E3 ligase, responsible for transferring a ubiquitin from an E2 ligase to the target (in this case the phage displaying E4B). Many of these mutations are not specific to one E2 enzyme and so may be generalizable for mutiple E2s and E3s.\r\n\r\nNote that this score set does not describe the scores presented in the original publication. It is a reanalysis of the raw data that was produced as part of testing and development for Enrich2.",
        "method_text": "Scores were calculated using the Enrich2 weighted least squares regression scoring model. Replicate scores were combined using the Enrich2 random-effects model. In contrast to the originally reported scores, these scores use all rounds of phage display instead of only the input/last round to calculate scores.\r\n\r\nThe scores and standard errors calculated for each of replicate appear as additional columns.\r\n\r\nCount columns are named using the format `<replicate>_c_<timepoint>`. The 0 time point is the input (unselected). Time points are given in rounds.",
        "short_description": "Amino acid variant scores for deep mutational scan of the E4B U-box domain using phage display calculated by Enrich2.",
        "title": "Enrich2 amino acid variant scores for E4B",
        "keywords": [
            {
                "text": "U-box"
            },
            {
                "text": "E3"
            },
            {
                "text": "Phage display"
            },
            {
                "text": "ubiquitin"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28784151",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28784151",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23509263",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23509263",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "E4B",
            "reference_sequence": {
                "sequence": "ATAGAGAAGTTTAAACTTCTTGCAGAGAAAGTGGAGGAAATCGTGGCAAAGAATGCGCGGGCAGAAATAGACTACAGCGATGCCCCGGACGAGTTCAGAGACCCTCTGATGGACACCCTGATGACCGATCCCGTGAGACTGCCCTCTGGCACCGTCATGGACCGTTCTATCATCCTGCGGCATCTGCTCAACTCCCCCACCGACCCCTTCAACCGCCAGATGCTGACTGAGAGCATGCTGGAGCCAGTGCCAGAGCTAAAGGAGCAGATTCAGGCCTGGATGAGAGAGAAACAGAGCAGTGACCACTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1071,
                "identifier": "Q9ES00",
                "url": "http://purl.uniprot.org/uniprot/Q9ES00",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": {
                "offset": 3939,
                "identifier": "NM_022022.3",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_022022.3",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "mm10",
                        "organism_name": "Mus musculus",
                        "assembly_identifier": {
                            "identifier": "GCF_000001635.20",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001635.20",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000004-a-3",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_Rep_2",
            "score_Rep_2",
            "SE_Rep_3",
            "score_Rep_3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "Rep_2_c_0",
            "Rep_2_c_1",
            "Rep_2_c_2",
            "Rep_2_c_3",
            "Rep_3_c_0",
            "Rep_3_c_1",
            "Rep_3_c_2",
            "Rep_3_c_3"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000004-a-3",
        "variant_count": 96991,
        "experiment": "urn:mavedb:00000004-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-27",
        "modification_date": "2020-11-20",
        "urn": "urn:mavedb:00000049-a-1",
        "publish_date": "2020-11-20",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human MTHFR via functional complementation in yeast at 100ug/ml folate in WT background",
        "title": "MTHFR at 100ug/ml folate in WT background",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390",
            "0000-0002-9219-4310",
            "0000-0002-2550-2141",
            "0000-0001-6465-5776"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MTHFR",
            "reference_sequence": {
                "sequence": "ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P42898",
                "url": "http://purl.uniprot.org/uniprot/P42898",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000177000",
                "url": "http://www.ensembl.org/id/ENSG00000177000",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 230,
                "identifier": "NM_005957",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_005957",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000049-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000049-a-1",
        "variant_count": 13704,
        "experiment": "urn:mavedb:00000049-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000028-a-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "10",
            "end": 43087078,
            "start": 43086479,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of RET enhancer in Neuro-2a cells.",
        "title": "Saturation mutagenesis MPRA of RET enhancer",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "RET enhancer",
            "reference_sequence": {
                "sequence": "CAGAGGCACCAGGGTCAAAGCCAGTGGTGATGCCCTGGCCCCGTTGCCCCAGGCCAGGGCCAGTGAACAATGTAATCAGCTGGGGCAGACTCTACAGCCCTGCAGCCAAGGGGGCCAGTGACCCTTACATGGTCATCCACAGGCCACTTGGGTGGCCAGTCCTGTTCAGCCAGGCCTTGCCCTAGGAAAGAAATTAATTATAACCTAATTGGCAGTTTCCTTTGCATAGAAGCCGGAAGCAACTGCCAGTGAGGCTGGTGATTAACTCTGCAGCAGCTGGGAAATTGCAGTTGGGCAGGAGCGCCCATCATCCTGGCCAGGCCGCTGCAGCTGGTCTGGGTATGGAAGTGTGGGTGGTGGCCATCGTGCAGCTTAGGGCCTGGGCCCCTCAGAGCAGAAGGCTGGGTCTGTGTGCAGAAGGTAGCCTTGGGCTGCCAGGTCCCCCAGTGCCCAGTTGCGGACCTCCTTCTCCCAAACCTCAGCCATGCCTGGACCCCTTCCCCACTCACGGCAGAAGGCTGCATGGCTTTTAGGGGTGTCAGGAGGCTCATGTCCTGGTTGCAGTTCTACCATGGGTGTGCAGCGGGAATTCTGGGCTTC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000028-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000028-a-1",
        "variant_count": 1962,
        "experiment": "urn:mavedb:00000028-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2018-06-26",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000001-a-4",
        "publish_date": "2018-06-29",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Although we now routinely sequence human genomes, we can confidently identify only a fraction of the sequence variants that have a functional impact. Here, we developed a deep mutational scanning framework that produces exhaustive maps for human missense variants by combining random codon mutagenesis and multiplexed functional variation assays with computational imputation and refinement. We applied this framework to four proteins corresponding to six human genes: UBE2I (encoding SUMO E2 conjugase), SUMO1 (small ubiquitin-like modifier), TPK1 (thiamin pyrophosphokinase), and CALM1/2/3 (three genes encoding the protein calmodulin). The resulting maps recapitulate known protein features and confidently identify pathogenic variation. Assays potentially amenable to deep mutational scanning are already available for 57% of human disease genes, suggesting that DMS could ultimately map functional variation for all human disease genes. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957)",
        "method_text": "##Scoring procedure:\r\nDMS-BarSeq and DMS-TileSeq reads were processed using the [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) software. Briefly, Barseq read counts were used to establish relative frequencies of each strain at each timepoint and converted to estimates of absolute frequencies using OD measurement data. Absolute counts were used to establish growth curves from which fitness parameters were estimated and then normalized to 0-1 scale where 0 corresponds to null controls and 1 corresponds to WT controls. Meanwhile, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). log ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then rescaled to the distribution of the BarSeq fitness scores. Fitness scores were joined using confidence-weighted averages. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957) for more details.",
        "short_description": "A joint Deep Mutational Scan dataset of the human SUMO E2 conjugase UBE2I using functional complementation in yeast, combining DMS-BarSeq and DMS-TileSeq",
        "title": "UBE2I joint data",
        "keywords": [
            {
                "text": "DMS-BarSeq"
            },
            {
                "text": "E2"
            },
            {
                "text": "sumoylation"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "UBE2I",
            "reference_sequence": {
                "sequence": "ATGTCGGGGATCGCCCTCAGCAGACTCGCCCAGGAGAGGAAAGCATGGAGGAAAGACCACCCATTTGGTTTCGTGGCTGTCCCAACAAAAAATCCCGATGGCACGATGAACCTCATGAACTGGGAGTGCGCCATTCCAGGAAAGAAAGGGACTCCGTGGGAAGGAGGCTTGTTTAAACTACGGATGCTTTTCAAAGATGATTATCCATCTTCGCCACCAAAATGTAAATTCGAACCACCATTATTTCACCCGAATGTGTACCCTTCGGGGACAGTGTGCCTGTCCATCTTAGAGGAGGACAAGGACTGGAGGCCAGCCATCACAATCAAACAGATCCTATTAGGAATACAGGAACTTCTAAATGAACCAAATATCCAAGACCCAGCTCAAGCAGAGGCCTACACGATTTACTGCCAAAACAGAGTGGAGTACGAGAAAAGGGTCCGAGCACAAGCCAAGAAGTTTGCGCCCTCATAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P63279",
                "url": "http://purl.uniprot.org/uniprot/P63279",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000103275",
                "url": "http://www.ensembl.org/id/ENSG00000103275",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 159,
                "identifier": "NM_003345",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_003345",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000001-a-4",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se",
            "df"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000001-a-4",
        "variant_count": 10438,
        "experiment": "urn:mavedb:00000001-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-14",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000012-a-2",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment demonstrated the programmed allelic series (PALS) method for site-directed mutagenesis using microarrays. The impact of nearly all singleton missense mutation in the Gal4 yeast transcription factor was measured in multiple selections.",
        "method_text": "Variant counts were calculated by summing the read counts of barcodes associated with each variant. The enrichment score for each variant is the $\\log_2$ ratio of the mutant count over the wild type count for the selected time point minus the $\\log_2$ ratio of the mutant count over the wild type count for the input time point (ratio of ratios).",
        "short_description": "Deep mutational scan of Gal4 DNA-binding domain using a yeast growth assay. 24 hours of selection.",
        "title": "Deep mutational scan of Gal4 DNA-binding domain, SEL_A_24h",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "log ratios"
            },
            {
                "text": "Yeast two-hybrid"
            },
            {
                "text": "Binding"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "25559584",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/25559584",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Gal4",
            "reference_sequence": {
                "sequence": "AAGCTACTGTCTTCTATCGAACAAGCATGCGATATTTGCCGACTTAAAAAGCTCAAGTGCTCCAAAGAAAAACCGAAGTGCGCCAAGTGTCTGAAGAACAACTGGGAGTGTCGCTACTCTCCCAAAACCAAAAGGTCTCCGCTGACTAGGGCACATCTGACAGAAGTGGAATCAAGGCTAGAAAGACTGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P04386",
                "url": "http://purl.uniprot.org/uniprot/P04386",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000012-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000012-a-2",
        "variant_count": 1319,
        "experiment": "urn:mavedb:00000012-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000029-a-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "1",
            "end": 109275251,
            "start": 109274652,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of SORT1 enhancer in HepG2 cells. Biological replicate 1 of 2.",
        "title": "Saturation mutagenesis MPRA of SORT1 enhancer, replicate 1",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "SORT1 enhancer",
            "reference_sequence": {
                "sequence": "GAACTGGAAAAGCCCTGTCCGGTGAGGGGGCAGAAGGACTCAGCGCCCCTGGACCCCCAAATGCTGCATGAACACATTTTCAGGGGAGCCTGTGCCCCCAGGCGGGGGTCGGGCAGCCCCAGCCCCTCTCCTTTTCCTGGACTCTGGCCGTGCGCGGCAGCCCAGGTGTTTGCTCAGTTGCTGACCCAAAAGTGCTTCATTTTTCGTGCCCGCCCCGCGCCCCGGGCAGGCCAGTCATGTGTTAAGTTGCGCTTCTTTGCTGTGATGTGGGTGGGGGAGGAAGAGTAAACACAGTGCTGGCTCGGCTGCCCTGAGGGTGCTCAATCAAGCACAGGTTTCAAGTCTGGGTTCTGGTGTCCACTCACCCACCCCACCCCCCAAAATCAGACAAATGCTACTTTGTCTAACCTGCTGTGGCCTCTGAGACATGTTCTATTTTTAACCCCTTCTTGGAATTGGCTCTCTTCTTCAAAGGACCAGGTCCTGTTCCTCTTTCTCCCCGACTCCACCCCAGCTCCCTGTGAAGAGAGAGTTAATATATTTGTTTTATTTATTTGCTTTTTGTGTTGGGATGGGTTCGTGTCCAGTCCCGGGGGTCTG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000029-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000029-a-1",
        "variant_count": 1927,
        "experiment": "urn:mavedb:00000029-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-02-18",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000013-b-1",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {
            "abundance_codes": {
                "low": 1,
                "wt-like": 4,
                "possibly_low": 2,
                "possibly_wt-like": 3
            }
        },
        "abstract_text": "This study demonstrated variant abundance by massively parallel sequencing (VAMP-seq), a technique for measuring the effect of protein variants on abundance using fluorescence. VAMP-seq is a generally-applicable assay for protein stability that can identify loss-of-function variants.",
        "method_text": "Barcodes for each bin were counted and associated with variant sequences using Enrich2. Scores were calculated based on the frequency of each variant in each bin using a weighted average such that increased abundance in high-signal bins gives a higher score. Abundance scores were calculated based on a min-max normalization using wild type (score of 1) and the average nonsense variant score (score of 0). The scores reported are the average of the eight replicate scores. Confidence intervals and variance estimates are based on the replicate scores.\r\n\r\nMetadata contains the mapping between abundance classes and the integer values found in the database.",
        "short_description": "Amino acid scores for variant abundance by massively parallel sequencing (VAMP-seq) applied to TPMT.",
        "title": "TMPT VAMP-seq",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "Enrich2"
            },
            {
                "text": "VAMP-seq"
            },
            {
                "text": "FACS"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29785012",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29785012",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TPMT",
            "reference_sequence": {
                "sequence": "ATGGATGGTACAAGAACTTCACTTGACATTGAAGAGTACTCGGATACTGAGGTACAGAAAAACCAAGTACTAACTCTGGAAGAATGGCAAGACAAGTGGGTGAACGGCAAGACTGCTTTTCATCAGGAACAAGGACATCAGCTATTAAAGAAGCATTTAGATACTTTCCTTAAAGGCAAGAGTGGACTGAGGGTATTTTTTCCTCTTTGCGGAAAAGCGGTTGAGATGAAATGGTTTGCAGACCGGGGACACAGTGTAGTTGGTGTGGAAATCAGTGAACTTGGGATACAAGAATTTTTTACAGAGCAGAATCTTTCTTACTCAGAAGAACCAATCACCGAAATTCCTGGAACCAAAGTATTTAAGAGTTCTTCGGGGAACATTTCATTGTACTGTTGCAGTATTTTTGATCTTCCCAGGACAAATATTGGCAAATTTGACATGATTTGGGATAGAGGAGCATTAGTTGCCATTAATCCAGGTGATCGCAAATGCTATGCAGATACAATGTTTTCCCTCCTGGGAAAGAAGTTTCAGTATCTCCTGTGTGTTCTTTCTTATGATCCAACTAAACATCCAGGTCCACCATTTTATGTTCCACATGCTGAAATTGAAAGGTTGTTTGGTAAAATATGCAATATACGTTGTCTTGAGAAGGTTGATGCTTTTGAAGAACGACATAAAAGTTGGGGAATTGACTGTCTTTTTGAAAAGTTATATCTACTTACAGAAAAGTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P51580",
                "url": "http://purl.uniprot.org/uniprot/P51580",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000013-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "expts",
            "se",
            "lower_ci",
            "upper_ci",
            "score1",
            "score2",
            "score3",
            "score4",
            "score5",
            "score6",
            "score7",
            "score8",
            "median_w_ave",
            "exp1_w_ave",
            "exp2_w_ave",
            "exp3_w_ave",
            "exp4_w_ave",
            "exp5_w_ave",
            "exp6_w_ave",
            "exp7_w_ave",
            "exp8_w_ave",
            "snv",
            "abundance_class"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000013-b-1",
        "variant_count": 4028,
        "experiment": "urn:mavedb:00000013-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-09-30",
        "modification_date": "2019-10-08",
        "urn": "urn:mavedb:00000043-a-1",
        "publish_date": "2019-10-08",
        "created_by": "0000-0001-7684-5841",
        "modified_by": "0000-0001-7684-5841",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "The DMS library was encoded in the retroviral expression system pMX-GW-PGK-PuroR-GFP. Each codon for amino acid positions 488-516 of the human TpoR protein, encompassing the TM and partial JM regions, was independently randomised using degenerate (NNN) primers (IDT) to encode all 64 possible codons. Plasmid preparations for all positions (488-516) were pooled together in equal amounts to obtain a library containing all 1,856 possible DNA variants. The frequencies of variants in each library were determined by Illumina sequencing, which confirmed 100% representation across the randomised region and constitutes the unselected control sample.\r\n\r\nThe library was transfected into HEK293T cells along with retroviral packaging vectors using calcium phosphate transfection. After 48 hrs, supernatants containing virus were harvested and sterile filtered for transduction into Ba/F3 cells.  The pooled retroviral library was used to transduce 10^6 Ba/F3 cells at a multiplicity of infection ~0.1, generating ~10^5 transductants for each of six biological replicates. These six cultures were treated with 5 μg/ml puromycin (on IL-3) for 48 hours to yield pure virus-positive cells. Cells for each replicate were split in half and cultured in the continued presence of IL-3, or were washed to remove IL-3, and subjected to another 48 hours culture. mRNA was prepared from the live cells that remained at the end of this procedure.\r\n\r\ncDNA was prepared from 1 μg of total RNA using the TpoR-specific reverse transcription primer containing a 16 bp unique molecular identifier (UMI) and an Illumina adapter. cDNA was amplified to add illumina adapters and indexes for sequencing using an Illumina NextSeq kit with 140 cycles in the forward direction and 160 cycles in the reverse direction.\r\n\r\nThe paired-end reads from Illumina sequencing runs were separated into samples based on Illumina index sequences using Cutadapt v1.15. De-duplication based on the UMI was performed using UMI Tools v1.15 after sample separation. Reads were trimmed to the region of interest and filtered for length using Cutadapt7 prior to analysis with Enrich2 v1.2.0. \r\n\r\nTo determine mutations that confer constitutive activity log ratio enrichment scores were calculated for each of the six replicates by comparing the variant counts in the unselected plasmid library with those remaining in Ba/F3 cells after 2 days of culture in growth factor free media using “wild-type” (all synonymous DNA sequences encoding the WT amino acid sequence) count normalisation.",
        "short_description": "The screen was designed to identify all single-amino-acid substitutions in the human TpoR transmembrane (TM) and partial juxtamembrane (JM) regions (488-516) that confer factor-free growth.",
        "title": "Novel Drivers of MPL-dependent Oncogenic Transformation",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0001-7684-5841"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MPL",
            "reference_sequence": {
                "sequence": "ACCGAGACCGCCTGGATCTCCTTGGTGACCGCTCTGCATCTAGTGCTGGGCCTCAGCGCCGTCCTGGGCCTGCTGCTGCTGAGGTGGCAGTTT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 486,
                "identifier": "P40238",
                "url": "http://purl.uniprot.org/uniprot/P40238",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000043-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_Replicate_A",
            "score_Replicate_A",
            "SE_Replicate_B",
            "score_Replicate_B",
            "SE_Replicate_C",
            "score_Replicate_C",
            "SE_Replicate_D",
            "score_Replicate_D",
            "SE_Replicate_E",
            "score_Replicate_E",
            "SE_Replicate_F",
            "score_Replicate_F"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "Replicate_A_c_0",
            "Replicate_A_c_1",
            "Replicate_B_c_0",
            "Replicate_B_c_1",
            "Replicate_C_c_0",
            "Replicate_C_c_1",
            "Replicate_D_c_0",
            "Replicate_D_c_1",
            "Replicate_E_c_0",
            "Replicate_E_c_1",
            "Replicate_F_c_0",
            "Replicate_F_c_1"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000043-a-1",
        "variant_count": 2415,
        "experiment": "urn:mavedb:00000043-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-07",
        "urn": "urn:mavedb:00000039-a-7",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effects of yeast HSP90 variants under the control of different promoters to explore the relationship between protein sequence and expression level. The results showed that reduced expression level (compared to wild-type expression) revealed new partial loss of function mutations.",
        "method_text": "Growth rates were calculated for each variant and converted into selection coefficients. The selection coefficient for each variant under control of this promoter/UTR combination is reported as the score. For variants with multiple synonymous codons, the reported coefficient is the average of all synonymous variant's selection coefficients.\r\n\r\nVariants annotated as \"null-like\" have a score of -1.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae under the control of the TEF promoter, no 3'UTR added.",
        "title": "Deep mutational scan of HSP90, TEFdter construct",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "promoter"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "23825969",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23825969",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000039-a-7",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000039-a-7",
        "variant_count": 184,
        "experiment": "urn:mavedb:00000039-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-04-14",
        "urn": "urn:mavedb:00000061-d-1",
        "publish_date": "2021-04-14",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors generated a library of RAF variant and use the PACS system to test KRAS4b/RAF protein-protein interaction (PPI). The experimental data revealed positions along the binding interface as well as which substitutions are tolerated at each position.",
        "method_text": "Samples are collected after 8h and sequenced by Illumina. The counts for each variant is first added by 1, then divided by total sequence counts at this time point to calculate variant frequency. The functional score equals the division of a variant frequency at this time point and its frequency in initial library. Further normalizing the functional scores by wild type scores will give the relative enrichment values. The score data includes scores from three replicates which are suffixed by: _rep1, _rep2 & _rep3. The final score is the median of them.",
        "short_description": "Measuring the interaction of mutated RAF to RAS by a new phage-assisted continuous selection (PACS) system.",
        "title": "RAF variant selected after 8h",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1021/acschembio.9b00669",
                "url": "https://doi.org/10.1021/acschembio.9b00669",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31808666",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31808666",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "RAF",
            "reference_sequence": {
                "sequence": "TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 51,
                "identifier": "P04049",
                "url": "http://purl.uniprot.org/uniprot/P04049",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000061-d-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "score_rep1",
            "score_rep2",
            "score_rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000061-d-1",
        "variant_count": 298,
        "experiment": "urn:mavedb:00000061-d",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-14",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000012-a-1",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment demonstrated the programmed allelic series (PALS) method for site-directed mutagenesis using microarrays. The impact of nearly all singleton missense mutation in the Gal4 yeast transcription factor was measured in multiple selections.",
        "method_text": "Variant counts were calculated by summing the read counts of barcodes associated with each variant. The enrichment score for each variant is the $\\log_2$ ratio of the mutant count over the wild type count for the selected time point minus the $\\log_2$ ratio of the mutant count over the wild type count for the input time point (ratio of ratios).",
        "short_description": "Deep mutational scan of Gal4 DNA-binding domain using a yeast growth assay. 24 hours of nonselective growth.",
        "title": "Deep mutational scan of Gal4 DNA-binding domain, NONSEL_24h",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "log ratios"
            },
            {
                "text": "DNA-binding"
            },
            {
                "text": "Yeast two-hybrid"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "25559584",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/25559584",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Gal4",
            "reference_sequence": {
                "sequence": "AAGCTACTGTCTTCTATCGAACAAGCATGCGATATTTGCCGACTTAAAAAGCTCAAGTGCTCCAAAGAAAAACCGAAGTGCGCCAAGTGTCTGAAGAACAACTGGGAGTGTCGCTACTCTCCCAAAACCAAAAGGTCTCCGCTGACTAGGGCACATCTGACAGAAGTGGAATCAAGGCTAGAAAGACTGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P04386",
                "url": "http://purl.uniprot.org/uniprot/P04386",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000012-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000012-a-1",
        "variant_count": 1319,
        "experiment": "urn:mavedb:00000012-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-06-26",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000001-a-3",
        "publish_date": "2018-06-29",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Although we now routinely sequence human genomes, we can confidently identify only a fraction of the sequence variants that have a functional impact. Here, we developed a deep mutational scanning framework that produces exhaustive maps for human missense variants by combining random codon mutagenesis and multiplexed functional variation assays with computational imputation and refinement. We applied this framework to four proteins corresponding to six human genes: UBE2I (encoding SUMO E2 conjugase), SUMO1 (small ubiquitin-like modifier), TPK1 (thiamin pyrophosphokinase), and CALM1/2/3 (three genes encoding the protein calmodulin). The resulting maps recapitulate known protein features and confidently identify pathogenic variation. Assays potentially amenable to deep mutational scanning are already available for 57% of human disease genes, suggesting that DMS could ultimately map functional variation for all human disease genes. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957)",
        "method_text": "##Scoring procedure:\r\nDMS-TileSeq reads were processed using [tileseq_package](https://bitbucket.org/rothlabto/tileseq_package) and [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) software. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). log ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were normalized to a 0-1 scale, where 0 corresponds to the median nonsense variant score and 1 corresponds to the median synonymous score.\r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957) for more details.",
        "short_description": "A Deep Mutational Scan of the human SUMO E2 conjugase UBE2I using functional complementation in yeast via DMS-TileSeq.",
        "title": "UBE2I DMS-TileSeq",
        "keywords": [
            {
                "text": "E2"
            },
            {
                "text": "sumoylation"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29269382",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29269382",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "UBE2I",
            "reference_sequence": {
                "sequence": "ATGTCGGGGATCGCCCTCAGCAGACTCGCCCAGGAGAGGAAAGCATGGAGGAAAGACCACCCATTTGGTTTCGTGGCTGTCCCAACAAAAAATCCCGATGGCACGATGAACCTCATGAACTGGGAGTGCGCCATTCCAGGAAAGAAAGGGACTCCGTGGGAAGGAGGCTTGTTTAAACTACGGATGCTTTTCAAAGATGATTATCCATCTTCGCCACCAAAATGTAAATTCGAACCACCATTATTTCACCCGAATGTGTACCCTTCGGGGACAGTGTGCCTGTCCATCTTAGAGGAGGACAAGGACTGGAGGCCAGCCATCACAATCAAACAGATCCTATTAGGAATACAGGAACTTCTAAATGAACCAAATATCCAAGACCCAGCTCAAGCAGAGGCCTACACGATTTACTGCCAAAACAGAGTGGAGTACGAGAAAAGGGTCCGAGCACAAGCCAAGAAGTTTGCGCCCTCATAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P63279",
                "url": "http://purl.uniprot.org/uniprot/P63279",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000103275",
                "url": "http://www.ensembl.org/id/ENSG00000103275",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 159,
                "identifier": "NM_003345",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_003345",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000001-a-3",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se",
            "df"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000001-a-3",
        "variant_count": 7390,
        "experiment": "urn:mavedb:00000001-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-09-05",
        "modification_date": "2020-09-05",
        "urn": "urn:mavedb:00000046-a-1",
        "publish_date": "2020-09-05",
        "created_by": "0000-0001-7684-5841",
        "modified_by": "0000-0001-7684-5841",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "HeLa cells were transduced with the CD86 library containing a GFP reporter and GFP positive cells isolated by FACS.  This population was stained for cell surface expression of CD86 and sorted for CD86 positive cells.  Variant ratios between CD86+ and GFP+ populations were calculated in Enrich2 using Enrich2 Log ratios with wild-type normalisation.",
        "short_description": "Library pre-screened for variants that support expression of CD86 at the cell surface.",
        "title": "CD86 Surface Expression",
        "keywords": [
            {
                "text": "MIR2"
            },
            {
                "text": "CD86"
            },
            {
                "text": "Flow Cytometry"
            },
            {
                "text": "DMS"
            },
            {
                "text": "mRNA"
            },
            {
                "text": "MARCH1"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0001-7684-5841"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CD86",
            "reference_sequence": {
                "sequence": "GACCACATTCCTTGGATTACAGCTGTACTTCCAACAGTTATTATATGTGTGATGGTTTTCTGTCTAATTCTATGGAAATGG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 243,
                "identifier": "P42081",
                "url": "http://purl.uniprot.org/uniprot/P42081",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000046-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_Replicate_1",
            "score_Replicate_1",
            "SE_Replicate_2",
            "score_Replicate_2",
            "SE_Replicate_3",
            "score_Replicate_3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "Replicate_1_c_0",
            "Replicate_1_c_1",
            "Replicate_2_c_0",
            "Replicate_2_c_1",
            "Replicate_3_c_0",
            "Replicate_3_c_1"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000046-a-1",
        "variant_count": 32347,
        "experiment": "urn:mavedb:00000046-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-07",
        "urn": "urn:mavedb:00000039-a-6",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effects of yeast HSP90 variants under the control of different promoters to explore the relationship between protein sequence and expression level. The results showed that reduced expression level (compared to wild-type expression) revealed new partial loss of function mutations.",
        "method_text": "Growth rates were calculated for each variant and converted into selection coefficients. The selection coefficient for each variant under control of this promoter/UTR combination is reported as the score. For variants with multiple synonymous codons, the reported coefficient is the average of all synonymous variant's selection coefficients.\r\n\r\nVariants annotated as \"null-like\" have a score of -1.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae under the control of the TEF promoter with CYC 3'UTR.",
        "title": "Deep mutational scan of HSP90, TEF construct",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "promoter"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "23825969",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23825969",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000039-a-6",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000039-a-6",
        "variant_count": 187,
        "experiment": "urn:mavedb:00000039-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-06-29",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000001-c-2",
        "publish_date": "2018-06-29",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Although we now routinely sequence human genomes, we can confidently identify only a fraction of the sequence variants that have a functional impact. Here, we developed a deep mutational scanning framework that produces exhaustive maps for human missense variants by combining random codon mutagenesis and multiplexed functional variation assays with computational imputation and refinement. We applied this framework to four proteins corresponding to six human genes: UBE2I (encoding SUMO E2 conjugase), SUMO1 (small ubiquitin-like modifier), TPK1 (thiamin pyrophosphokinase), and CALM1/2/3 (three genes encoding the protein calmodulin). The resulting maps recapitulate known protein features and confidently identify pathogenic variation. Assays potentially amenable to deep mutational scanning are already available for 57% of human disease genes, suggesting that DMS could ultimately map functional variation for all human disease genes. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957)",
        "method_text": "##Scoring procedure:\r\nDMS-TileSeq reads were processed using the [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) software. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). log ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. Random-Forest-based machine learning was used to impute missing values and refine low-confidence measurements, based on intrinsic, structural, and biochemical features.\r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957) for more details.\r\n\r\n## Additional columns:\r\n* exp.score = experimental score from the joint DMS-BarSeq/DMS-TileSeq screens\r\n* exp.sd = standard deviation of the experimental score\r\n* df = degrees of freedom (number of replicates contributing to the experimental score)\r\n* pred.score = machine-learning predicted score",
        "short_description": "A machine-learning imputed and refined Deep Mutational Scan of human Calmodulin using functional complementation in yeast.",
        "title": "Human Calmodulin imputed and refined",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29269382",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29269382",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CALM1",
            "reference_sequence": {
                "sequence": "ATGGCTGATCAGCTGACCGAAGAACAGATTGCTGAATTCAAGGAAGCCTTCTCCCTATTTGATAAAGATGGCGATGGCACCATCACAACAAAGGAACTTGGAACTGTCATGAGGTCACTGGGTCAGAACCCAACAGAAGCTGAATTGCAGGATATGATCAATGAAGTGGATGCTGATGGTAATGGCACCATTGACTTCCCCGAATTTTTGACTATGATGGCTAGAAAAATGAAAGATACAGATAGTGAAGAAGAAATCCGTGAGGCATTCCGAGTCTTTGACAAGGATGGCAATGGTTATATCAGTGCAGCAGAACTACGTCACGTCATGACAAACTTAGGAGAAAAACTAACAGATGAAGAAGTAGATGAAATGATCAGAGAAGCAGATATTGATGGAGACGGACAAGTCAACTATGAAGAATTCGTACAGATGATGACTGCAAAATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P0DP23",
                "url": "http://purl.uniprot.org/uniprot/P0DP23",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000198668",
                "url": "http://www.ensembl.org/id/ENSG00000198668",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 833,
                "identifier": "NM_001363670.1",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_001363670.1",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000001-c-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se",
            "exp.score",
            "exp.sd",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000001-c-2",
        "variant_count": 2980,
        "experiment": "urn:mavedb:00000001-c",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-d-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast Treated with Melatonin",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1101/2020.05.01.072884",
                "url": "https://doi.org/10.1101/2020.05.01.072884",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-d-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-d-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-d",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-07-10",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000003-a-2",
        "publish_date": "2018-07-10",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "These experiments measured the functional consequences of mutations in the BRCA1 RING domain, where most clinically-relevant single nucleotide variants reside. One goal of the study was to create a \"look-up table\" of single nucleotide variants for clinical use, by prospectively measuring the impact of all possible variants that are likely to appear in patients. The study combines two different assays - one based on E3 ubiquitin ligase activity and one based on BRCA1-BARD1 heterodimer formation - and makes one of the first attempts to combine data from different MAVEs on the same target.\r\n\r\nThis entry contains scores from the phage autoubiquitination assay, which tested the E3 ubiquitin ligase activity of BRCA1 variants.\r\n\r\nNote that this score set does not describe the scores presented in the original publication. It is a reanalysis of the raw data that was produced as part of testing and development for Enrich2.",
        "method_text": "Scores were calculated using the Enrich2 weighted least squares regression scoring model. Replicate scores were combined using the Enrich2 random-effects model. Counts for each variant were calculated as the sum of counts for all barcodes associated with a variant with the same amino acid sequence.\r\n\r\nThe scores and standard errors calculated for each of replicate appear as additional columns.\r\n\r\nCount columns are named using the format `<replicate>_c_<timepoint>`. The 0 time point is the input (unselected). Time points are given in rounds.",
        "short_description": "Amino acid variant scores for deep mutational scan of the BRCA1 RING domain using autoubiquitination calculated by Enrich2.",
        "title": "Enrich2 amino acid variant scores for BRCA1 E3",
        "keywords": [
            {
                "text": "Phage display"
            },
            {
                "text": "ubiquitin"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28784151",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28784151",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "BRCA1 RING domain",
            "reference_sequence": {
                "sequence": "GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGCCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTATGCAAACAGCTATAATTTTGCAAAAAAGGAAAATAACTCTCCTGAACATCTAAAAGATGAAGTTTCTATCATCCAAAGTATGGGCTACAGAAACCGTGCCAAAAGACTTCTACAGAGTGAACCCGAAAATCCTTCCTTGCAGGAAACCAGTCTCAGTGTCCAACTCTCTAACCTTGGAACTGTGAGAACTCTGAGGACAAAGCAGCGGATACAACCTCAAAGGACGTCTGTCTACATTGAATTGGGATCTGATTCTTCTGAAGATACCGTTAATAAGGCAACTTATTGCAGTGTGGGAGATCAAGAATTGTTACAAATCACCCCTCAAGGAACCAGGGATGAAATCAGTTTGGATTCTGCAAAAAAGGCTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAGTTC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000003-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_PlusE2NewRep3",
            "score_PlusE2NewRep3",
            "SE_PlusE2NewRep4",
            "score_PlusE2NewRep4",
            "SE_PlusE2NewRep5",
            "score_PlusE2NewRep5",
            "SE_PlusE2Rep3",
            "score_PlusE2Rep3",
            "SE_PlusE2Rep4",
            "score_PlusE2Rep4",
            "SE_PlusE2Rep5",
            "score_PlusE2Rep5"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "PlusE2NewRep3_c_0",
            "PlusE2NewRep3_c_1",
            "PlusE2NewRep3_c_2",
            "PlusE2NewRep3_c_3",
            "PlusE2NewRep3_c_4",
            "PlusE2NewRep3_c_5",
            "PlusE2NewRep4_c_0",
            "PlusE2NewRep4_c_1",
            "PlusE2NewRep4_c_2",
            "PlusE2NewRep4_c_3",
            "PlusE2NewRep4_c_4",
            "PlusE2NewRep4_c_5",
            "PlusE2NewRep5_c_0",
            "PlusE2NewRep5_c_1",
            "PlusE2NewRep5_c_2",
            "PlusE2NewRep5_c_3",
            "PlusE2NewRep5_c_4",
            "PlusE2NewRep5_c_5",
            "PlusE2Rep3_c_0",
            "PlusE2Rep3_c_1",
            "PlusE2Rep3_c_2",
            "PlusE2Rep3_c_3",
            "PlusE2Rep3_c_4",
            "PlusE2Rep3_c_5",
            "PlusE2Rep4_c_0",
            "PlusE2Rep4_c_1",
            "PlusE2Rep4_c_2",
            "PlusE2Rep4_c_3",
            "PlusE2Rep4_c_4",
            "PlusE2Rep4_c_5",
            "PlusE2Rep5_c_0",
            "PlusE2Rep5_c_1",
            "PlusE2Rep5_c_2",
            "PlusE2Rep5_c_3",
            "PlusE2Rep5_c_4",
            "PlusE2Rep5_c_5",
            "Y2H_1_Rep1_c_0",
            "Y2H_1_Rep1_c_18",
            "Y2H_1_Rep1_c_37",
            "Y2H_1_Rep1_c_45",
            "Y2H_1_Rep2_c_0",
            "Y2H_1_Rep2_c_18",
            "Y2H_1_Rep2_c_37",
            "Y2H_1_Rep2_c_45",
            "Y2H_1_Rep3_c_0",
            "Y2H_1_Rep3_c_18",
            "Y2H_1_Rep3_c_37",
            "Y2H_1_Rep3_c_45",
            "Y2H_2_Rep1_c_0",
            "Y2H_2_Rep1_c_16",
            "Y2H_2_Rep1_c_41",
            "Y2H_2_Rep1_c_64",
            "Y2H_2_Rep2_c_0",
            "Y2H_2_Rep2_c_16",
            "Y2H_2_Rep2_c_41",
            "Y2H_2_Rep2_c_64",
            "Y2H_2_Rep3_c_0",
            "Y2H_2_Rep3_c_16",
            "Y2H_2_Rep3_c_41",
            "Y2H_2_Rep3_c_64"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000003-a-2",
        "variant_count": 12316,
        "experiment": "urn:mavedb:00000003-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-11-20",
        "modification_date": "2020-11-20",
        "urn": "urn:mavedb:00000049-a-7",
        "publish_date": "2020-11-20",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human MTHFR via functional complementation in yeast at 200ug/ml folate in WT background",
        "title": "MTHFR at 200ug/ml folate in WT background",
        "keywords": [
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390",
            "0000-0002-9219-4310",
            "0000-0002-2550-2141",
            "0000-0001-6465-5776"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MTHFR",
            "reference_sequence": {
                "sequence": "ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P42898",
                "url": "http://purl.uniprot.org/uniprot/P42898",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000177000",
                "url": "http://www.ensembl.org/id/ENSG00000177000",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 230,
                "identifier": "NM_005957",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_005957",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000049-a-7",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000049-a-7",
        "variant_count": 12538,
        "experiment": "urn:mavedb:00000049-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2021-01-29",
        "modification_date": "2021-01-29",
        "urn": "urn:mavedb:00000060-a-2",
        "publish_date": "2021-01-29",
        "created_by": "0000-0002-9712-9163",
        "modified_by": "0000-0002-9712-9163",
        "extra_metadata": {},
        "abstract_text": "Insoluble protein aggregates are the hallmarks of many neurodegenerative diseases. For example, aggregates of TDP-43 occur in nearly all cases of amyotrophic lateral sclerosis (ALS). However, whether aggregates cause cellular toxicity is still not clear, even in simpler cellular systems. We reasoned that deep mutagenesis might be a powerful approach to disentangle the relationship between aggregation and toxicity. We generated >50,000 mutations in the prion-like domain (PRD) of TDP-43 and quantified their toxicity in yeast cells. Surprisingly, mutations that increase hydrophobicity and aggregation strongly decrease toxicity. In contrast, toxic variants promote the formation of dynamic liquid-like condensates. Mutations have their strongest effects in a hotspot that genetic interactions reveal to be structured in vivo, illustrating how mutagenesis can probe the in vivo structures of unstructured proteins. Our results show that aggregation of TDP-43 is not harmful but protects cells, most likely by titrating the protein away from a toxic liquid-like phase.",
        "method_text": "Variants for TDP-43 were constructed using a doped oligo for each of the TDP-43 libraries (290-331 and 332-373). Libraries were transformed in Saccharomyces cerevisiae and TDP-43 expression was induced for 5-6 generations. Variant counts were processed with DiMSum (Faure et al. 2020) to obtain a toxicity score for each variant. See Bolognesi et al. 2019 for details.",
        "short_description": "Deep mutational scanning of TDP-43 prion-like domain (AA 332-373) using a toxicity assay",
        "title": "TDP-43 toxicity",
        "keywords": [
            {
                "text": "prion-like domain"
            },
            {
                "text": "TDP-43"
            },
            {
                "text": "toxicity"
            },
            {
                "text": "intrinsic disorder"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-12101-z",
                "url": "https://doi.org/10.1038/s41467-019-12101-z",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-9712-9163",
            "0000-0002-6632-947X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TARDBP",
            "reference_sequence": {
                "sequence": "ATGTCTGAATATATTCGGGTAACCGAAGATGAGAACGATGAGCCCATTGAAATACCATCGGAAGACGATGGGACGGTGCTGCTCTCCACGGTTACAGCCCAGTTTCCAGGGGCGTGTGGGCTTCGCTACAGGAATCCAGTGTCTCAGTGTATGAGAGGTGTCCGGCTGGTAGAAGGAATTCTGCATGCCCCAGATGCTGGCTGGGGAAATCTGGTGTATGTTGTCAACTATCCAAAAGATAACAAAAGAAAAATGGATGAGACAGATGCTTCATCAGCAGTGAAAGTGAAAAGAGCAGTCCAGAAAACATCCGATTTAATAGTGTTGGGTCTCCCATGGAAAACAACCGAACAGGACCTGAAAGAGTATTTTAGTACCTTTGGAGAAGTTCTTATGGTGCAGGTCAAGAAAGATCTTAAGACTGGTCATTCAAAGGGGTTTGGCTTTGTTCGTTTTACGGAATATGAAACACAAGTGAAAGTAATGTCACAGCGACATATGATAGATGGACGATGGTGTGACTGCAAACTTCCTAATTCTAAGCAAAGCCAAGATGAGCCTTTGAGAAGCAGAAAAGTGTTTGTGGGGCGCTGTACAGAGGACATGACTGAGGATGAGCTGCGGGAGTTCTTCTCTCAGTACGGGGATGTGATGGATGTCTTCATCCCCAAGCCATTCAGGGCCTTTGCCTTTGTTACATTTGCAGATGATCAGATTGCGCAGTCTCTTTGTGGAGAGGACTTGATCATTAAAGGAATCAGCGTTCATATATCCAATGCCGAACCTAAGCACAATAGCAATAGACAGTTAGAAAGAAGTGGAAGATTTGGTGGTAATCCAGGTGGCTTTGGGAATCAGGGTGGATTTGGTAATAGCAGAGGGGGTGGAGCTGGTTTGGGAAACAATCAAGGTAGTAATATGGGTGGTGGGATGAACTTTGGTGCGTTCAGCATTAATCCAGCCATGATGGCTGCCGCCCAGGCAGCACTACAGAGCAGTTGGGGTATGATGGGCATGTTAGCCAGCCAGCAGAACCAGTCAGGCCCATCGGGTAATAACCAAAACCAAGGCAACATGCAGAGGGAGCCAAACCAGGCCTTCGGTTCTGGAAATAACTCTTATAGTGGCTCTAATTCTGGTGCAGCAATTGGTTGGGGATCAGCATCCAATGCAGGGTCGGGCAGTGGTTTTAATGGAGGCTTTGGCTCAAGCATGGATTCTAAGTCTTCTGGCTGGGGAATG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 994,
                "identifier": "Q13148",
                "url": "http://purl.uniprot.org/uniprot/Q13148",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000120948",
                "url": "http://www.ensembl.org/id/ENSG00000120948",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 0,
                "identifier": "NP_031401.1",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NP_031401.1",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg19",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.13",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.13",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000060-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000060-a-2",
        "variant_count": 714,
        "experiment": "urn:mavedb:00000060-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-08-23",
        "modification_date": "2021-01-18",
        "urn": "urn:mavedb:00000058-a-1",
        "publish_date": "2021-01-18",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment measured the molecular determinants of A$\\beta$ 42 aggregation in a cell-based system, by combining a yeast DHFR aggregation assay with deep mutational scanning. The effect of 791 of the 798 possible single amino acid substitutions on the aggregation propensity of A$\\beta$ 42 was measured using a yeast growth-based aggregation assay. This record contains amino acid level scores.",
        "method_text": "Enrich2 was used to calculate solubility scores for each A$\\beta$ variant from the FASTQ files. Sequencing reads were required to have a Phred score greater than 20 for every base and no uncalled bases. Scoring was performed using a weighted linear least squares regression line fit to the normalized frequency ratios across time points. The score is the slope of the regression line. The three replicates were combined using Enrich2's random-effects model. Solubility scores below 0 denote variants that are more aggregation-prone than wild-type. Scores above 0 indicate that a variant has increased solubility compared to wild-type.",
        "short_description": "Amino acid level scores for molecular determinants of Aβ aggregation with deep mutational scanning",
        "title": "Amino acid scores for Aβ42 variants",
        "keywords": [
            {
                "text": "regression"
            },
            {
                "text": "Enrich2"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "31558564",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31558564",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Aβ42",
            "reference_sequence": {
                "sequence": "GATGCAGAATTCCGACATGACTCAGGATATGAAGTTCATCATCAAAAATTGGTGTTCTTTGCAGAAGATGTGGGTTCAAACAAAGGTGCAATCATTGGACTCATGGTGGGCGGTGTTGTCATAGCG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 670,
                "identifier": "P05067",
                "url": "http://purl.uniprot.org/uniprot/P05067",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000058-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000058-a-1",
        "variant_count": 2162,
        "experiment": "urn:mavedb:00000058-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-k-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The expression level of alpha-synuclein missense variants was determined by cell sorting of yeast expressing each variant fused to GFP",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Expression Level in Yeast",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41589-020-0480-6",
                "url": "https://doi.org/10.1038/s41589-020-0480-6",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "32152544",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32152544",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-k-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-k-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-k",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-07-10",
        "modification_date": "2019-07-15",
        "urn": "urn:mavedb:00000036-a-2",
        "publish_date": "2019-07-15",
        "created_by": "0000-0002-4998-4368",
        "modified_by": "0000-0002-4998-4368",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of LDLRAP1 based on a Y2H assay with the interactor AP2B1.",
        "title": "LDLRAP1 AP2B1 imputed and refined",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-4998-4368"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "LDLRAP1",
            "reference_sequence": {
                "sequence": "ATGGACGCGCTCAAGTCGGCGGGGCGGGCGCTGATCCGGAGCCCCAGCTTGGCCAAGCAGAGCTGGGGGGGCGGTGGCCGGCACCGCAAGCTGCCTGAGAACTGGACAGACACGCGGGAGACGCTGCTGGAGGGGATGCTGTTCAGCCTCAAGTACCTGGGCATGACGCTAGTGGAGCAGCCCAAGGGTGAGGAGCTGTCGGCCGCCGCCATCAAGAGGATCGTGGCTACAGCTAAGGCCAGTGGGAAGAAGCTGCAGAAGGTGACTCTGAAGGTGTCGCCACGGGGAATTATCCTGACAGACAACCTCACCAACCAGCTCATTGAGAACGTGTCCATATACAGGATCTCCTATTGCACAGCAGACAAGATGCACGACAAGGTGTTTGCATACATCGCCCAGAGCCAGCACAACCAGAGCCTCGAGTGCCACGCCTTCCTCTGCACCAAGCGGAAGATGGCACAGGCTGTTACCCTCACCGTAGCCCAGGCCTTCAAAGTCGCCTTTGAGTTTTGGCAGGTGTCCAAGGAAGAGAAAGAGAAGAGGGACAAAGCCAGCCAAGAGGGAGGGGACGTCCTGGGGGCCCGCCAAGACTGCACCCCCCCCTTGAAGAGCTTGGTCGCCACTGGGAACCTGCTGGACTTAGAGGAGACGGCTAAGGCCCCGCTGTCCACGGTCAGCGCCAACACCACCAACATGGACGAGGTGCCGCGGCCACAAGCCTTGAGTGGCAGCAGTGTTGTCTGGGAGCTGGATGATGGCCTGGATGAAGCGTTTTCGAGGCTTGCCCAGTCTCGGACAAACCCTCAGGTCCTGGACACTGGCCTGACAGCCCAGGACATGCATTACGCCCAGTGCCTCTCGCCTGTCGACTGGGACAAGCCTGACAGCAGCGGCACAGAGCAGGATGACCTCTTCAGCTTCTGA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000036-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000036-a-2",
        "variant_count": 6383,
        "experiment": "urn:mavedb:00000036-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000027-b-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "1",
            "end": 155301864,
            "start": 155301395,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of PKLR promoter, 48h post-transfection in K562 cells.",
        "title": "Saturation mutagenesis MPRA of PKLR promoter, 48h",
        "keywords": [
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "PKLR promoter",
            "reference_sequence": {
                "sequence": "TGCTTTCAGTGTGGGCCTGGGGCTGCGGGACCATGGAATGAGAGGGAGAGGATGACAAAACTGCTGGTCTTATCTAAGGGAGACAGAGAAGAGAAAAGGGGCACACCCAGTAGGCCACCCTGTCCCCACAGAATCCCTCCCCCAGAACGGCCTGCTCTCTGCCCTCATCTCCTGGCATTTCCTCTCATCCTTTTTTCCTGATAAATTTTCAATCCATTCATACTATCTGGTCATCCACGTGAATAGATATTTTTTTTTTGGCCAGTCATATGGCCCCATTTTCTTTGTACTTTACTGAAGTTAGCTCTAGTGAATCCAGGGAGCAGGGGCTGTAGGGTGGGGCTGGAGCCTGAAGAAAGACAAAAGGGATCACTGTGATAATATGGTGGGGGGAGGGTTACCCAGTTCTGACCACTTTTTTTCTCTGTCTCAACCAAGAAATGCAGAGTGCCTTCACCACTCTGTAACCT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000027-b-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000027-b-1",
        "variant_count": 1794,
        "experiment": "urn:mavedb:00000027-b",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2018-07-10",
        "modification_date": "2019-07-26",
        "urn": "urn:mavedb:00000002-a-2",
        "publish_date": "2018-07-10",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This was the first published deep mutational scan. The experiment quantified binding affinity between the human YAP65 (YAP1) WW domain and a peptide binding partner using phage display. The phage display selection was tuned such that the enrichment in each round was moderate, allowing inefficient binders to be maintained in the population and subsequently quantified.\r\n\r\nNote that this score set does not describe the scores presented in the original publication. It is a reanalysis of the raw data that was produced as part of testing and development for Enrich2.",
        "method_text": "Scores were calculated using the Enrich2 weighted least squares regression scoring model. Scores for the two technical replicates were combined using the Enrich2 random-effects model.\r\n\r\nThe scores and standard errors calculated for each of the two technical replicates (101208 and 110307) appear as additional columns.\r\n\r\nCount columns are named using the format <replicate>_c_<timepoint>. The 0 time point is the input (unselected) and each replicate underwent three rounds of selection, numbered 1..3.",
        "short_description": "Amino acid variant scores for deep mutational scan of the hYAP65 WW domain using phage display calculated by Enrich2.",
        "title": "Enrich2 amino acid variant scores for YAP65 WW domain",
        "keywords": [
            {
                "text": "WW domain"
            },
            {
                "text": "Phage display"
            },
            {
                "text": "Binding"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28784151",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28784151",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0001-7614-1713"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "hYAP65 WW domain",
            "reference_sequence": {
                "sequence": "GACGTTCCACTGCCGGCTGGTTGGGAAATGGCTAAAACTAGTTCTGGTCAGCGTTACTTCCTGAACCACATCGACCAGACCACCACGTGGCAGGACCCGCGT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 169,
                "identifier": "P46937",
                "url": "http://purl.uniprot.org/uniprot/P46937",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000002-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_101208",
            "score_101208",
            "SE_110307",
            "score_110307"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "101208_c_0",
            "101208_c_1",
            "101208_c_2",
            "101208_c_3",
            "110307_c_0",
            "110307_c_1",
            "110307_c_2",
            "110307_c_3"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000002-a-2",
        "variant_count": 30487,
        "experiment": "urn:mavedb:00000002-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-07",
        "urn": "urn:mavedb:00000039-a-1",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effects of yeast HSP90 variants under the control of different promoters to explore the relationship between protein sequence and expression level. The results showed that reduced expression level (compared to wild-type expression) revealed new partial loss of function mutations.\r\n\r\nThis score set contains the selection coefficients for the ADH promoter with CYC 3'UTR.",
        "method_text": "Growth rates were calculated for each variant and converted into selection coefficients. The selection coefficient for each variant under control of this promoter/UTR combination is reported as the score. For variants with multiple synonymous codons, the reported coefficient is the average of all synonymous variant's selection coefficients.\r\n\r\nVariants annotated as \"null-like\" have a score of -1.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae under the control of the ADH promoter with CYC 3'UTR.",
        "title": "Deep mutational scan of HSP90, ADH construct",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "promoter"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "23825969",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23825969",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000039-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000039-a-1",
        "variant_count": 189,
        "experiment": "urn:mavedb:00000039-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-04-14",
        "urn": "urn:mavedb:00000061-c-1",
        "publish_date": "2021-04-14",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors generated a library of RAF variant and use the PACS system to test KRAS4b/RAF protein-protein interaction (PPI). The experimental data revealed positions along the binding interface as well as which substitutions are tolerated at each position.",
        "method_text": "Samples are collected after 6h and sequenced by Illumina. The counts for each variant is first added by 1, then divided by total sequence counts at this time point to calculate variant frequency. The functional score equals the division of a variant frequency at this time point and its frequency in initial library. Further normalizing the functional scores by wild type scores will give the relative enrichment values. The score data includes scores from three replicates which are suffixed by: _rep1, _rep2 & _rep3. The final score is the median of them.",
        "short_description": "Measuring the interaction of mutated RAF to RAS by a new phage-assisted continuous selection (PACS) system.",
        "title": "RAF variant selected after 6h",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1021/acschembio.9b00669",
                "url": "https://doi.org/10.1021/acschembio.9b00669",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31808666",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31808666",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "RAF",
            "reference_sequence": {
                "sequence": "TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 51,
                "identifier": "P04049",
                "url": "http://purl.uniprot.org/uniprot/P04049",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000061-c-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "score_rep1",
            "score_rep2",
            "score_rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000061-c-1",
        "variant_count": 298,
        "experiment": "urn:mavedb:00000061-c",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-07-10",
        "modification_date": "2019-07-15",
        "urn": "urn:mavedb:00000035-a-2",
        "publish_date": "2019-07-15",
        "created_by": "0000-0002-4998-4368",
        "modified_by": "0000-0002-4998-4368",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human HMG-CoA reductase (HMGCR) based on a functional complementation assay in yeast via DMS-TileSeq in glucose media with no statin.",
        "title": "HMGCR no statin imputed and refined",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-4998-4368"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HMGCR",
            "reference_sequence": {
                "sequence": "ATGTTGTCAAGACTTTTTCGAATGCATGGCCTCTTTGTGGCCTCCCATCCCTGGGAAGTCATAGTGGGGACAGTGACACTGACCATCTGCATGATGTCCATGAACATGTTTACTGGTAACAATAAGATCTGTGGTTGGAATTATGAATGTCCAAAGTTTGAAGAGGATGTTTTGAGCAGTGACATTATAATTCTGACAATAACACGATGCATAGCCATCCTGTATATTTACTTCCAGTTCCAGAATTTACGTCAACTTGGATCAAAATATATTTTGGGTATTGCTGGCCTTTTCACAATTTTCTCAAGTTTTGTATTCAGTACAGTTGTCATTCACTTCTTAGACAAAGAATTGACAGGCTTGAATGAAGCTTTGCCCTTTTTCCTACTTTTGATTGACCTTTCCAGAGCAAGCACATTAGCAAAGTTTGCCCTCAGTTCCAACTCACAGGATGAAGTAAGGGAAAATATTGCTCGTGGAATGGCAATTTTAGGTCCTACGTTTACCCTCGATGCTCTTGTTGAATGTCTTGTGATTGGAGTTGGTACCATGTCAGGGGTACGTCAGCTTGAAATTATGTGCTGCTTTGGCTGCATGTCAGTTCTTGCCAACTACTTCGTGTTCATGACTTTCTTCCCAGCTTGTGTGTCCTTGGTATTAGAGCTTTCTCGGGAAAGCCGCGAGGGTCGTCCAATTTGGCAGCTCAGCCATTTTGCCCGAGTTTTAGAAGAAGAAGAAAATAAGCCGAATCCTGTAACTCAGAGGGTCAAGATGATTATGTCTCTAGGCTTGGTTCTTGTTCATGCTCACAGTCGCTGGATAGCTGATCCTTCTCCTCAAAACAGTACAGCAGATACTTCTAAGGTTTCATTAGGACTGGATGAAAATGTGTCCAAGAGAATTGAACCAAGTGTTTCCCTCTGGCAGTTTTATCTCTCTAAAATGATCAGCATGGATATTGAACAAGTTATTACCCTAAGTTTAGCTCTCCTTCTGGCTGTCAAGTACATCTTCTTTGAACAAACAGAGACAGAATCTACACTCTCATTAAAAAACCCTATCACATCTCCTGTAGTGACACAAAAGAAAGTCCCAGACAATTGTTGTAGACGTGAACCTATGCTGGTCAGAAATAACCAGAAATGTGATTCAGTAGAGGAAGAGACAGGGATAAACCGAGAAAGAAAAGTTGAGGTTATAAAACCCTTAGTGGCTGAAACAGATACCCCAAACAGAGCTACATTTGTGGTTGGTAACTCCTCCTTACTCGATACTTCATCAGTACTGGTGACACAGGAACCTGAAATTGAACTTCCCAGGGAACCTCGGCCTAATGAAGAATGTCTACAGATACTTGGGAATGCAGAGAAAGGTGCAAAATTCCTTAGTGATGCTGAGATCATCCAGTTAGTCAATGCTAAGCATATCCCAGCCTACAAGTTGGAAACTCTGATGGAAACTCATGAGCGTGGTGTATCTATTCGCCGACAGTTACTTTCCAAGAAGCTTTCAGAACCTTCTTCTCTCCAGTACCTACCTTACAGGGATTATAATTACTCCTTGGTGATGGGAGCTTGTTGTGAGAATGTTATTGGATATATGCCCATCCCTGTTGGAGTGGCAGGACCCCTTTGCTTAGATGAAAAAGAATTTCAGGTTCCAATGGCAACAACAGAAGGTTGTCTTGTGGCCAGCACCAATAGAGGCTGCAGAGCAATAGGTCTTGGTGGAGGTGCCAGCAGCCGAGTCCTTGCAGATGGGATGACTCGTGGCCCAGTTGTGCGTCTTCCACGTGCTTGTGACTCTGCAGAAGTGAAAGCCTGGCTCGAAACATCTGAAGGGTTCGCAGTGATAAAGGAGGCATTTGACAGCACTAGCAGATTTGCACGTCTACAGAAACTTCATACAAGTATAGCTGGACGCAACCTTTATATCCGTTTCCAGTCCAGGTCAGGGGATGCCATGGGGATGAACATGATTTCAAAGGGTACAGAGAAAGCACTTTCAAAACTTCACGAGTATTTCCCTGAAATGCAGATTCTAGCCGTTAGTGGTAACTATTGTACTGACAAGAAACCTGCTGCTATAAATTGGATAGAGGGAAGAGGAAAATCTGTTGTTTGTGAAGCTGTCATTCCAGCCAAGGTTGTCAGAGAAGTATTAAAGACTACCACAGAGGCTATGATTGAGGTCAACATTAACAAGAATTTAGTGGGCTCTGCCATGGCTGGGAGCATAGGAGGCTACAACGCCCATGCAGCAAACATTGTCACCGCCATCTACATTGCCTGTGGACAGGATGCAGCACAGAATGTTGGTAGTTCAAACTGTATTACTTTAATGGAAGCAAGTGGTCCCACAAATGAAGATTTATATATCAGCTGCACCATGCCATCTATAGAGATAGGAACGGTGGGTGGTGGGACCAACCTACTACCTCAGCAAGCCTGTTTGCAGATGCTAGGTGTTCAAGGAGCATGCAAAGATAATCCTGGGGAAAATGCCCGGCAGCTTGCCCGAATTGTGTGTGGGACCGTAATGGCTGGGGAATTGTCACTTATGGCAGCATTGGCAGCAGGACATCTTGTCAAAAGTCACATGATTCACAACAGGTCGAAGATCAATTTACAAGACCTCCAAGGAGCTTGCACCAAGAAGACAGCCTGA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000035-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000035-a-2",
        "variant_count": 18448,
        "experiment": "urn:mavedb:00000035-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-20",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000031-b-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "5",
            "end": 1295247,
            "start": 1294989,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of TERT promoter in glioblastoma SF7996 (GBM) cells.",
        "title": "Saturation mutagenesis MPRA of TERT promoter, GBM",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TERT promoter",
            "reference_sequence": {
                "sequence": "TCGCGGGGGTGGCCGGGGCCAGGGCTTCCCACGTGCGCAGCAGGACGCAGCGCTGCCTGAAACTCGCGCCGCGAGGAGAGGGCGGGGCCGCGGAAAGGAAGGGGAGGGGCTGGGAGGGCCCGGAGGGGGCTGGGCCGGGGACCCGGGAGGGGTCGGGACGGGGCGGGGTCCGCGCGGAGGAGGCGGAGCTGGAAGGTGAAGGGGCAGGACGGGTGCCCGGGTCCCCAGTCCCTCCGCCACGTGGGAAGCGCGGTCCTGG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000031-b-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000031-b-1",
        "variant_count": 973,
        "experiment": "urn:mavedb:00000031-b",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2020-09-05",
        "modification_date": "2020-09-05",
        "urn": "urn:mavedb:00000046-a-2",
        "publish_date": "2020-09-05",
        "created_by": "0000-0001-7684-5841",
        "modified_by": "0000-0001-7684-5841",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "MIR2, which targets CD86 for down-regulation, was expressed in cells with CD86 at the cell surface. Cells that remained CD86 positive were sorted. Enrich2 log ratios were calculated with wild-type normalisation to determine which variants were enriched after selection thereby determining which variants were resistant to MIR2-mediated down-regulation.",
        "title": "CD86 susceptibility to MIR2",
        "keywords": [
            {
                "text": "MIR2"
            },
            {
                "text": "CD86"
            },
            {
                "text": "Flow Cytometry"
            },
            {
                "text": "DMS"
            },
            {
                "text": "mRNA"
            },
            {
                "text": "MARCH1"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0001-7684-5841"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CD86",
            "reference_sequence": {
                "sequence": "GACCACATTCCTTGGATTACAGCTGTACTTCCAACAGTTATTATATGTGTGATGGTTTTCTGTCTAATTCTATGGAAATGG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 243,
                "identifier": "P42081",
                "url": "http://purl.uniprot.org/uniprot/P42081",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000046-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_Replicate_1",
            "score_Replicate_1",
            "SE_Replicate_2",
            "score_Replicate_2",
            "SE_Replicate_3",
            "score_Replicate_3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "Replicate_1_c_0",
            "Replicate_1_c_3",
            "Replicate_2_c_0",
            "Replicate_2_c_3",
            "Replicate_3_c_0",
            "Replicate_3_c_3"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000046-a-2",
        "variant_count": 4360,
        "experiment": "urn:mavedb:00000046-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-07-10",
        "modification_date": "2019-07-15",
        "urn": "urn:mavedb:00000035-a-3",
        "publish_date": "2019-07-15",
        "created_by": "0000-0002-4998-4368",
        "modified_by": "0000-0002-4998-4368",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human HMG-CoA reductase (HMGCR) based on a functional complementation assay in yeast via DMS-TileSeq in atorvastatin media.",
        "title": "HMGCR atorvastatin imputed and refined",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-4998-4368"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HMGCR",
            "reference_sequence": {
                "sequence": "ATGTTGTCAAGACTTTTTCGAATGCATGGCCTCTTTGTGGCCTCCCATCCCTGGGAAGTCATAGTGGGGACAGTGACACTGACCATCTGCATGATGTCCATGAACATGTTTACTGGTAACAATAAGATCTGTGGTTGGAATTATGAATGTCCAAAGTTTGAAGAGGATGTTTTGAGCAGTGACATTATAATTCTGACAATAACACGATGCATAGCCATCCTGTATATTTACTTCCAGTTCCAGAATTTACGTCAACTTGGATCAAAATATATTTTGGGTATTGCTGGCCTTTTCACAATTTTCTCAAGTTTTGTATTCAGTACAGTTGTCATTCACTTCTTAGACAAAGAATTGACAGGCTTGAATGAAGCTTTGCCCTTTTTCCTACTTTTGATTGACCTTTCCAGAGCAAGCACATTAGCAAAGTTTGCCCTCAGTTCCAACTCACAGGATGAAGTAAGGGAAAATATTGCTCGTGGAATGGCAATTTTAGGTCCTACGTTTACCCTCGATGCTCTTGTTGAATGTCTTGTGATTGGAGTTGGTACCATGTCAGGGGTACGTCAGCTTGAAATTATGTGCTGCTTTGGCTGCATGTCAGTTCTTGCCAACTACTTCGTGTTCATGACTTTCTTCCCAGCTTGTGTGTCCTTGGTATTAGAGCTTTCTCGGGAAAGCCGCGAGGGTCGTCCAATTTGGCAGCTCAGCCATTTTGCCCGAGTTTTAGAAGAAGAAGAAAATAAGCCGAATCCTGTAACTCAGAGGGTCAAGATGATTATGTCTCTAGGCTTGGTTCTTGTTCATGCTCACAGTCGCTGGATAGCTGATCCTTCTCCTCAAAACAGTACAGCAGATACTTCTAAGGTTTCATTAGGACTGGATGAAAATGTGTCCAAGAGAATTGAACCAAGTGTTTCCCTCTGGCAGTTTTATCTCTCTAAAATGATCAGCATGGATATTGAACAAGTTATTACCCTAAGTTTAGCTCTCCTTCTGGCTGTCAAGTACATCTTCTTTGAACAAACAGAGACAGAATCTACACTCTCATTAAAAAACCCTATCACATCTCCTGTAGTGACACAAAAGAAAGTCCCAGACAATTGTTGTAGACGTGAACCTATGCTGGTCAGAAATAACCAGAAATGTGATTCAGTAGAGGAAGAGACAGGGATAAACCGAGAAAGAAAAGTTGAGGTTATAAAACCCTTAGTGGCTGAAACAGATACCCCAAACAGAGCTACATTTGTGGTTGGTAACTCCTCCTTACTCGATACTTCATCAGTACTGGTGACACAGGAACCTGAAATTGAACTTCCCAGGGAACCTCGGCCTAATGAAGAATGTCTACAGATACTTGGGAATGCAGAGAAAGGTGCAAAATTCCTTAGTGATGCTGAGATCATCCAGTTAGTCAATGCTAAGCATATCCCAGCCTACAAGTTGGAAACTCTGATGGAAACTCATGAGCGTGGTGTATCTATTCGCCGACAGTTACTTTCCAAGAAGCTTTCAGAACCTTCTTCTCTCCAGTACCTACCTTACAGGGATTATAATTACTCCTTGGTGATGGGAGCTTGTTGTGAGAATGTTATTGGATATATGCCCATCCCTGTTGGAGTGGCAGGACCCCTTTGCTTAGATGAAAAAGAATTTCAGGTTCCAATGGCAACAACAGAAGGTTGTCTTGTGGCCAGCACCAATAGAGGCTGCAGAGCAATAGGTCTTGGTGGAGGTGCCAGCAGCCGAGTCCTTGCAGATGGGATGACTCGTGGCCCAGTTGTGCGTCTTCCACGTGCTTGTGACTCTGCAGAAGTGAAAGCCTGGCTCGAAACATCTGAAGGGTTCGCAGTGATAAAGGAGGCATTTGACAGCACTAGCAGATTTGCACGTCTACAGAAACTTCATACAAGTATAGCTGGACGCAACCTTTATATCCGTTTCCAGTCCAGGTCAGGGGATGCCATGGGGATGAACATGATTTCAAAGGGTACAGAGAAAGCACTTTCAAAACTTCACGAGTATTTCCCTGAAATGCAGATTCTAGCCGTTAGTGGTAACTATTGTACTGACAAGAAACCTGCTGCTATAAATTGGATAGAGGGAAGAGGAAAATCTGTTGTTTGTGAAGCTGTCATTCCAGCCAAGGTTGTCAGAGAAGTATTAAAGACTACCACAGAGGCTATGATTGAGGTCAACATTAACAAGAATTTAGTGGGCTCTGCCATGGCTGGGAGCATAGGAGGCTACAACGCCCATGCAGCAAACATTGTCACCGCCATCTACATTGCCTGTGGACAGGATGCAGCACAGAATGTTGGTAGTTCAAACTGTATTACTTTAATGGAAGCAAGTGGTCCCACAAATGAAGATTTATATATCAGCTGCACCATGCCATCTATAGAGATAGGAACGGTGGGTGGTGGGACCAACCTACTACCTCAGCAAGCCTGTTTGCAGATGCTAGGTGTTCAAGGAGCATGCAAAGATAATCCTGGGGAAAATGCCCGGCAGCTTGCCCGAATTGTGTGTGGGACCGTAATGGCTGGGGAATTGTCACTTATGGCAGCATTGGCAGCAGGACATCTTGTCAAAAGTCACATGATTCACAACAGGTCGAAGATCAATTTACAAGACCTCCAAGGAGCTTGCACCAAGAAGACAGCCTGA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000035-a-3",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000035-a-3",
        "variant_count": 18448,
        "experiment": "urn:mavedb:00000035-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-01-23",
        "modification_date": "2019-07-26",
        "urn": "urn:mavedb:00000006-a-1",
        "publish_date": "2019-01-24",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {
            "chr": "9",
            "end": 104195828,
            "start": 104195570,
            "reference": "hg19"
        },
        "abstract_text": "This study described the functional consequence of over 100,000 enhancer variants *in vivo* in mouse liver. Two human enhancers (ALDOB, ECR11) and one mouse enhancer (LTV1) were known to be active in mouse liver and therefore variants in these enhancers should show a measurable difference in transcription. The results were broadly consistent with evolutionary data and transcription factor binding sites, but were not always concordant demonstrating the importance of measuring the effect of enhancer variants directly.\r\n\r\nThis MaveDB entry describes the ALDOB enhancer data. Datasets for other enhancers described in the same publication are also available: [ECR11](https://www.mavedb.org/experiment/urn:mavedb:00000007-a/) [LTV1](https://www.mavedb.org/experiment/urn:mavedb:00000008-a/)",
        "method_text": "Scores were calculated using a trivariate linear regression model. A separate model was built for each position in the enhancer, with a predictor for each possible variant nucleotide at that position.\r\n\r\nThe scores presented are therefore a combination of the effects of each individual variant on diverse enhancer haplotype backgrounds.\r\n\r\nSee metadata (available via download button) for wild type genomic coordinates in JSON format.",
        "short_description": "Trivariate regression scores for each nucleotide change as described in Patwardhan et al. 2012.",
        "title": "Trivariate regression scores for ALDOB",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "doped oligo synthesis"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "liver"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "22371081",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/22371081",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "ALDOB enhancer",
            "reference_sequence": {
                "sequence": "TCTTCAATCTGGGTATGCTGACTCAACCAGAATAACAGTGAAAATGATAATTCAAACTAATACTGTTTACAGGGAGTTAAACTTCTACAGTGGGATTAAAGGTCTGTACCACGTTAGCACAAATGTCACCTCTCTGTTAATCATAAAACAGGGTCACAGGCCAATGTTCACCACAAGGAGACAGGAGGACAACCTGGGATGGGTAATGACAAAGAACGATTTCCGTACTCCTAAGCCTCTGCTCTCTCAGATCTCAAGC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg19",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.13",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.13",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000006-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "pvalue"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000006-a-1",
        "variant_count": 777,
        "experiment": "urn:mavedb:00000006-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-26",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-h-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41589-020-0480-6",
                "url": "https://doi.org/10.1038/s41589-020-0480-6",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "32152544",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32152544",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-h-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-h-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-h",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-08-23",
        "modification_date": "2020-12-10",
        "urn": "urn:mavedb:00000051-c-1",
        "publish_date": "2020-12-10",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study mutated the residues on GpA helix region of the bacterial inner membrane. The experiment links the insertion and self-association property of the tested region to the survive of bacteria and studied the membrane-protein energetics landscape of missense mutations of the target.",
        "method_text": "The helix region of GpA is used as the membrane-spanning segment for dsT$\\beta$L in this experiment. The frequency of the count of each mutant relative to wild-type in the selected and reference pools was computed. Variants with <100 counts in the reference population were removed. The selection coefficients were calculated as the ratio of variant relative frequency in the selected and reference pool. The selection coeffcients were then transformed to apparent changes in free energy due to each single-point substitution through the Gibbs free-energy equation: \r\n\r\n$$ \\Delta\\Delta G^{app} = -RT\\ln(s) $$\r\n\r\nwhere R is the gas constant and T is the absolute temperature (310K). The count data table includes the count of each variant in the reference and selected pool.",
        "short_description": "A deep mutational scanning experiment targeting the GpA helix region on the bacterial inner membrane.",
        "title": "Helix region of Glycophorin A",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.7554/eLife.12125",
                "url": "https://doi.org/10.7554/eLife.12125",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "26824389",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/26824389",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Glycophorin A",
            "reference_sequence": {
                "sequence": "CTCATTATTTTTGGGGTGATGGCTGGTGTTATTGGAACGATCCTG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 93,
                "identifier": "P02724",
                "url": "http://purl.uniprot.org/uniprot/P02724",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000051-c-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "frequency_ref",
            "frequency_sel",
            "ratio"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "count_reference",
            "count_selected"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000051-c-1",
        "variant_count": 300,
        "experiment": "urn:mavedb:00000051-c",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2021-03-14",
        "modification_date": "2021-04-15",
        "urn": "urn:mavedb:00000063-a-1",
        "publish_date": "2021-04-15",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study tested the variant effects of <i>E.coli</i> dihydrofolate reductase (DHFR) whose function is necessary for the survival of the bacteria. The study cultured the bacteria in an early Log phase growth period under the control of Lon protease and determined the variant frequency change after cultivation, indicating the impact of DHFR variants.",
        "method_text": "This variant library consists of four sub-libraries which cover different positions in the target protein (40 residues each). Each mutant library is generated by parallel inverse PCR reactions. The libraries are transformed to <i>E.coli</i> with deficient Lon gene by electroporation, and the selection experiments take 2 libraries at a time. Samples after 0, 2, 4, 6, 8, 12, 16 and 18 hours of cultivation are collected and sequenced by Illumina NextSeq. \r\nThe results with less than four timepoints reported and larger standard error from regression are discarded. The rest are analyzed by Enrich2 and normalized. Each sub-library is tested three times and the final scores are the average of them. The results are discarded when there are less than two replicates available or the standard deviation is larger than the threshold.\r\nThe count data records the counts result from transformation rescue medium, overnight outgrowth and each timepoint outgrowth. Each library is tested three times and two of the four libraries are tested at the same time which gives the six repeats in the count data.",
        "short_description": "Growth rate of E.coli with mutated dihydrofolate reductase (DHFR) with functioning Lon protease",
        "title": "Growth rate of mutated DHFR with functioning Lon",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.7554/eLife.53476",
                "url": "https://doi.org/10.7554/eLife.53476",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "32701056",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32701056",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "DHFR",
            "reference_sequence": {
                "sequence": "ATGATCAGTCTGATTGCGGCGTTAGCGGTAGATCGCGTTATCGGCATGGAAAACGCCATGCCGTGGAACCTGCCTGCCGATCTCGCCTGGTTTAAACGCAACACCTTAAATAAACCCGTGATTATGGGCCGCCATACCTGGGAATCAATCGGTCGTCCGTTGCCAGGACGCAAAAATATTATCCTCAGCAGTCAACCGGGTACGGACGATCGCGTAACGTGGGTGAAGTCGGTGGATGAAGCCATCGCGGCGTGTGGTGACGTACCAGAAATCATGGTGATTGGCGGCGGTCGCGTTTATGAACAGTTCTTGCCAAAAGCGCAAAAACTGTATCTGACGCATATCGACGCAGAAGTGGAAGGCGACACCCATTTCCCGGATTACGAGCCGGATGACTGGGAATCGGTATTCAGCGAATTCCACGATGCTGATGCGCAGAACTCTCACAGCTATTGCTTTGAGATTCTGGAGCGGCGGTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P0ABQ4",
                "url": "http://purl.uniprot.org/uniprot/P0ABQ4",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "Other",
                        "organism_name": "Other - genome not listed",
                        "assembly_identifier": null
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000063-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "standard deviation",
            "standard error"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "Repeat1_library_transformation",
            "Repeat1_outgrowth",
            "Repeat1_t_00hrs",
            "Repeat1_t_02hrs",
            "Repeat1_t_04hrs",
            "Repeat1_t_06hrs",
            "Repeat1_t_08hrs",
            "Repeat1_t_12hrs",
            "Repeat1_t_16hrs",
            "Repeat1_t_18hrs",
            "Repeat2_library_transformation",
            "Repeat2_outgrowth",
            "Repeat2_t_00hrs",
            "Repeat2_t_02hrs",
            "Repeat2_t_04hrs",
            "Repeat2_t_06hrs",
            "Repeat2_t_08hrs",
            "Repeat2_t_12hrs",
            "Repeat2_t_16hrs",
            "Repeat2_t_18hrs",
            "Repeat3_library_transformation",
            "Repeat3_outgrowth",
            "Repeat3_t_00hrs",
            "Repeat3_t_02hrs",
            "Repeat3_t_04hrs",
            "Repeat3_t_06hrs",
            "Repeat3_t_08hrs",
            "Repeat3_t_12hrs",
            "Repeat3_t_16hrs",
            "Repeat3_t_18hrs",
            "Repeat4_library_transformation",
            "Repeat4_outgrowth",
            "Repeat4_t_00hrs",
            "Repeat4_t_02hrs",
            "Repeat4_t_04hrs",
            "Repeat4_t_06hrs",
            "Repeat4_t_08hrs",
            "Repeat4_t_12hrs",
            "Repeat4_t_16hrs",
            "Repeat4_t_18hrs",
            "Repeat5_library_transformation",
            "Repeat5_outgrowth",
            "Repeat5_t_00hrs",
            "Repeat5_t_02hrs",
            "Repeat5_t_04hrs",
            "Repeat5_t_06hrs",
            "Repeat5_t_08hrs",
            "Repeat5_t_12hrs",
            "Repeat5_t_16hrs",
            "Repeat5_t_18hrs",
            "Repeat6_library_transformation",
            "Repeat6_outgrowth",
            "Repeat6_t_00hrs",
            "Repeat6_t_02hrs",
            "Repeat6_t_04hrs",
            "Repeat6_t_06hrs",
            "Repeat6_t_08hrs",
            "Repeat6_t_12hrs",
            "Repeat6_t_16hrs",
            "Repeat6_t_18hrs"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000063-a-1",
        "variant_count": 3132,
        "experiment": "urn:mavedb:00000063-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000040-a-2",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effect of variants in yeast HSP90 under different combinations of temperature (30C or 36C) and presence/absence of salt (0.5 M NaCl). The results explore the adaptive potential of this essential gene.",
        "method_text": "Sequencing reads were filtered based on a minimum Phred quality score of 20 across all 36 bases. For each time point, the log2 ratio of each variant's count to the wild type count was calculated. The score of each variant was calculated as the slope of these log ratios to time in wild type generations. Scores of -0.5 are considered null-like.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae at 30C with 0.5 M NaCl.",
        "title": "Deep mutational scan of HSP90, 30C with salt",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "24299404",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/24299404",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000040-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000040-a-2",
        "variant_count": 189,
        "experiment": "urn:mavedb:00000040-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-07-10",
        "modification_date": "2019-07-15",
        "urn": "urn:mavedb:00000036-a-1",
        "publish_date": "2019-07-15",
        "created_by": "0000-0002-4998-4368",
        "modified_by": "0000-0002-4998-4368",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of LDLRAP1 based on a Y2H assay with the interactor OBFC1.",
        "title": "LDLRAP1 OBFC1 imputed and refined",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-4998-4368"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "LDLRAP1",
            "reference_sequence": {
                "sequence": "ATGGACGCGCTCAAGTCGGCGGGGCGGGCGCTGATCCGGAGCCCCAGCTTGGCCAAGCAGAGCTGGGGGGGCGGTGGCCGGCACCGCAAGCTGCCTGAGAACTGGACAGACACGCGGGAGACGCTGCTGGAGGGGATGCTGTTCAGCCTCAAGTACCTGGGCATGACGCTAGTGGAGCAGCCCAAGGGTGAGGAGCTGTCGGCCGCCGCCATCAAGAGGATCGTGGCTACAGCTAAGGCCAGTGGGAAGAAGCTGCAGAAGGTGACTCTGAAGGTGTCGCCACGGGGAATTATCCTGACAGACAACCTCACCAACCAGCTCATTGAGAACGTGTCCATATACAGGATCTCCTATTGCACAGCAGACAAGATGCACGACAAGGTGTTTGCATACATCGCCCAGAGCCAGCACAACCAGAGCCTCGAGTGCCACGCCTTCCTCTGCACCAAGCGGAAGATGGCACAGGCTGTTACCCTCACCGTAGCCCAGGCCTTCAAAGTCGCCTTTGAGTTTTGGCAGGTGTCCAAGGAAGAGAAAGAGAAGAGGGACAAAGCCAGCCAAGAGGGAGGGGACGTCCTGGGGGCCCGCCAAGACTGCACCCCCCCCTTGAAGAGCTTGGTCGCCACTGGGAACCTGCTGGACTTAGAGGAGACGGCTAAGGCCCCGCTGTCCACGGTCAGCGCCAACACCACCAACATGGACGAGGTGCCGCGGCCACAAGCCTTGAGTGGCAGCAGTGTTGTCTGGGAGCTGGATGATGGCCTGGATGAAGCGTTTTCGAGGCTTGCCCAGTCTCGGACAAACCCTCAGGTCCTGGACACTGGCCTGACAGCCCAGGACATGCATTACGCCCAGTGCCTCTCGCCTGTCGACTGGGACAAGCCTGACAGCAGCGGCACAGAGCAGGATGACCTCTTCAGCTTCTGA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000036-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000036-a-1",
        "variant_count": 6385,
        "experiment": "urn:mavedb:00000036-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-20",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000034-b-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "7",
            "end": 156791604,
            "start": 156791119,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of ZRS enhancer in NIH3T3 cells. Co-transfected with Hoxd13 and Hand2.",
        "title": "Saturation mutagenesis MPRA of ZRS enhancer, Hoxd13+Hand2",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "ZRS enhancer",
            "reference_sequence": {
                "sequence": "TGAGATATGGCTTCATTTTCTGTAATAAACACTAAGATCAAAACATGACCCAAGTTAAATTTCCTTGCAGGGTTCCCAGCAGGGGCTTCCCTTTTGTCTGTGATTTCCTCTCACCCACCAGAACCAGGCCAAATATGCGCATGTGCCACTAACACTAAGCAGCACTTCCTTAATCACTCATTTCCAACAATTTATGGATCATCAGTGGCAAAAAACGAGCAAAAATAATGAAAGAATGCAATGAAAGCTCGTGGAGACAGAGGCTGGACTTCCTACTCACTCTGTGTCTCTTTAAGATGGAGGCCTGATACAAATTAGCCACTGGGGGGAAAAAGTCATCTGGTCATAAAATACAGTACAAGGTCACTTTTATGTAAGTTTGCCAAAAGGGACATAAACCAGGACAATTTCAAACTGTGACACAGGATAGAAACATATTAAAAAAATCTTTGTTCCTCCTCTATTGTGCTGTCATGTTGCTCAGCA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000034-b-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000034-b-1",
        "variant_count": 1662,
        "experiment": "urn:mavedb:00000034-b",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2020-11-20",
        "modification_date": "2020-11-20",
        "urn": "urn:mavedb:00000049-a-8",
        "publish_date": "2020-11-20",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human MTHFR via functional complementation in yeast at 200ug/ml folate in A222V background",
        "title": "MTHFR at 200ug/ml folate in A222V background",
        "keywords": [
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390",
            "0000-0002-9219-4310",
            "0000-0002-2550-2141",
            "0000-0001-6465-5776"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MTHFR",
            "reference_sequence": {
                "sequence": "ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P42898",
                "url": "http://purl.uniprot.org/uniprot/P42898",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000177000",
                "url": "http://www.ensembl.org/id/ENSG00000177000",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 230,
                "identifier": "NM_005957",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_005957",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000049-a-8",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000049-a-8",
        "variant_count": 12141,
        "experiment": "urn:mavedb:00000049-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-17",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000009-a-2",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the impact of nearly all possible point mutations in the *SUL1* promoter, which is frequently amplified under sulfate-limited growth conditions. The results demonstrated that the optimal set of observed point mutations were able to increase organismal fitness by 11%, which is far below the fitness increases previously measured for amplification of *SUL1* (35% or higher). These experiments also revealed the fitness effects of creating new transcription factor binding sites in the existing promoter sequence.",
        "method_text": "Scores were calculated by converting barcode frequencies at each time point to log ratios between that round's frequency and the input frequency. The fitness score for each barcode was calculated as the slope of the ordinary least-squares regression for these ratios on the number of generations elapsed for the sample. The fitness scores were normalized by subtracting the wild type fitness score from each measurement. Read count cutoff for each variant was set at 15, which was heuristically determined.",
        "short_description": "Comprehensive analysis of the SUL1 promoter under glucose limited conditions.",
        "title": "SUL1 promoter under glucose limited conditions",
        "keywords": [
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "26936925",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/26936925",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "SUL1 promoter",
            "reference_sequence": {
                "sequence": "CGCCACCTCGAGTGCACTTTTTTTAATAAAGATCTCGTGTAATTGTCCAAATCTGACTTTTTCTTATAGTCTCGCTGGAACCACAGTGCGGCTTTGCAATTTTGCAAATCGGAATTTGAGTCACAGATCCCAGAAAAACTCCACACCTTCCCCACGCAGCAAGCGATAACGAACAAGTTGTCAAATTAGACCCATAATAATTTTGAACACTTCTACCTGTTCATGTCTTTTCTCGAACACTGTCATTTGAAATTATGCACTGTGAAAAAAAGAAACAAAGACCAAAAGAATAATATAAATAGTGAAGTAAAATGTGTTGTAATGCACATGGATCTTGTACTGCTCAAACTTAATATTTCTATTGTAGAAAAATTTTCGATTTAAAATTGTGAAACCGATTATATAAAAGTATATTAGCTGACATTAACGTCTCAAAACCAGGTCAATAGCTTTAAAAATAAAAATAAATCCCTGCAGAATACTCGGAAAGAAT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000009-a-2",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "r_sq",
            "slope",
            "barcode.count",
            "scored.unique.barcodes"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "count.0",
            "count.4",
            "count.12",
            "count.37"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000009-a-2",
        "variant_count": 116242,
        "experiment": "urn:mavedb:00000009-a",
        "is_meta_analysis": false,
        "data_usage_policy": "This study measured the impact of nearly all possible point mutations in the *SUL1* promoter, which is frequently amplified under sulfate-limited growth conditions. The results demonstrated that the optimal set of observed point mutations were able to increase organismal fitness by 11%, which is far below the fitness increases previously measured for amplification of *SUL1* (35% or higher). These experiments also revealed the fitness effects of creating new transcription factor binding sites in the existing promoter sequence."
    },
    {
        "creation_date": "2020-11-11",
        "modification_date": "2020-11-11",
        "urn": "urn:mavedb:00000047-c-1",
        "publish_date": "2020-11-11",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment utilised site-saturation mutagenesis (SSM) to measure the functional consequences of mutations in the human chemokine receptor, CCR5 and to map ligand interaction sites. Cells were selected for binding to HIV-1~BaL~ gp120-CD4.",
        "method_text": "Data obtained from sorting cells for both surface expression and HIV-1~BaL~ gp120-CD4 interaction was analysed using Enrich (version unspecified). Log~2~ enrichment ratios were calculated and normalised by subtracting the frequency of the WT sequence. Log~2~ enrichment ratios for two replicates were averaged to obtain variant scores. Note that the scores here were not reported in the manuscript tables, but were calculated from the replicate enrichment ratios that were reported.",
        "short_description": "Deep mutational scan selecting for CCR5 binding to HIV-1(BaL) gp120-CD4 in Expi293F cells.",
        "title": "CCR5 HIV binding",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29678950",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29678950",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0001-6681-7994"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CCR5",
            "reference_sequence": {
                "sequence": "GATTATCAAGTGTCAAGTCCAATCTATGACATCAATTATTATACATCGGAGCCCTGCCAAAAAATCAATGTGAAGCAAATCGCAGCCCGCCTCCTGCCTCCGCTCTACTCACTGGTGTTCATCTTTGGTTTTGTGGGCAACATGCTGGTCATCCTCATCCTGATAAACTGCAAAAGGCTGAAGAGCATGACTGACATCTACCTGCTCAACCTGGCCATCTCTGACCTGTTTTTCCTTCTTACTGTCCCCTTCTGGGCTCACTATGCTGCCGCCCAGTGGGACTTTGGAAATACAATGTGTCAACTCTTGACAGGGCTCTATTTTATAGGCTTCTTCTCTGGAATCTTCTTCATCATCCTCCTGACAATCGATAGGTACCTGGCTGTCGTCCATGCTGTGTTTGCTTTAAAAGCCAGGACGGTCACCTTTGGGGTGGTGACAAGTGTGATCACTTGGGTGGTGGCTGTGTTTGCGTCTCTCCCAGGAATCATCTTTACCAGATCTCAAAAAGAAGGTCTTCATTACACCTGCAGCTCTCATTTTCCATACAGTCAGTATCAATTCTGGAAGAATTTCCAGACATTAAAGATAGTCATCTTGGGGCTGGTCCTGCCGCTGCTTGTCATGGTCATCTGCTACTCGGGAATCCTAAAAACTCTGCTTCGGTGTCGAAATGAGAAGAAGAGGCACAGGGCTGTGAGGCTTATCTTCACCATCATGATTGTTTATTTTCTCTTCTGGGCTCCCTACAACATTGTCCTTCTCCTGAACACCTTCCAGGAATTCTTTGGCCTGAATAATTGCAGTAGCTCTAACAGGTTGGACCAAGCTATGCAGGTGACAGAGACTCTTGGGATGACGCACTGCTGCATCAACCCCATCATCTATGCCTTTGTCGGGGAGAAGTTCAGAAACTACCTCTTAGTCTTCTTCCAAAAGCACATTGCCAAACGCTTCTGCAAATGCTGTTCTATTTTCCAGCAAGAGGCTCCCGAGCGAGCAAGCTCAGTTTACACCCGATCCACTGGGGAGCAGGAAATATCTGTGGGCTTG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P51681",
                "url": "http://purl.uniprot.org/uniprot/P51681",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000047-c-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "rep1",
            "rep2"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000047-c-1",
        "variant_count": 7020,
        "experiment": "urn:mavedb:00000047-c",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-10-14",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000004-a-2",
        "publish_date": "2018-12-03",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study identified multiple gain-of-function mutations in the ubiquitination factor E4B U-box domain by measuring auto-ubiquitination in a phage display system. E4B is an E3 ligase, responsible for transferring a ubiquitin from an E2 ligase to the target (in this case the phage displaying E4B). Many of these mutations are not specific to one E2 enzyme and so may be generalizable for mutiple E2s and E3s.\r\n\r\nNote that this score set does not describe the scores presented in the original publication. It is a reanalysis of the raw data that was produced as part of testing and development for Enrich2.",
        "method_text": "Scores were calculated using the Enrich2 weighted least squares regression scoring model. Replicate scores were combined using the Enrich2 random-effects model. In contrast to the originally reported scores, these scores use all rounds of phage display instead of only the input/last round to calculate scores.\r\n\r\nThe scores and standard errors calculated for each of replicate appear as additional columns.\r\n\r\nCount columns are named using the format `<replicate>_c_<timepoint>`. The 0 time point is the input (unselected). Time points are given in rounds.",
        "short_description": "Nucleotide variant scores for deep mutational scan of the E4B U-box domain using phage display calculated by Enrich2.",
        "title": "Enrich2 nucleotide variant scores for E4B",
        "keywords": [
            {
                "text": "U-box"
            },
            {
                "text": "E3"
            },
            {
                "text": "Phage display"
            },
            {
                "text": "ubiquitin"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28784151",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28784151",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "E4B",
            "reference_sequence": {
                "sequence": "ATAGAGAAGTTTAAACTTCTTGCAGAGAAAGTGGAGGAAATCGTGGCAAAGAATGCGCGGGCAGAAATAGACTACAGCGATGCCCCGGACGAGTTCAGAGACCCTCTGATGGACACCCTGATGACCGATCCCGTGAGACTGCCCTCTGGCACCGTCATGGACCGTTCTATCATCCTGCGGCATCTGCTCAACTCCCCCACCGACCCCTTCAACCGCCAGATGCTGACTGAGAGCATGCTGGAGCCAGTGCCAGAGCTAAAGGAGCAGATTCAGGCCTGGATGAGAGAGAAACAGAGCAGTGACCACTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1071,
                "identifier": "Q9ES00",
                "url": "http://purl.uniprot.org/uniprot/Q9ES00",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": {
                "offset": 3939,
                "identifier": "NM_022022.3",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_022022.3",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "mm10",
                        "organism_name": "Mus musculus",
                        "assembly_identifier": {
                            "identifier": "GCF_000001635.20",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001635.20",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000004-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_Rep_2",
            "score_Rep_2",
            "SE_Rep_3",
            "score_Rep_3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "Rep_2_c_0",
            "Rep_2_c_1",
            "Rep_2_c_2",
            "Rep_2_c_3",
            "Rep_3_c_0",
            "Rep_3_c_1",
            "Rep_3_c_2",
            "Rep_3_c_3"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000004-a-2",
        "variant_count": 158939,
        "experiment": "urn:mavedb:00000004-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-07",
        "urn": "urn:mavedb:00000039-a-2",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effects of yeast HSP90 variants under the control of different promoters to explore the relationship between protein sequence and expression level. The results showed that reduced expression level (compared to wild-type expression) revealed new partial loss of function mutations.",
        "method_text": "Growth rates were calculated for each variant and converted into selection coefficients. The selection coefficient for each variant under control of this promoter/UTR combination is reported as the score. For variants with multiple synonymous codons, the reported coefficient is the average of all synonymous variant's selection coefficients.\r\n\r\nVariants annotated as \"null-like\" have a score of -1.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae under the control of the ADH promoter, no 3'UTR added.",
        "title": "Deep mutational scan of HSP90, ADHdter construct",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "promoter"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "23825969",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23825969",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000039-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000039-a-2",
        "variant_count": 185,
        "experiment": "urn:mavedb:00000039-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-08",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000041-b-1",
        "publish_date": "2019-08-08",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study infers the activity of variants in Src kinases's SH4 domain by measuring their effects on yeast growth, and therefore phosphotransferase activity. The resulting dataset includes variants classified as gain of function, loss of function. or neutral.",
        "method_text": "Variant scores were calculated using Enrich2 weighted least squares regression and wild-type normalization. The `activity_score` is `-1 * score`, because negative scores indicate depletion in the population, which is associated with higher kinase activity.\r\n\r\nVariants were classified as \"gain of function\" \"neutral\" or \"loss of function\" based on whether they were within +/- 2 standard deviations of the mean score of synonymous variants (variants with wild type amino acid sequence). These categories are denoted by 1, 0, or -1 in the table respectively.",
        "short_description": "Amino acid scores for deep mutational scan of the Src kinase catalytic domain.",
        "title": "Deep mutational scan of Src SH4",
        "keywords": [
            {
                "text": "kinase"
            },
            {
                "text": "NNK mutagenesis"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "regression"
            },
            {
                "text": "Enrich2"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "30956043",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/30956043",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Src SH4 domain",
            "reference_sequence": {
                "sequence": "GGTAGCAACAAGAGCAAGCCCAAGGATGCCAGCCAGCGGCGCCGCAGCCTGGAG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P12931",
                "url": "http://purl.uniprot.org/uniprot/P12931",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000041-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "std",
            "epsilon",
            "activity_score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000041-b-1",
        "variant_count": 360,
        "experiment": "urn:mavedb:00000041-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-14",
        "modification_date": "2020-12-20",
        "urn": "urn:mavedb:00000053-a-2",
        "publish_date": "2020-12-20",
        "created_by": "0000-0002-2866-3880",
        "modified_by": "0000-0002-2866-3880",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "This contains the single mutations found in the dataset",
        "title": "Single mutations in PSD95 PDZ3",
        "keywords": [
            {
                "text": "Binding"
            },
            {
                "text": "PDZ"
            },
            {
                "text": "SPINE mutagenesis"
            },
            {
                "text": "DMS"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2866-3880"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "PSD95 PDZ3",
            "reference_sequence": {
                "sequence": "CCACGCCGCATCGTCATCCACCGTGGGTCAACGGGGTTAGGCTTCAATATCGTCGGTGGAGAGGATGGTGAGGGAATCTTCATCTCATTCATTCTGGCGGGAGGACCGGCCGATTTAAGCGGAGAACTTCGCAAAGGTGACCAGATCCTTTCGGTGAATGGCGTAGATTTGCGCAACGCATCACACGAACAGGCGGCCATCGCATTAAAGAACGCCGGCCAGACCGTTACGATTATCGCGCAGTATAAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 309,
                "identifier": "P78352",
                "url": "http://purl.uniprot.org/uniprot/P78352",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000053-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sigma"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "base_1",
            "base_2",
            "base_3",
            "base_4",
            "base_5",
            "base_6",
            "chlor_1",
            "chlor_2",
            "chlor_3",
            "chlor_4",
            "chlor_5",
            "chlor_6"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000053-a-2",
        "variant_count": 1235,
        "experiment": "urn:mavedb:00000053-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2021-04-12",
        "modification_date": "2021-04-15",
        "urn": "urn:mavedb:00000064-b-1",
        "publish_date": "2021-04-15",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study tested the variant effects of _E.coli_ LamB which facilitates the transportation of maltodextrin. The bacteria are cultured in the media that maltodextrin is the only carbon source and the growth rate for each mutant is determined to indicate its impact on maltodextrin transportation function.",
        "method_text": "The selection experiment lasts 36h at 37°C. The mutation frequency is estimated by short-read sequencing of random fragments using the Illumina Nextera kit. Mutants with less than five reads are discarded. \r\n\r\nThe frequency of each mutant in the selection group is calculated by the count ratio between mutation type and wild type. This value is then divided by the frequency in the control group and transformed by log2. The enrichment scores are then converted to functional scores by subtracting the median enrichment of nonsense mutations and divided by the difference between the median enrichment of the synonymous and nonsense mutations.\r\n\r\nThe count data contains input and selected mutant counts for 6 replicates and the mean of it. The score data contains the input frequency, selected frequency, log_ratio for them and the final enrichment scores are in the _score_ column.",
        "short_description": "Growth rate of E.coli with mutated LamB where maltodextrin is the sole carbon source",
        "title": "Maltose transportation of LamB",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1099/mgen.0.000364",
                "url": "https://doi.org/10.1099/mgen.0.000364",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "32238226",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32238226",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "LamB",
            "reference_sequence": {
                "sequence": "ATGATGATTACTCTGCGCAAACTTCCTCTGGCGGTTGCCGTCGCAGCGGGCGTAATGTCTGCTCAGGCAATGGCTGTTGATTTCCACGGCTATGCACGTTCCGGTATTGGTTGGACAGGTAGCGGCGGTGAACAACAGTGTTTCCAGACTACCGGTGCTCAAAGTAAATACCGTCTTGGCAACGAATGTGAAACTTATGCTGAATTAAAATTGGGTCAGGAAGTGTGGAAAGAGGGCGATAAGAGCTTCTATTTCGACACTAACGTGGCCTATTCCGTCGCACAACAGAATGACTGGGAAGCTACCGATCCGGCCTTCCGTGAAGCAAACGTGCAGGGTAAAAACCTGATCGAATGGCTGCCAGGCTCCACCATCTGGGCAGGTAAGCGCTTCTACCAACGTCATGACGTTCATATGATCGACTTCTACTACTGGGATATTTCTGGTCCTGGTGCCGGTCTGGAAAACATCGATGTTGGCTTCGGTAAACTCTCTCTGGCAGCAACCCGCTCCTCTGAAGCTGGTGGTTCTTCCTCTTTCGCCAGCAACAATATTTATGACTATACCAACGAAACCGCGAACGACGTTTTCGATGTGCGTTTAGCGCAGATGGAAATCAACCCGGGCGGCACATTAGAACTGGGTGTCGACTACGGTCGTGCCAACTTGCGTGATAACTATCGTCTGGTTGATGGCGCATCGAAAGACGGCTGGTTATTCACTGCTGAACATACTCAGAGTGTCCTGAAGGGCTTTAACAAGTTTGTTGTTCAGTACGCTACTGACTCGATGACCTCGCAGGGTAAAGGGCTGTCGCAGGGTTCTGGCGTTGCATTTGATAACGAAAAATTTGCCTACAATATCAACAACAACGGTCACATGCTGCGTATCCTCGACCACGGTGCGATCTCCATGGGCGACAACTGGGACATGATGTACGTGGGTATGTACCAGGATATCAACTGGGATAACGACAACGGCACCAAGTGGTGGACCGTCGGTATTCGCCCGATGTACAAGTGGACGCCAATCATGAGCACCGTGATGGAAATCGGCTACGACAACGTCGAATCCCAGCGCACCGGCGACAAGAACAATCAGTACAAAATTACCCTCGCACAACAATGGCAGGCTGGCGACAGCATCTGGTCACGCCCGGCTATTCGTGTCTTCGCAACCTACGCCAAGTGGGATGAGAAATGGGGTTACGACTACACCGGTAACGCTGATAACAACGCGAACTTCGGCAAAGCCGTTCCTGCTGATTTCAACGGCGGCAGCTTCGGTCGTGGCGACAGCGACGAGTGGACCTTCGGTGCCCAGATGGAAATCTGGTGGTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P02943",
                "url": "http://purl.uniprot.org/uniprot/P02943",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "Other",
                        "organism_name": "Other - genome not listed",
                        "assembly_identifier": null
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000064-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "input_freq",
            "selected_freq",
            "log_ratio",
            "log_ratio.0",
            "input_freq.0",
            "selected_freq.0",
            "log_ratio.1",
            "input_freq.1",
            "selected_freq.1",
            "log_ratio.2",
            "input_freq.2",
            "selected_freq.2",
            "log_ratio.3",
            "input_freq.3",
            "selected_freq.3",
            "log_ratio.4",
            "input_freq.4",
            "selected_freq.4",
            "log_ratio.5",
            "input_freq.5",
            "selected_freq.5"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "input_counts",
            "selected_counts",
            "input_counts.0",
            "selected_counts.0",
            "input_counts.1",
            "selected_counts.1",
            "input_counts.2",
            "selected_counts.2",
            "input_counts.3",
            "selected_counts.3",
            "input_counts.4",
            "selected_counts.4",
            "input_counts.5",
            "selected_counts.5",
            "min_input"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000064-b-1",
        "variant_count": 4023,
        "experiment": "urn:mavedb:00000064-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-a-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast Treated with MG-132",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1101/2020.05.01.072884",
                "url": "https://doi.org/10.1101/2020.05.01.072884",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-a-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2021-01-29",
        "modification_date": "2021-01-29",
        "urn": "urn:mavedb:00000060-a-1",
        "publish_date": "2021-01-29",
        "created_by": "0000-0002-9712-9163",
        "modified_by": "0000-0002-9712-9163",
        "extra_metadata": {},
        "abstract_text": "Insoluble protein aggregates are the hallmarks of many neurodegenerative diseases. For example, aggregates of TDP-43 occur in nearly all cases of amyotrophic lateral sclerosis (ALS). However, whether aggregates cause cellular toxicity is still not clear, even in simpler cellular systems. We reasoned that deep mutagenesis might be a powerful approach to disentangle the relationship between aggregation and toxicity. We generated >50,000 mutations in the prion-like domain (PRD) of TDP-43 and quantified their toxicity in yeast cells. Surprisingly, mutations that increase hydrophobicity and aggregation strongly decrease toxicity. In contrast, toxic variants promote the formation of dynamic liquid-like condensates. Mutations have their strongest effects in a hotspot that genetic interactions reveal to be structured in vivo, illustrating how mutagenesis can probe the in vivo structures of unstructured proteins. Our results show that aggregation of TDP-43 is not harmful but protects cells, most likely by titrating the protein away from a toxic liquid-like phase.",
        "method_text": "Variants for TDP-43 were constructed using a \"doped\" oligo for each of the TDP-43 libraries (290-331 and 332-373). Libraries were transformed in Saccharomyces cerevisiae and TDP-43 expression was induced for 5-6 generations. Variant counts were processed with DiMSum (Faure et al. 2020) to obtain a toxicity score for each variant. \r\nSee Bolognesi et al. 2019 for details.",
        "short_description": "Deep mutational scanning of TDP-43 prion-like domain (AA 290-331) using a toxicity assay",
        "title": "TDP-43 toxicity",
        "keywords": [
            {
                "text": "instrinsic disorder"
            },
            {
                "text": "prion-like domain"
            },
            {
                "text": "TDP-43"
            },
            {
                "text": "toxicity"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-12101-z",
                "url": "https://doi.org/10.1038/s41467-019-12101-z",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31519910",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31519910",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0002-9712-9163"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TARDBP",
            "reference_sequence": {
                "sequence": "ATGTCTGAATATATTCGGGTAACCGAAGATGAGAACGATGAGCCCATTGAAATACCATCGGAAGACGATGGGACGGTGCTGCTCTCCACGGTTACAGCCCAGTTTCCAGGGGCGTGTGGGCTTCGCTACAGGAATCCAGTGTCTCAGTGTATGAGAGGTGTCCGGCTGGTAGAAGGAATTCTGCATGCCCCAGATGCTGGCTGGGGAAATCTGGTGTATGTTGTCAACTATCCAAAAGATAACAAAAGAAAAATGGATGAGACAGATGCTTCATCAGCAGTGAAAGTGAAAAGAGCAGTCCAGAAAACATCCGATTTAATAGTGTTGGGTCTCCCATGGAAAACAACCGAACAGGACCTGAAAGAGTATTTTAGTACCTTTGGAGAAGTTCTTATGGTGCAGGTCAAGAAAGATCTTAAGACTGGTCATTCAAAGGGGTTTGGCTTTGTTCGTTTTACGGAATATGAAACACAAGTGAAAGTAATGTCACAGCGACATATGATAGATGGACGATGGTGTGACTGCAAACTTCCTAATTCTAAGCAAAGCCAAGATGAGCCTTTGAGAAGCAGAAAAGTGTTTGTGGGGCGCTGTACAGAGGACATGACTGAGGATGAGCTGCGGGAGTTCTTCTCTCAGTACGGGGATGTGATGGATGTCTTCATCCCCAAGCCATTCAGGGCCTTTGCCTTTGTTACATTTGCAGATGATCAGATTGCGCAGTCTCTTTGTGGAGAGGACTTGATCATTAAAGGAATCAGCGTTCATATATCCAATGCCGAACCTAAGCACAATAGCAATAGACAGTTAGAAAGAAGTGGAAGATTTGGTGGTAATCCAGGTGGCTTTGGGAATCAGGGTGGATTTGGTAATAGCAGAGGGGGTGGAGCTGGTTTGGGAAACAATCAAGGTAGTAATATGGGTGGTGGGATGAACTTTGGTGCGTTCAGCATTAATCCAGCCATGATGGCTGCCGCCCAGGCAGCACTACAGAGCAGTTGGGGTATGATGGGCATGTTAGCCAGCCAGCAGAACCAGTCAGGCCCATCGGGTAATAACCAAAACCAAGGCAACATGCAGAGGGAGCCAAACCAGGCCTTCGGTTCTGGAAATAACTCTTATAGTGGCTCTAATTCTGGTGCAGCAATTGGTTGGGGATCAGCATCCAATGCAGGGTCGGGCAGTGGTTTTAATGGAGGCTTTGGCTCAAGCATGGATTCTAAGTCTTCTGGCTGGGGAATG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 868,
                "identifier": "Q13148",
                "url": "http://purl.uniprot.org/uniprot/Q13148",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000120948",
                "url": "http://www.ensembl.org/id/ENSG00000120948",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 0,
                "identifier": "NP_031401.1",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NP_031401.1",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg19",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.13",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.13",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000060-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000060-a-1",
        "variant_count": 704,
        "experiment": "urn:mavedb:00000060-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-04-14",
        "urn": "urn:mavedb:00000061-h-1",
        "publish_date": "2021-04-14",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors generated a library of RAF variant and use the PACS system to test KRAS4b/RAF protein-protein interaction (PPI). The experimental data revealed positions along the binding interface as well as which substitutions are tolerated at each position.",
        "method_text": "Samples are collected after 48h and sequenced by Illumina. The counts for each variant is first added by 1, then divided by total sequence counts at this time point to calculate variant frequency. The functional score equals the division of a variant frequency at this time point and its frequency in initial library. Further normalizing the functional scores by wild type scores will give the relative enrichment values. The score data includes scores from three replicates which are suffixed by: _rep1, _rep2 & _rep3. The final score is the median of them.",
        "short_description": "Measuring the interaction of mutated RAF to RAS by a new phage-assisted continuous selection (PACS) system.",
        "title": "RAF variant selected after 48h",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1021/acschembio.9b00669",
                "url": "https://doi.org/10.1021/acschembio.9b00669",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31808666",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31808666",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "RAF",
            "reference_sequence": {
                "sequence": "TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 51,
                "identifier": "P04049",
                "url": "http://purl.uniprot.org/uniprot/P04049",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000061-h-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "score_rep1",
            "score_rep2",
            "score_rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000061-h-1",
        "variant_count": 298,
        "experiment": "urn:mavedb:00000061-h",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-18",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000013-a-1",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {
            "abundance_codes": {
                "low": 1,
                "wt-like": 4,
                "possibly_low": 2,
                "possibly_wt-like": 3
            }
        },
        "abstract_text": "This study demonstrated variant abundance by massively parallel sequencing (VAMP-seq), a technique for measuring the effect of protein variants on abundance using fluorescence. VAMP-seq is a generally-applicable assay for protein stability that can identify loss-of-function variants.",
        "method_text": "Barcodes for each bin were counted and associated with variant sequences using Enrich2. Scores were calculated based on the frequency of each variant in each bin using a weighted average such that increased abundance in high-signal bins gives a higher score. Abundance scores were calculated based on a min-max normalization using wild type (score of 1) and the average nonsense variant score (score of 0). The scores reported are the average of the eight replicate scores. Confidence intervals and variance estimates are based on the replicate scores.\r\n\r\nMetadata contains the mapping between abundance classes and the integer values found in the database.",
        "short_description": "Amino acid scores for variant abundance by massively parallel sequencing (VAMP-seq) applied to PTEN.",
        "title": "PTEN VAMP-seq",
        "keywords": [
            {
                "text": "Enrich2"
            },
            {
                "text": "VAMP-seq"
            },
            {
                "text": "FACS"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29785012",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29785012",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "PTEN",
            "reference_sequence": {
                "sequence": "ATGACAGCCATCATCAAAGAGATCGTTAGCAGAAACAAAAGGAGATATCAAGAGGATGGATTCGACTTAGACTTGACCTATATTTATCCAAACATTATTGCTATGGGATTTCCTGCAGAAAGACTTGAAGGCGTATACAGGAACAATATTGATGATGTAGTAAGGTTTTTGGATTCAAAGCATAAAAACCATTACAAGATATACAATCTTTGTGCTGAAAGACATTATGACACCGCCAAATTTAATTGCAGAGTTGCACAATATCCTTTTGAAGACCATAACCCACCACAGCTAGAACTTATCAAACCCTTTTGTGAAGATCTTGACCAATGGCTAAGTGAAGATGACAATCATGTTGCAGCAATTCACTGTAAAGCTGGAAAGGGACGAACTGGTGTAATGATATGTGCATATTTATTACATCGGGGCAAATTTTTAAAGGCACAAGAGGCCCTAGATTTCTATGGGGAAGTAAGGACCAGAGACAAAAAGGGAGTAACTATTCCCAGTCAGAGGCGCTATGTGTATTATTATAGCTACCTGTTAAAGAATCATCTGGATTATAGACCAGTGGCACTGTTGTTTCACAAGATGATGTTTGAAACTATTCCAATGTTCAGTGGCGGAACTTGCAATCCTCAGTTTGTGGTCTGCCAGCTAAAGGTGAAGATATATTCCTCCAATTCAGGACCCACACGACGGGAAGACAAGTTCATGTACTTTGAGTTCCCTCAGCCGTTACCTGTGTGTGGTGATATCAAAGTAGAGTTCTTCCACAAACAGAACAAGATGCTAAAAAAGGACAAAATGTTTCACTTTTGGGTAAATACATTCTTCATACCAGGACCAGAGGAAACCTCAGAAAAAGTAGAAAATGGAAGTCTATGTGATCAAGAAATCGATAGCATTTGCAGTATAGAGCGTGCAGATAATGACAAGGAATATCTAGTACTTACTTTAACAAAAAATGATCTTGACAAAGCAAATAAAGACAAAGCCAACCGATACTTTTCTCCAAATTTTAAGGTGAAGCTGTACTTCACAAAAACAGTAGAGGAGCCGTCAAATCCAGAGGCTAGCAGTTCAACTTCTGTAACACCAGATGTTAGTGACAATGAACCTGATCATTATAGATATTCTGACACCACTGACTCTGATCCAGAGAATGAACCTTTTGATGAAGATCAGCATACACAAATTACAAAAGTCTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P60484",
                "url": "http://purl.uniprot.org/uniprot/P60484",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000013-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "expts",
            "se",
            "lower_ci",
            "upper_ci",
            "score1",
            "score2",
            "score3",
            "score4",
            "score5",
            "score6",
            "score7",
            "score8",
            "median_w_ave",
            "exp1_w_ave",
            "exp2_w_ave",
            "exp3_w_ave",
            "exp4_w_ave",
            "exp5_w_ave",
            "exp6_w_ave",
            "exp7_w_ave",
            "exp8_w_ave",
            "snv",
            "abundance_class"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000013-a-1",
        "variant_count": 4409,
        "experiment": "urn:mavedb:00000013-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000022-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "1",
            "end": 209816390,
            "start": 209815790,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of IRF6 enhancer in HaCaT cells.",
        "title": "Saturation mutagenesis MPRA of IRF6 enhancer",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "IRF6 enhancer",
            "reference_sequence": {
                "sequence": "GTAAATGGTGAGTAGGAAGTTGATTCTGCCCGATCTGTCTGACTCCATGCCCTTTCTATTAGGTCATGAAGGGGAACCTGAGGATTGGAGCTTTGGAATGTTAATCTTACCCAAAGGCCTGAAGTAATACCCCAGAATGTGAACATGTGTGACCATCTGCCTGTCCTGGGGGTGGGAAGAAGGCAGCATGCTCTATCCTTGACCCTGATTGAGCCCAGGGGCTGAATCTGGAGCTTTGGGGCCTGGGAACCTCTCTACCTGCGTCAATGTCTGGAGGCCCTGAGAGTTTCGCTCAGGCTCAGAGCAGGCATCGCAACCTCCCAGTTACTATTCTGTGCTGTGGCAAGTGCCAGCTTGTCCTCTCTTCCCCACCCAGCCCGGGAAACCGGCAGCATTTCTAGTTCAGGCCCAGACCCGTCCTGGCAGCCTGGATTCCACTGCCTAGGCAGGAAGCTCATCTCAGCCCAGTGACCTTTTCTCTCTGTTTTTTGTCACAGAGGAATTTCCATGCCAGCAGTATGGGGCAATGGGGGTGGGTGGCCAAAGGTTTCCCCCTTAAGCCACAAGAGCCATGGAGTGGAGGTAAGCTAAGCAAACAGAG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000022-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000022-a-1",
        "variant_count": 1906,
        "experiment": "urn:mavedb:00000022-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-08",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000042-a-1",
        "publish_date": "2019-08-08",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This is a dataset demonstrating the use of landing pad cell lines to generate recombinant mammalian cells. The experiment measured the effect of mutating the start codon of an EGFP construct with an N-terminal ubiquitin fusion. Once the ubiquitin is removed by cellular DUBs, the protein exposes a new N-terminal amino acid, which may target the protein for degradation.",
        "method_text": "Read counts for each bin were converted into frequencies. Scores were calculated by multiplying the frequency in each of the four bins bin by the bin number (lowest abundance=1, highest abundance=4) and taking the sum. Scores for each of four replicates are reported individually, and the overall score is the mean of the four replicates.",
        "short_description": "Nucleotide scores for the start codon of EGFP and its effect on an N-terminal fusion.",
        "title": "EGFP N-terminal codon nucleotide scores",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "FACS"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28335006",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28335006",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "EGFP",
            "reference_sequence": {
                "sequence": "ATG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "C5MKY7",
                "url": "http://purl.uniprot.org/uniprot/C5MKY7",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "Other",
                        "organism_name": "Other - genome not listed",
                        "assembly_identifier": null
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000042-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "e1_score",
            "e2_score",
            "e3_score",
            "e4_score",
            "sd",
            "sem",
            "lower_ci",
            "upper_ci"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000042-a-1",
        "variant_count": 65,
        "experiment": "urn:mavedb:00000042-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-08-23",
        "modification_date": "2020-12-04",
        "urn": "urn:mavedb:00000051-a-1",
        "publish_date": "2020-12-02",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-2449-7034",
        "extra_metadata": {},
        "abstract_text": "This study mutated the residues on CLS helix region of the bacterial inner membrane. The experiment links the insertion and self-association property of the tested region to the survive of bacteria and studied the membrane-protein energetics landscape of missense mutations of the target.",
        "method_text": "The C-terminal of L-Selectin is used as the membrane-spanning segment for dsT$\\beta$L in this experiment. The frequency of the count of each mutant relative to wild-type in the selected and reference pools was computed. Variants with <100 counts in the reference population were removed. The selection coefficients were calculated as the ratio of variant relative frequency in the selected and reference pool. The selection coeffcients were then transformed to apparent changes in free energy due to each single-point substitution through the Gibbs free-energy equation: \r\n\r\n$$ \\Delta\\Delta G^{app} = -RT\\ln(s) $$\r\n\r\nwhere R is the gas constant and T is the absolute temperature (310K). The count data table includes the count of each variant in the reference and selected pool.",
        "short_description": "A deep mutational scanning experiment targeting the CLS helix region on the bacterial inner membrane.",
        "title": "C-terminal of L-Selectin",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.7554/eLife.12125",
                "url": "https://doi.org/10.7554/eLife.12125",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "26824389",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/26824389",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "human L-Selectin",
            "reference_sequence": {
                "sequence": "CCGCTGTTCATCCCGGTTGCAGTTATGGTTACCGCTTTTAGTGGATTGGCGTTTATCATCTGGCTGGCT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 332,
                "identifier": "P14151",
                "url": "http://purl.uniprot.org/uniprot/P14151",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "Other",
                        "organism_name": "Other - genome not listed",
                        "assembly_identifier": null
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000051-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "frequency_ref",
            "frequency_sel",
            "ratio"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "count_reference",
            "count_selected"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000051-a-1",
        "variant_count": 540,
        "experiment": "urn:mavedb:00000051-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000021-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "6",
            "end": 396593,
            "start": 396143,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of IRF4 enhancer in SK-MEL-28 cells.",
        "title": "Saturation mutagenesis MPRA of IRF4 enhancer",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "IRF4 enhancer",
            "reference_sequence": {
                "sequence": "GGCGTGTCCGCCTGTTGGAATATGCTTCTCAGGTCTTCTGGGAAACAGATGTTTTGTGGAAGTGGAAGATTTTGGAAGTAGTGCCTTATCATGTGAAACCACAGGGCAGCTGATCTCTTCAGGCTTTCTTGATGTGAATGACAGCTTTGTTTCATCCACTTTGGTGGGTAAAAGAAGGCAAATTCCCCTGTGGTACTTTTGGTGCCAGGTTTAGCCATATGACGAAGCTTTACATAAAACAGTACAAGTATCTCCATTGTCCTTTATGATCCTCCATGAGTGTTTTCACTTAGTCTGATGAAGGGTTCACTCCAGTCTTTTCGGATGATAAAATGCTTCGGCTGTCAGTCTAATAAGGGATTCCCTGAGGAGTTTGGAGGCTGTAAGAGCACCCCCCGTCTCAATGCCAGTGCTTCTTATCTCAGCCTCTCCTGCACTCCTTTACCCCCGT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000021-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000021-a-1",
        "variant_count": 1510,
        "experiment": "urn:mavedb:00000021-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-e-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast Treated with Menadione",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1101/2020.05.01.072884",
                "url": "https://doi.org/10.1101/2020.05.01.072884",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-e-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-e-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-e",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2021-04-12",
        "modification_date": "2021-04-15",
        "urn": "urn:mavedb:00000064-a-1",
        "publish_date": "2021-04-15",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study tested the variant effects of _E.coli_ LamB which is the binding protein of phage &lambda; prion during infection. The bacteria are cultured in media contains phage &lambda; and the growth rate for each mutant is determined to indicate their resistance to phage invasion.",
        "method_text": "The selection experiment lasts 16h at 37°C. The mutation frequency is estimated by short-read sequencing of random fragments using the Illumina Nextera kit. Mutants with less than five reads are discarded. \r\n\r\nThe frequency of each mutant in the selection group is calculated by the count ratio between mutation type and wild type. This value is then divided by the frequency in the control group and transformed by log2. The enrichment scores are then converted to functional scores by subtracting the median enrichment of nonsense mutations and divided by the difference between the median enrichment of the synonymous and nonsense mutations.\r\n\r\nThe count data contains input and selected mutant counts for 6 replicates and the mean of it. The score data contains the input frequency, selected frequency, log_ratio for them and the final enrichment scores are in the _score_ column.",
        "short_description": "Growth rate of E.coli with mutated LamB cultured with rich phage λ",
        "title": "LamB resistance to phage λ",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1099/mgen.0.000364",
                "url": "https://doi.org/10.1099/mgen.0.000364",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "32238226",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32238226",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "LamB",
            "reference_sequence": {
                "sequence": "ATGATGATTACTCTGCGCAAACTTCCTCTGGCGGTTGCCGTCGCAGCGGGCGTAATGTCTGCTCAGGCAATGGCTGTTGATTTCCACGGCTATGCACGTTCCGGTATTGGTTGGACAGGTAGCGGCGGTGAACAACAGTGTTTCCAGACTACCGGTGCTCAAAGTAAATACCGTCTTGGCAACGAATGTGAAACTTATGCTGAATTAAAATTGGGTCAGGAAGTGTGGAAAGAGGGCGATAAGAGCTTCTATTTCGACACTAACGTGGCCTATTCCGTCGCACAACAGAATGACTGGGAAGCTACCGATCCGGCCTTCCGTGAAGCAAACGTGCAGGGTAAAAACCTGATCGAATGGCTGCCAGGCTCCACCATCTGGGCAGGTAAGCGCTTCTACCAACGTCATGACGTTCATATGATCGACTTCTACTACTGGGATATTTCTGGTCCTGGTGCCGGTCTGGAAAACATCGATGTTGGCTTCGGTAAACTCTCTCTGGCAGCAACCCGCTCCTCTGAAGCTGGTGGTTCTTCCTCTTTCGCCAGCAACAATATTTATGACTATACCAACGAAACCGCGAACGACGTTTTCGATGTGCGTTTAGCGCAGATGGAAATCAACCCGGGCGGCACATTAGAACTGGGTGTCGACTACGGTCGTGCCAACTTGCGTGATAACTATCGTCTGGTTGATGGCGCATCGAAAGACGGCTGGTTATTCACTGCTGAACATACTCAGAGTGTCCTGAAGGGCTTTAACAAGTTTGTTGTTCAGTACGCTACTGACTCGATGACCTCGCAGGGTAAAGGGCTGTCGCAGGGTTCTGGCGTTGCATTTGATAACGAAAAATTTGCCTACAATATCAACAACAACGGTCACATGCTGCGTATCCTCGACCACGGTGCGATCTCCATGGGCGACAACTGGGACATGATGTACGTGGGTATGTACCAGGATATCAACTGGGATAACGACAACGGCACCAAGTGGTGGACCGTCGGTATTCGCCCGATGTACAAGTGGACGCCAATCATGAGCACCGTGATGGAAATCGGCTACGACAACGTCGAATCCCAGCGCACCGGCGACAAGAACAATCAGTACAAAATTACCCTCGCACAACAATGGCAGGCTGGCGACAGCATCTGGTCACGCCCGGCTATTCGTGTCTTCGCAACCTACGCCAAGTGGGATGAGAAATGGGGTTACGACTACACCGGTAACGCTGATAACAACGCGAACTTCGGCAAAGCCGTTCCTGCTGATTTCAACGGCGGCAGCTTCGGTCGTGGCGACAGCGACGAGTGGACCTTCGGTGCCCAGATGGAAATCTGGTGGTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P02943",
                "url": "http://purl.uniprot.org/uniprot/P02943",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "Other",
                        "organism_name": "Other - genome not listed",
                        "assembly_identifier": null
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000064-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "input_freq",
            "selected_freq",
            "log_ratio",
            "log_ratio.0",
            "in_freq.0",
            "sel_freq.0",
            "log_ratio.1",
            "in_freq.1",
            "sel_freq.1",
            "log_ratio.2",
            "in_freq.2",
            "sel_freq.2",
            "log_ratio.3",
            "in_freq.3",
            "sel_freq.3",
            "log_ratio.4",
            "in_freq.4",
            "sel_freq.4",
            "log_ratio.5",
            "in_freq.5",
            "sel_freq.5"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "input_counts",
            "selected_counts",
            "in_count.0",
            "sel_count.0",
            "in_count.1",
            "sel_count.1",
            "in_count.2",
            "sel_count.2",
            "in_count.3",
            "sel_count.3",
            "in_count.4",
            "sel_count.4",
            "in_count.5",
            "sel_count.5",
            "min_input"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000064-a-1",
        "variant_count": 4023,
        "experiment": "urn:mavedb:00000064-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000031-a-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "5",
            "end": 1295247,
            "start": 1294989,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of TERT promoter in HEK293T cells.",
        "title": "Saturation mutagenesis MPRA of TERT promoter, HEK",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TERT promoter",
            "reference_sequence": {
                "sequence": "TCGCGGGGGTGGCCGGGGCCAGGGCTTCCCACGTGCGCAGCAGGACGCAGCGCTGCCTGAAACTCGCGCCGCGAGGAGAGGGCGGGGCCGCGGAAAGGAAGGGGAGGGGCTGGGAGGGCCCGGAGGGGGCTGGGCCGGGGACCCGGGAGGGGTCGGGACGGGGCGGGGTCCGCGCGGAGGAGGCGGAGCTGGAAGGTGAAGGGGCAGGACGGGTGCCCGGGTCCCCAGTCCCTCCGCCACGTGGGAAGCGCGGTCCTGG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000031-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000031-a-1",
        "variant_count": 975,
        "experiment": "urn:mavedb:00000031-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-02-14",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000012-a-3",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment demonstrated the programmed allelic series (PALS) method for site-directed mutagenesis using microarrays. The impact of nearly all singleton missense mutation in the Gal4 yeast transcription factor was measured in multiple selections.",
        "method_text": "Variant counts were calculated by summing the read counts of barcodes associated with each variant. The enrichment score for each variant is the $\\log_2$ ratio of the mutant count over the wild type count for the selected time point minus the $\\log_2$ ratio of the mutant count over the wild type count for the input time point (ratio of ratios).",
        "short_description": "Deep mutational scan of Gal4 DNA-binding domain using a yeast growth assay. 40 hours of selection.",
        "title": "Deep mutational scan of Gal4 DNA-binding domain, SEL_A_40h",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "log ratios"
            },
            {
                "text": "DNA-binding"
            },
            {
                "text": "Yeast two-hybrid"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "25559584",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/25559584",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Gal4",
            "reference_sequence": {
                "sequence": "AAGCTACTGTCTTCTATCGAACAAGCATGCGATATTTGCCGACTTAAAAAGCTCAAGTGCTCCAAAGAAAAACCGAAGTGCGCCAAGTGTCTGAAGAACAACTGGGAGTGTCGCTACTCTCCCAAAACCAAAAGGTCTCCGCTGACTAGGGCACATCTGACAGAAGTGGAATCAAGGCTAGAAAGACTGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P04386",
                "url": "http://purl.uniprot.org/uniprot/P04386",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000012-a-3",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000012-a-3",
        "variant_count": 1319,
        "experiment": "urn:mavedb:00000012-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-11-11",
        "modification_date": "2020-11-11",
        "urn": "urn:mavedb:00000047-b-1",
        "publish_date": "2020-11-11",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment utilised site-saturation mutagenesis (SSM) to measure the functional consequences of mutations in the human chemokine receptor, CCR5 and to map ligand interaction sites. Cells were selected for binding to Ab 2D7.",
        "method_text": "Data obtained from sorting cells for both surface expression and Ab 2D7 binding was analysed using Enrich (version unspecified). Log~2~ enrichment ratios were calculated and normalised by subtracting the frequency of the WT sequence. Log~2~ enrichment ratios for two replicates were averaged to obtain variant scores. Note that the scores here were not reported in the manuscript tables, but were calculated from the replicate enrichment ratios that were reported.",
        "short_description": "Deep mutational scan selecting for CCR5 binding to Ab 2D7 in Expi293F cells.",
        "title": "CCR5 Ab 2D7 binding",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29678950",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29678950",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0001-6681-7994"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CCR5",
            "reference_sequence": {
                "sequence": "GATTATCAAGTGTCAAGTCCAATCTATGACATCAATTATTATACATCGGAGCCCTGCCAAAAAATCAATGTGAAGCAAATCGCAGCCCGCCTCCTGCCTCCGCTCTACTCACTGGTGTTCATCTTTGGTTTTGTGGGCAACATGCTGGTCATCCTCATCCTGATAAACTGCAAAAGGCTGAAGAGCATGACTGACATCTACCTGCTCAACCTGGCCATCTCTGACCTGTTTTTCCTTCTTACTGTCCCCTTCTGGGCTCACTATGCTGCCGCCCAGTGGGACTTTGGAAATACAATGTGTCAACTCTTGACAGGGCTCTATTTTATAGGCTTCTTCTCTGGAATCTTCTTCATCATCCTCCTGACAATCGATAGGTACCTGGCTGTCGTCCATGCTGTGTTTGCTTTAAAAGCCAGGACGGTCACCTTTGGGGTGGTGACAAGTGTGATCACTTGGGTGGTGGCTGTGTTTGCGTCTCTCCCAGGAATCATCTTTACCAGATCTCAAAAAGAAGGTCTTCATTACACCTGCAGCTCTCATTTTCCATACAGTCAGTATCAATTCTGGAAGAATTTCCAGACATTAAAGATAGTCATCTTGGGGCTGGTCCTGCCGCTGCTTGTCATGGTCATCTGCTACTCGGGAATCCTAAAAACTCTGCTTCGGTGTCGAAATGAGAAGAAGAGGCACAGGGCTGTGAGGCTTATCTTCACCATCATGATTGTTTATTTTCTCTTCTGGGCTCCCTACAACATTGTCCTTCTCCTGAACACCTTCCAGGAATTCTTTGGCCTGAATAATTGCAGTAGCTCTAACAGGTTGGACCAAGCTATGCAGGTGACAGAGACTCTTGGGATGACGCACTGCTGCATCAACCCCATCATCTATGCCTTTGTCGGGGAGAAGTTCAGAAACTACCTCTTAGTCTTCTTCCAAAAGCACATTGCCAAACGCTTCTGCAAATGCTGTTCTATTTTCCAGCAAGAGGCTCCCGAGCGAGCAAGCTCAGTTTACACCCGATCCACTGGGGAGCAGGAAATATCTGTGGGCTTG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P51681",
                "url": "http://purl.uniprot.org/uniprot/P51681",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000047-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "rep1",
            "rep2"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000047-b-1",
        "variant_count": 7020,
        "experiment": "urn:mavedb:00000047-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-07-10",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000003-b-1",
        "publish_date": "2018-07-10",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "These experiments measured the functional consequences of mutations in the BRCA1 RING domain, where most clinically-relevant single nucleotide variants reside. One goal of the study was to create a \"look-up table\" of single nucleotide variants for clinical use, by prospectively measuring the impact of all possible variants that are likely to appear in patients. The study combines two different assays - one based on E3 ubiquitin ligase activity and one based on BRCA1-BARD1 heterodimer formation - and makes one of the first attempts to combine data from different MAVEs on the same target.\r\n\r\nThis entry contains scores from the yeast two-hybrid assay, which tested the BRCA1-BARD1 heterodimer formation in BRCA1 variants.\r\n\r\nNote that this score set does not describe the scores presented in the original publication. It is a reanalysis of the raw data that was produced as part of testing and development for Enrich2.",
        "method_text": "Scores were calculated using the Enrich2 weighted least squares regression scoring model. Replicate scores were combined using the Enrich2 random-effects model. Counts for each variant were calculated as the sum of counts for all barcodes associated with that variant.\r\n\r\nThe scores and standard errors calculated for each of replicate appear as additional columns.\r\n\r\nCount columns are named using the format `<replicate>_c_<timepoint>`. The 0 time point is the input (unselected). Time points are given in hours.",
        "short_description": "Nucleotide variant scores for deep mutational scan of the BRCA1 RING domain using yeast two-hybrid calculated by Enrich2.",
        "title": "Enrich2 nucleotide variant scores for BRCA1 Y2H",
        "keywords": [
            {
                "text": "Yeast two-hybrid"
            },
            {
                "text": "ubiquitin"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28784151",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28784151",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "BRCA1 RING domain",
            "reference_sequence": {
                "sequence": "GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGCCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTATGCAAACAGCTATAATTTTGCAAAAAAGGAAAATAACTCTCCTGAACATCTAAAAGATGAAGTTTCTATCATCCAAAGTATGGGCTACAGAAACCGTGCCAAAAGACTTCTACAGAGTGAACCCGAAAATCCTTCCTTGCAGGAAACCAGTCTCAGTGTCCAACTCTCTAACCTTGGAACTGTGAGAACTCTGAGGACAAAGCAGCGGATACAACCTCAAAGGACGTCTGTCTACATTGAATTGGGATCTGATTCTTCTGAAGATACCGTTAATAAGGCAACTTATTGCAGTGTGGGAGATCAAGAATTGTTACAAATCACCCCTCAAGGAACCAGGGATGAAATCAGTTTGGATTCTGCAAAAAAGGCTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAGTTC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000003-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_Y2H_1_Rep1",
            "score_Y2H_1_Rep1",
            "SE_Y2H_1_Rep2",
            "score_Y2H_1_Rep2",
            "SE_Y2H_1_Rep3",
            "score_Y2H_1_Rep3",
            "SE_Y2H_2_Rep1",
            "score_Y2H_2_Rep1",
            "SE_Y2H_2_Rep2",
            "score_Y2H_2_Rep2",
            "SE_Y2H_2_Rep3",
            "score_Y2H_2_Rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "PlusE2NewRep3_c_0",
            "PlusE2NewRep3_c_1",
            "PlusE2NewRep3_c_2",
            "PlusE2NewRep3_c_3",
            "PlusE2NewRep3_c_4",
            "PlusE2NewRep3_c_5",
            "PlusE2NewRep4_c_0",
            "PlusE2NewRep4_c_1",
            "PlusE2NewRep4_c_2",
            "PlusE2NewRep4_c_3",
            "PlusE2NewRep4_c_4",
            "PlusE2NewRep4_c_5",
            "PlusE2NewRep5_c_0",
            "PlusE2NewRep5_c_1",
            "PlusE2NewRep5_c_2",
            "PlusE2NewRep5_c_3",
            "PlusE2NewRep5_c_4",
            "PlusE2NewRep5_c_5",
            "PlusE2Rep3_c_0",
            "PlusE2Rep3_c_1",
            "PlusE2Rep3_c_2",
            "PlusE2Rep3_c_3",
            "PlusE2Rep3_c_4",
            "PlusE2Rep3_c_5",
            "PlusE2Rep4_c_0",
            "PlusE2Rep4_c_1",
            "PlusE2Rep4_c_2",
            "PlusE2Rep4_c_3",
            "PlusE2Rep4_c_4",
            "PlusE2Rep4_c_5",
            "PlusE2Rep5_c_0",
            "PlusE2Rep5_c_1",
            "PlusE2Rep5_c_2",
            "PlusE2Rep5_c_3",
            "PlusE2Rep5_c_4",
            "PlusE2Rep5_c_5",
            "Y2H_1_Rep1_c_0",
            "Y2H_1_Rep1_c_18",
            "Y2H_1_Rep1_c_37",
            "Y2H_1_Rep1_c_45",
            "Y2H_1_Rep2_c_0",
            "Y2H_1_Rep2_c_18",
            "Y2H_1_Rep2_c_37",
            "Y2H_1_Rep2_c_45",
            "Y2H_1_Rep3_c_0",
            "Y2H_1_Rep3_c_18",
            "Y2H_1_Rep3_c_37",
            "Y2H_1_Rep3_c_45",
            "Y2H_2_Rep1_c_0",
            "Y2H_2_Rep1_c_16",
            "Y2H_2_Rep1_c_41",
            "Y2H_2_Rep1_c_64",
            "Y2H_2_Rep2_c_0",
            "Y2H_2_Rep2_c_16",
            "Y2H_2_Rep2_c_41",
            "Y2H_2_Rep2_c_64",
            "Y2H_2_Rep3_c_0",
            "Y2H_2_Rep3_c_16",
            "Y2H_2_Rep3_c_41",
            "Y2H_2_Rep3_c_64"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000003-b-1",
        "variant_count": 20724,
        "experiment": "urn:mavedb:00000003-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2021-01-03",
        "modification_date": "2021-01-03",
        "urn": "urn:mavedb:00000054-a-1",
        "publish_date": "2021-01-03",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured variant lipid phosphatase activity using massively parallel functional testing with a humanised yeast assay. It measured the effects of PTEN mutation on lipid phosphatase activity in vivo, which is vital for preventing the accumulation of phospholipids, and is therefore required for cell survival.",
        "method_text": "Pair-ended reads were merged with PEAR. Custom Python scripts were used to identify and count sequence variants. Variant count files were then analysed with Enrich2 v.1.2.0. Scores and standard errors were calculated for each variant. A variant was considered high-confidence if the 95% confidence interval of the fitness score was <= 1, or if measurements from each biological replicate were both lower or both higher than the 95% bound of the synonymous distribution.\r\n\r\nVariants were considered wild-type like if they achieved a fitness score between the lower and upper 95th percentile (two-tailed) of synonymous variants, -1.11 and 0.89.\r\n\r\nThe score and SE columns are combined across all replicates as applicable. Scores and SE for each biological replicate (A and B) and technical replicate (A1, A2, etc.) are also provided.",
        "short_description": "PTEN activity measured using a lipid phosphatase assay in a humanized yeast model.",
        "title": "PTEN lipid phosphatase scores",
        "keywords": [
            {
                "text": "phosphatase"
            },
            {
                "text": "humanized yeast"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29706350",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29706350",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-9955-0656"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "PTEN",
            "reference_sequence": {
                "sequence": "ATGACAGCCATCATCAAAGAGATCGTTAGCAGAAACAAAAGGAGATATCAAGAGGATGGATTCGACTTAGACTTGACCTATATTTATCCAAACATTATTGCTATGGGATTTCCTGCAGAAAGACTTGAAGGCGTATACAGGAACAATATTGATGATGTAGTAAGGTTTTTGGATTCAAAGCATAAAAACCATTACAAGATATACAATCTTTGTGCTGAAAGACATTATGACACCGCCAAATTTAATTGCAGAGTTGCACAATATCCTTTTGAAGACCATAACCCACCACAGCTAGAACTTATCAAACCCTTTTGTGAAGATCTTGACCAATGGCTAAGTGAAGATGACAATCATGTTGCAGCAATTCACTGTAAAGCTGGAAAGGGACGAACTGGTGTAATGATATGTGCATATTTATTACATCGGGGCAAATTTTTAAAGGCACAAGAGGCCCTAGATTTCTATGGGGAAGTAAGGACCAGAGACAAAAAGGGAGTAACTATTCCCAGTCAGAGGCGCTATGTGTATTATTATAGCTACCTGTTAAAGAATCATCTGGATTATAGACCAGTGGCACTGTTGTTTCACAAGATGATGTTTGAAACTATTCCAATGTTCAGTGGCGGAACTTGCAATCCTCAGTTTGTGGTCTGCCAGCTAAAGGTGAAGATATATTCCTCCAATTCAGGACCCACACGACGGGAAGACAAGTTCATGTACTTTGAGTTCCCTCAGCCGTTACCTGTGTGTGGTGATATCAAAGTAGAGTTCTTCCACAAACAGAACAAGATGCTAAAAAAGGACAAAATGTTTCACTTTTGGGTAAATACATTCTTCATACCAGGACCAGAGGAAACCTCAGAAAAAGTAGAAAATGGAAGTCTATGTGATCAAGAAATCGATAGCATTTGCAGTATAGAGCGTGCAGATAATGACAAGGAATATCTAGTACTTACTTTAACAAAAAATGATCTTGACAAAGCAAATAAAGACAAAGCCAACCGATACTTTTCTCCAAATTTTAAGGTGAAGCTGTACTTCACAAAAACAGTAGAGGAGCCGTCAAATCCAGAGGCTAGCAGTTCAACTTCTGTAACACCAGATGTTAGTGACAATGAACCTGATCATTATAGATATTCTGACACCACTGACTCTGATCCAGAGAATGAACCTTTTGATGAAGATCAGCATACACAAATTACAAAAGTCTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P60484",
                "url": "http://purl.uniprot.org/uniprot/P60484",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000054-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "High_conf",
            "A_score",
            "A_SE",
            "B_score",
            "B_SE",
            "A1_score",
            "A1_SE",
            "A2_score",
            "A2_SE",
            "A3_score",
            "A3_SE",
            "B1_score",
            "B1_SE",
            "B2_score",
            "B2_SE",
            "B3_score",
            "B3_SE"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000054-a-1",
        "variant_count": 8681,
        "experiment": "urn:mavedb:00000054-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-08-07",
        "modification_date": "2021-01-18",
        "urn": "urn:mavedb:00000059-a-1",
        "publish_date": "2021-01-18",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "To comprehensively characterize the consequences of the p53 mutation spectrum, the author generated ~10,000 variants on the DNA-binding domain (DBD) of p53 variants in human cells and measured the relative growth rate of each mutant through selection.",
        "method_text": "The log (base 2) fold change for each mutation at each time point is first calculated. The values in each of the sub-library is normalized by all of the variants to make sure that the mean synonymous mutation values are the same across the sub-libraries. The relative fitness (RFS) score for each variant was calculated as the median of the enrichment/depletion ratio at each time point.",
        "short_description": "The impact of p53 variant is represented by the frequency change of cells harboring that mutation over several time point.",
        "title": "p53 variant effect measured by cell growth",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29979965",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29979965",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "p53",
            "reference_sequence": {
                "sequence": "ACCTACCAGGGCAGCTACGGTTTCCGTCTGGGCTTCTTGCATTCTGGGACAGCCAAGTCTGTGACTTGCACGTACTCCCCTGCCCTCAACAAGATGTTTTGCCAACTGGCCAAGACCTGCCCTGTGCAGCTGTGGGTTGATTCCACACCCCCGCCCGGCACCCGCGTCCGCGCCATGGCCATCTACAAGCAGTCACAGCACATGACGGAGGTTGTGAGGCGCTGCCCCCACCATGAGCGCTGCTCAGATAGCGATGGTCTGGCCCCTCCTCAGCATCTTATCCGAGTGGAAGGAAATTTGCGTGTGGAGTATTTGGATGACAGAAACACTTTTCGGCATAGTGTGGTGGTGCCCTATGAGCCGCCTGAGGTTGGCTCTGACTGTACCACCATCCACTACAACTACATGTGTAACAGTTCCTGCATGGGCGGCATGAACCGGAGGCCCATCCTCACCATCATCACACTGGAAGACTCCAGTGGTAATCTACTGGGACGGAACAGCTTTGAGGTGCGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 101,
                "identifier": "P04637",
                "url": "http://purl.uniprot.org/uniprot/P04637",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000059-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000059-a-1",
        "variant_count": 9273,
        "experiment": "urn:mavedb:00000059-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-04-14",
        "urn": "urn:mavedb:00000061-a-1",
        "publish_date": "2021-04-14",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors generated a library of RAF variant and use the PACS system to test KRAS4b/RAF protein-protein interaction (PPI). The experimental data revealed positions along the binding interface as well as which substitutions are tolerated at each position.",
        "method_text": "Samples are collected after 2h and sequenced by Illumina. The counts for each variant is first added by 1, then divided by total sequence counts at this time point to calculate variant frequency. The functional score equals the division of a variant frequency at this time point and its frequency in initial library. Further normalizing the functional scores by wild type scores will give the relative enrichment values. The score data includes scores from three replicates which are suffixed by: _rep1, _rep2 & _rep3. The final score is the median of them.",
        "short_description": "Measuring the interaction of mutated RAF to RAS by a new phage-assisted continuous selection (PACS) system.",
        "title": "RAF variant selected after 2h",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1021/acschembio.9b00669",
                "url": "https://doi.org/10.1021/acschembio.9b00669",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31808666",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31808666",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "RAF",
            "reference_sequence": {
                "sequence": "TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 51,
                "identifier": "P04049",
                "url": "http://purl.uniprot.org/uniprot/P04049",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000061-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "score_rep1",
            "score_rep2",
            "score_rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000061-a-1",
        "variant_count": 298,
        "experiment": "urn:mavedb:00000061-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-11-11",
        "modification_date": "2020-11-11",
        "urn": "urn:mavedb:00000047-a-1",
        "publish_date": "2020-11-11",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment utilised site-saturation mutagenesis (SSM) to measure the functional consequences of mutations in the human chemokine receptor, CCR5 and to map ligand interaction sites. Cells were selected for CCR5 surface expression.",
        "method_text": "Data obtained from sorting cells for surface expression using anti-myc staining was analysed using Enrich (version unspecified). Log~2~ enrichment ratios were calculated and normalised by subtracting the frequency of the WT sequence. Log~2~ enrichment ratios for four replicates (two replicates each for two variant libraries) were averaged to obtain variant scores. Note that the scores here were not reported in the manuscript tables, but were calculated from the replicate enrichment ratios that were reported.",
        "short_description": "Deep mutational scan selecting for cell surface expression of CCR5 in Expi293F cells.",
        "title": "CCR5 surface expression",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29678950",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29678950",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23827138",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23827138",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0001-6681-7994"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CCR5",
            "reference_sequence": {
                "sequence": "GATTATCAAGTGTCAAGTCCAATCTATGACATCAATTATTATACATCGGAGCCCTGCCAAAAAATCAATGTGAAGCAAATCGCAGCCCGCCTCCTGCCTCCGCTCTACTCACTGGTGTTCATCTTTGGTTTTGTGGGCAACATGCTGGTCATCCTCATCCTGATAAACTGCAAAAGGCTGAAGAGCATGACTGACATCTACCTGCTCAACCTGGCCATCTCTGACCTGTTTTTCCTTCTTACTGTCCCCTTCTGGGCTCACTATGCTGCCGCCCAGTGGGACTTTGGAAATACAATGTGTCAACTCTTGACAGGGCTCTATTTTATAGGCTTCTTCTCTGGAATCTTCTTCATCATCCTCCTGACAATCGATAGGTACCTGGCTGTCGTCCATGCTGTGTTTGCTTTAAAAGCCAGGACGGTCACCTTTGGGGTGGTGACAAGTGTGATCACTTGGGTGGTGGCTGTGTTTGCGTCTCTCCCAGGAATCATCTTTACCAGATCTCAAAAAGAAGGTCTTCATTACACCTGCAGCTCTCATTTTCCATACAGTCAGTATCAATTCTGGAAGAATTTCCAGACATTAAAGATAGTCATCTTGGGGCTGGTCCTGCCGCTGCTTGTCATGGTCATCTGCTACTCGGGAATCCTAAAAACTCTGCTTCGGTGTCGAAATGAGAAGAAGAGGCACAGGGCTGTGAGGCTTATCTTCACCATCATGATTGTTTATTTTCTCTTCTGGGCTCCCTACAACATTGTCCTTCTCCTGAACACCTTCCAGGAATTCTTTGGCCTGAATAATTGCAGTAGCTCTAACAGGTTGGACCAAGCTATGCAGGTGACAGAGACTCTTGGGATGACGCACTGCTGCATCAACCCCATCATCTATGCCTTTGTCGGGGAGAAGTTCAGAAACTACCTCTTAGTCTTCTTCCAAAAGCACATTGCCAAACGCTTCTGCAAATGCTGTTCTATTTTCCAGCAAGAGGCTCCCGAGCGAGCAAGCTCAGTTTACACCCGATCCACTGGGGAGCAGGAAATATCTGTGGGCTTG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P51681",
                "url": "http://purl.uniprot.org/uniprot/P51681",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000047-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "rep1_anti-myc-FITC",
            "rep2_anti-myc-FITC",
            "rep1_anti-myc-Alexa",
            "rep2_anti-myc-Alexa"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000047-a-1",
        "variant_count": 7020,
        "experiment": "urn:mavedb:00000047-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-g-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast Treated with Dopamine",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1101/2020.05.01.072884",
                "url": "https://doi.org/10.1101/2020.05.01.072884",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-g-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-g-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-g",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-07-15",
        "modification_date": "2019-07-15",
        "urn": "urn:mavedb:00000035-a-1",
        "publish_date": "2019-07-15",
        "created_by": "0000-0002-4998-4368",
        "modified_by": "0000-0002-4998-4368",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of HMG-CoA reductase (HMGCR) based on a functional complementation assay in yeast via DMS-TIleSeq in rosuvastatin media.",
        "title": "HMGCR rosuvastatin imputed and refined",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-4998-4368"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HMGCR",
            "reference_sequence": {
                "sequence": "ATGTTGTCAAGACTTTTTCGAATGCATGGCCTCTTTGTGGCCTCCCATCCCTGGGAAGTCATAGTGGGGACAGTGACACTGACCATCTGCATGATGTCCATGAACATGTTTACTGGTAACAATAAGATCTGTGGTTGGAATTATGAATGTCCAAAGTTTGAAGAGGATGTTTTGAGCAGTGACATTATAATTCTGACAATAACACGATGCATAGCCATCCTGTATATTTACTTCCAGTTCCAGAATTTACGTCAACTTGGATCAAAATATATTTTGGGTATTGCTGGCCTTTTCACAATTTTCTCAAGTTTTGTATTCAGTACAGTTGTCATTCACTTCTTAGACAAAGAATTGACAGGCTTGAATGAAGCTTTGCCCTTTTTCCTACTTTTGATTGACCTTTCCAGAGCAAGCACATTAGCAAAGTTTGCCCTCAGTTCCAACTCACAGGATGAAGTAAGGGAAAATATTGCTCGTGGAATGGCAATTTTAGGTCCTACGTTTACCCTCGATGCTCTTGTTGAATGTCTTGTGATTGGAGTTGGTACCATGTCAGGGGTACGTCAGCTTGAAATTATGTGCTGCTTTGGCTGCATGTCAGTTCTTGCCAACTACTTCGTGTTCATGACTTTCTTCCCAGCTTGTGTGTCCTTGGTATTAGAGCTTTCTCGGGAAAGCCGCGAGGGTCGTCCAATTTGGCAGCTCAGCCATTTTGCCCGAGTTTTAGAAGAAGAAGAAAATAAGCCGAATCCTGTAACTCAGAGGGTCAAGATGATTATGTCTCTAGGCTTGGTTCTTGTTCATGCTCACAGTCGCTGGATAGCTGATCCTTCTCCTCAAAACAGTACAGCAGATACTTCTAAGGTTTCATTAGGACTGGATGAAAATGTGTCCAAGAGAATTGAACCAAGTGTTTCCCTCTGGCAGTTTTATCTCTCTAAAATGATCAGCATGGATATTGAACAAGTTATTACCCTAAGTTTAGCTCTCCTTCTGGCTGTCAAGTACATCTTCTTTGAACAAACAGAGACAGAATCTACACTCTCATTAAAAAACCCTATCACATCTCCTGTAGTGACACAAAAGAAAGTCCCAGACAATTGTTGTAGACGTGAACCTATGCTGGTCAGAAATAACCAGAAATGTGATTCAGTAGAGGAAGAGACAGGGATAAACCGAGAAAGAAAAGTTGAGGTTATAAAACCCTTAGTGGCTGAAACAGATACCCCAAACAGAGCTACATTTGTGGTTGGTAACTCCTCCTTACTCGATACTTCATCAGTACTGGTGACACAGGAACCTGAAATTGAACTTCCCAGGGAACCTCGGCCTAATGAAGAATGTCTACAGATACTTGGGAATGCAGAGAAAGGTGCAAAATTCCTTAGTGATGCTGAGATCATCCAGTTAGTCAATGCTAAGCATATCCCAGCCTACAAGTTGGAAACTCTGATGGAAACTCATGAGCGTGGTGTATCTATTCGCCGACAGTTACTTTCCAAGAAGCTTTCAGAACCTTCTTCTCTCCAGTACCTACCTTACAGGGATTATAATTACTCCTTGGTGATGGGAGCTTGTTGTGAGAATGTTATTGGATATATGCCCATCCCTGTTGGAGTGGCAGGACCCCTTTGCTTAGATGAAAAAGAATTTCAGGTTCCAATGGCAACAACAGAAGGTTGTCTTGTGGCCAGCACCAATAGAGGCTGCAGAGCAATAGGTCTTGGTGGAGGTGCCAGCAGCCGAGTCCTTGCAGATGGGATGACTCGTGGCCCAGTTGTGCGTCTTCCACGTGCTTGTGACTCTGCAGAAGTGAAAGCCTGGCTCGAAACATCTGAAGGGTTCGCAGTGATAAAGGAGGCATTTGACAGCACTAGCAGATTTGCACGTCTACAGAAACTTCATACAAGTATAGCTGGACGCAACCTTTATATCCGTTTCCAGTCCAGGTCAGGGGATGCCATGGGGATGAACATGATTTCAAAGGGTACAGAGAAAGCACTTTCAAAACTTCACGAGTATTTCCCTGAAATGCAGATTCTAGCCGTTAGTGGTAACTATTGTACTGACAAGAAACCTGCTGCTATAAATTGGATAGAGGGAAGAGGAAAATCTGTTGTTTGTGAAGCTGTCATTCCAGCCAAGGTTGTCAGAGAAGTATTAAAGACTACCACAGAGGCTATGATTGAGGTCAACATTAACAAGAATTTAGTGGGCTCTGCCATGGCTGGGAGCATAGGAGGCTACAACGCCCATGCAGCAAACATTGTCACCGCCATCTACATTGCCTGTGGACAGGATGCAGCACAGAATGTTGGTAGTTCAAACTGTATTACTTTAATGGAAGCAAGTGGTCCCACAAATGAAGATTTATATATCAGCTGCACCATGCCATCTATAGAGATAGGAACGGTGGGTGGTGGGACCAACCTACTACCTCAGCAAGCCTGTTTGCAGATGCTAGGTGTTCAAGGAGCATGCAAAGATAATCCTGGGGAAAATGCCCGGCAGCTTGCCCGAATTGTGTGTGGGACCGTAATGGCTGGGGAATTGTCACTTATGGCAGCATTGGCAGCAGGACATCTTGTCAAAAGTCACATGATTCACAACAGGTCGAAGATCAATTTACAAGACCTCCAAGGAGCTTGCACCAAGAAGACAGCCTGA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000035-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000035-a-1",
        "variant_count": 18448,
        "experiment": "urn:mavedb:00000035-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-14",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000012-a-5",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment demonstrated the programmed allelic series (PALS) method for site-directed mutagenesis using microarrays. The impact of nearly all singleton missense mutation in the Gal4 yeast transcription factor was measured in multiple selections.",
        "method_text": "Variant counts were calculated by summing the read counts of barcodes associated with each variant. The enrichment score for each variant is the $\\log_2$ ratio of the mutant count over the wild type count for the selected time point minus the $\\log_2$ ratio of the mutant count over the wild type count for the input time point (ratio of ratios).",
        "short_description": "Deep mutational scan of Gal4 DNA-binding domain using a yeast growth assay. 40 hours of selection.",
        "title": "Deep mutational scan of Gal4 DNA-binding domain, SEL_C_40h",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "log ratios"
            },
            {
                "text": "DNA-binding"
            },
            {
                "text": "Yeast two-hybrid"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "25559584",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/25559584",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Gal4",
            "reference_sequence": {
                "sequence": "AAGCTACTGTCTTCTATCGAACAAGCATGCGATATTTGCCGACTTAAAAAGCTCAAGTGCTCCAAAGAAAAACCGAAGTGCGCCAAGTGTCTGAAGAACAACTGGGAGTGTCGCTACTCTCCCAAAACCAAAAGGTCTCCGCTGACTAGGGCACATCTGACAGAAGTGGAATCAAGGCTAGAAAGACTGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P04386",
                "url": "http://purl.uniprot.org/uniprot/P04386",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000012-a-5",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000012-a-5",
        "variant_count": 1319,
        "experiment": "urn:mavedb:00000012-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-17",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000010-a-1",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the sequence-function relationships for variants in a common, eukaryotic RNA binding domain, the RRM2 domain of yeast Pab1. The results identified clusters of residues with similar mutational patterns as well as highly-conserved residues that are critical for stability and/or function.",
        "method_text": "Variant reads were filtered such that all bases had at least Q20 for all positions. Log ratio scores were calculated using the Enrich software package and normalized to the wild type score.",
        "short_description": "Deep mutational scan of the RRM2 domain of the Saccharomyces cerevisiae poly(A)-binding protein (Pab1). Data for single and double mutants only.",
        "title": "Deep mutational scan of Pab1 RRM domain",
        "keywords": [
            {
                "text": "RNA-binding"
            },
            {
                "text": "log ratios"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "22006916",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/22006916",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "24064791",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/24064791",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "PAB1",
            "reference_sequence": {
                "sequence": "GGTAACATCTTTATCAAGAACTTGCACCCTGATATTGACAACAAGGCTTTGTATGACACTTTCTCTGTGTTTGGTGACATCTTGTCCAGCAAGATTGCCACCGACGAAAACGGAAAATCCAAGGGTTTTGGGTTTGTTCACTTCGAAGAAGAAGGTGCTGCCAAGGAAGCTATTGATGCTTTGAATGGTATGCTGTTGAACGGTCAAGAAATTTATGTTGCTCCT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 125,
                "identifier": "P04147",
                "url": "http://purl.uniprot.org/uniprot/P04147",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000010-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000010-a-1",
        "variant_count": 40852,
        "experiment": "urn:mavedb:00000010-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-j-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast Treated with Tunicamycin",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1101/2020.05.01.072884",
                "url": "https://doi.org/10.1101/2020.05.01.072884",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-j-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-j-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-j",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000015-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "X",
            "end": 139530765,
            "start": 139530463,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://doi.org/10.1038/s41467-019-11526-w>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://doi.org/10.1038/s41467-019-11526-w>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of F9 promoter in HepG2 cells.",
        "title": "Saturation mutagenesis MPRA of F9 promoter",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "F9 promoter",
            "reference_sequence": {
                "sequence": "GTCCCACTGATGAACTGTGCTGCCACAGTAAATGTAGCCACTATGCCTATCTCCATTCTGAAGATGTGTCACTTCCTGTTTCAGACTCAAATCAGCCACAGTGGCAGAAGCCCACGAAATCAGAGGTGAAATTTAATAATGACCACTGCCCATTCTCTTCACTTGTCCCAAGAGGCCATTGGAAATAGTCCAAAGACCCATTGAGGGAGATGGACATTATTTCCCAGAAGTAAATACAGCTCAGCTTGTACTTTGGTACAACTAATCGACCTTACCACTTTCACAATCTGCTAGCAAAGGTTA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000015-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000015-a-1",
        "variant_count": 984,
        "experiment": "urn:mavedb:00000015-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000025-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "8",
            "end": 127401428,
            "start": 127400829,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of MYC enhancer (rs6983267) in HEK293T cells.",
        "title": "Saturation mutagenesis MPRA of MYC enhancer (rs6983267)",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MYC enhancer (rs6983267)",
            "reference_sequence": {
                "sequence": "CTGCATCGCTCCATAGAGCCTGCAGAGGGCACTAGACTGGGAATTAGAAAACCTGATTTCCCTTCCAGCTCCACCTCTGACCAATTGCCTGACCCTGGTCAAATTGCTTAACCTCTTCCTATCTCAGCTCCCTATCCATAAAACAGAGGGACGAATAAACTCTCCTCCTACCACTAAGAGGTGTAGCCAGAGTTAATACCCTCATCGTCCTTTGAGCTCAGCAGATGAAAGGCACTGAGAAAAGTACAAAGAATTTTTATGTGCTATTGACTTTATTTTATTTTATGTGGGGGAGGGAGCCGGCCCCAGCTGGAAAGCTGCTTTCTCTGAATCAAAGGGCAGGAACCCAGCAAGTTTCTCAGGATTGGGGCCTTAGACTGGGCTGTGTATACAGACAGTGCCAGCCAACCCCACAGTTCAGTTTCCTTTAACCTGGTGCTCCAGGCAATAACTGTGCAACTCTGCAATTTAACAATGTGTTCTTTGTCCCACAACTGTTCTCGTTTCTCAACTGCCCAGGTAATATGTTTGGGCCTGTAGGAAGAGTCAAATAGTTAATAAGGGAAGGGTTTGGCATGCCCTACGTAAGTTCTACCAGCA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000025-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000025-a-1",
        "variant_count": 1950,
        "experiment": "urn:mavedb:00000025-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2020-08-24",
        "modification_date": "2020-12-10",
        "urn": "urn:mavedb:00000052-a-1",
        "publish_date": "2020-12-10",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study generated thousands of mutated sequence on Gcn4 activation domain which contains one or several amino acid substitutions. They used a high-throughput fluorescent reporter assay to identify the transcription ability of mutated Gcn4 sequences through cell sorting.",
        "method_text": "The variant abundance in each bin is determined by high-throughput sequencing to the barcodes in an Illumina platform. The reads are first normalized by the total reads in each bin. A weighted mean value is calculated through the median GFP/mCherry ratio in each bin. This mean value is further divided by wildtype median value and logarithm (base 2) transformed. The raw activity result for the two replicates are shown in the score data (\"Raw_Replicate_1\" & \"Raw_Replicate_2\"). The original paper only used the normalized result for the first replicate which becomes the \"score\" column and the normalized result for replicate two is also deposited as \"Norm_Replicate_2\". Count data includes 8 bins for experiments in complete media with 2 replicates and sorted on mCherry only.\r\n\r\nBecause each barcode was scored individually and reported in the supplement, there are multiple scores for each variant. Each of these is the score from one barcode.",
        "short_description": "Mutagenesis study of Gcn4 activation domain in complete media using fluorescent reporter assay.",
        "title": "Gcn4 activation domain activity",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29525204",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29525204",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Gcn4",
            "reference_sequence": {
                "sequence": "AGCACTGATTCAACTCCAATGTTTGAGTATGAAAACCTAGAAGACAACTCTAAAGAATGGACATCCTTGTTTGACAATGACATTCCAGTTACCACTGACGATGTTTCATTGGCTGATAAGGCAATTGAATCC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 100,
                "identifier": "P03069",
                "url": "http://purl.uniprot.org/uniprot/P03069",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000052-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "Norm_Replicate_2",
            "Raw_Replicate_2",
            "Raw_Replicate_1"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000052-a-1",
        "variant_count": 6500,
        "experiment": "urn:mavedb:00000052-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-l-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast Treated with Brefeldin A",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1101/2020.05.01.072884",
                "url": "https://doi.org/10.1101/2020.05.01.072884",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-l-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-l-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-l",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-18",
        "modification_date": "2019-07-25",
        "urn": "urn:mavedb:00000011-a-1",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This was the first dataset described that used the EMPIRIC approach (\"extremely methodical and parallel investigation of randomized individual codons\"). It generated experimental fitness measurements for a nine amino acid region of yeast Hsp90. The experimental design is based on calculating growth rates for yeast containing variant Hsp90 sequences as compared to yeast containing the wild type sequence.",
        "method_text": "Sequencing reads were filtered based on a minimum Phred quality score of 20 across all 36 bases. For each time point, the log2 ratio of each variant's count to the wild type count was calculated. The score of each variant was calculated as the slope of these log ratios to time in wild type generations. The integers in the counts column headings indicate the number of wild type generations for each time point. Scores for stop codons and mutants with scores within three SD of the stop codon mean were considered null-like and scored using only the first three time points.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae.",
        "title": "Deep mutational scan of HSP90",
        "keywords": [
            {
                "text": "log ratios"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "21464309",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/21464309",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000011-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "Gen_0",
            "Gen_3",
            "Gen_6",
            "Gen_9",
            "Gen_12",
            "Gen_15",
            "Gen_18",
            "Gen_21"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000011-a-1",
        "variant_count": 568,
        "experiment": "urn:mavedb:00000011-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000026-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "8",
            "end": 127519732,
            "start": 127519270,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of MYC enhancer (rs11986220) in LNCaP cells with 100nM DHT.",
        "title": "Saturation mutagenesis MPRA of MYC enhancer (rs11986220)",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MYC enhancer (rs11986220)",
            "reference_sequence": {
                "sequence": "GGTAAGTCAACATGAAATTATAAACCATGTTTAACTAACCCACAAGAAAAACAGAAAAAGAAAACAGATACATGAAAATCTGAGAGGAAAAAAAAAAAACAGAGAACACAATGGGAAGCTTCATTCAATGTAAGGGTACTAGAAGTTCTAGCCAGTGCAATTAAGAGGAAAAAAATAAATAAAAAGGCATATGTGTTGAAAGGAAGAAATTAAACTGTCTTTATTTGCAAATGACATGATTATCAGCACAGATAATCAAGATAAATATATAAAAAGATTTCTGAAACTAATAAGTTAGTTCAGTAAGGTCGTAAGCTATAAGACAAACAAAGGAAAATCAATTGTATTTGAATGTATCGACAGTAAACATATGGACATTAAAATTAACAATACAATATAATTTATATTTATTAAAAATATAAAATGCTTAGGCATAAATCTAACAAAACCCCCACAGTACTTG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000026-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000026-a-1",
        "variant_count": 1681,
        "experiment": "urn:mavedb:00000026-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2020-09-05",
        "modification_date": "2020-09-05",
        "urn": "urn:mavedb:00000046-a-3",
        "publish_date": "2020-09-05",
        "created_by": "0000-0001-7684-5841",
        "modified_by": "0000-0001-7684-5841",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "MARCH1, which targets CD86 for down-regulation, was expressed in cells with CD86 at the cell surface.  Cells that remained CD86 positive were sorted. Enrich2 log ratios were calculated with wild-type normalisation to determine which variants were enriched after selection thereby determining which variants were resistant to MARCH1-mediated down-regulation.",
        "title": "CD86 susceptibility to MARCH1",
        "keywords": [
            {
                "text": "MIR2"
            },
            {
                "text": "CD86"
            },
            {
                "text": "Flow Cytometry"
            },
            {
                "text": "DMS"
            },
            {
                "text": "mRNA"
            },
            {
                "text": "MARCH1"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0001-7684-5841"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CD86",
            "reference_sequence": {
                "sequence": "GACCACATTCCTTGGATTACAGCTGTACTTCCAACAGTTATTATATGTGTGATGGTTTTCTGTCTAATTCTATGGAAATGG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 243,
                "identifier": "P42081",
                "url": "http://purl.uniprot.org/uniprot/P42081",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000046-a-3",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_Replicate_1",
            "score_Replicate_1",
            "SE_Replicate_2",
            "score_Replicate_2",
            "SE_Replicate_3",
            "score_Replicate_3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "Replicate_1_c_0",
            "Replicate_1_c_1",
            "Replicate_2_c_0",
            "Replicate_2_c_1",
            "Replicate_3_c_0",
            "Replicate_3_c_1"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000046-a-3",
        "variant_count": 7635,
        "experiment": "urn:mavedb:00000046-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-27",
        "modification_date": "2020-11-20",
        "urn": "urn:mavedb:00000049-a-4",
        "publish_date": "2020-11-20",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human MTHFR via functional complementation in yeast at 12ug/ml folate in WT background",
        "title": "MTHFR at 12ug/ml folate in WT background",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390",
            "0000-0002-9219-4310",
            "0000-0002-2550-2141",
            "0000-0001-6465-5776"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MTHFR",
            "reference_sequence": {
                "sequence": "ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P42898",
                "url": "http://purl.uniprot.org/uniprot/P42898",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000177000",
                "url": "http://www.ensembl.org/id/ENSG00000177000",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 230,
                "identifier": "NM_005957",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_005957",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000049-a-4",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000049-a-4",
        "variant_count": 13704,
        "experiment": "urn:mavedb:00000049-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-11-09",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000005-a-2",
        "publish_date": "2018-12-04",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Success in precision medicine depends on our ability to determine which rare human genetic variants have functional effects. Classical homocystinuria - characterized by elevated homocyst(e)ine in plasma and urine - is caused by primarily-rare variants in the cystathionine beta-synthase (CBS) gene. About half of patients respond to vitamin B6 therapy. With early detection in newborns, existing therapies are highly effective. Functional CBS variants, especially those that respond to vitamin B6, can be detected based on their ability to restore growth in yeast cells lacking CYS4 (the yeast ortholog of CBS). This assay has previously been carried out only reactively after first observation of a variant in patients. Here we describe a proactive comprehensive missense variant effect map for human CBS. Together, saturation codon-replacement mutagenesis, en masse growth selection at different vitamin B6 levels, and sequencing yielded a look-up table for CBS missense variant function and vitamin B6-remediability in yeast. The CBS variant effect map identified disease variants and predicted both disease severity (r = 0.82) and human clinical response to vitamin B6 (r = 0.89). Thus, highly-multiplexed cell-based assays can yield proactive maps of variant function and patient response to therapy, even for rare variants not previously seen in the clinic.\r\n\r\nSee [Sun et al 2018](https://www.biorxiv.org/content/early/2018/11/19/473983)",
        "method_text": "##Scoring procedure:\r\nDMS-TileSeq reads were processed using the [tileseq_package](https://bitbucket.org/rothlabto/tileseq_package) and [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) softwares. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). Log-ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. Random-Forest-based machine learning was used to impute missing values and refine low-confidence measurements, based on intrinsic, structural, and biochemical features.\r\n\r\nSee [Sun et al 2018](https://www.biorxiv.org/content/early/2018/11/19/473983) for more details.\r\n\r\n## Additional columns:\r\n* exp.score = experimental score from the joint DMS-BarSeq/DMS-TileSeq screens\r\n* exp.sd = standard deviation of the experimental score\r\n* df = degrees of freedom (number of replicates contributing to the experimental score)\r\n* pred.score = machine-learning predicted score",
        "short_description": "A Deep Mutational Scan of the human cystathionine-beta-synthase (CBS) using functional complementation in yeast via DMS-TileSeq at high levels of Vitamin B6.",
        "title": "CBS high-B6 imputed and refined",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "Vitamin B6"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CBS",
            "reference_sequence": {
                "sequence": "ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P35520",
                "url": "http://purl.uniprot.org/uniprot/P35520",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000160200",
                "url": "http://www.ensembl.org/id/ENSG00000160200",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000005-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": "urn:mavedb:00000005-a-3",
        "current_version": "urn:mavedb:00000005-a-3",
        "variant_count": 11243,
        "experiment": "urn:mavedb:00000005-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-04-14",
        "urn": "urn:mavedb:00000061-e-1",
        "publish_date": "2021-04-14",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors generated a library of RAF variant and use the PACS system to test KRAS4b/RAF protein-protein interaction (PPI). The experimental data revealed positions along the binding interface as well as which substitutions are tolerated at each position.",
        "method_text": "Samples are collected after 10h and sequenced by Illumina. The counts for each variant is first added by 1, then divided by total sequence counts at this time point to calculate variant frequency. The functional score equals the division of a variant frequency at this time point and its frequency in initial library. Further normalizing the functional scores by wild type scores will give the relative enrichment values. The score data includes scores from three replicates which are suffixed by: _rep1, _rep2 & _rep3. The final score is the median of them.",
        "short_description": "Measuring the interaction of mutated RAF to RAS by a new phage-assisted continuous selection (PACS) system.",
        "title": "RAF variant selected after 10h",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1021/acschembio.9b00669",
                "url": "https://doi.org/10.1021/acschembio.9b00669",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31808666",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31808666",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "RAF",
            "reference_sequence": {
                "sequence": "TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 51,
                "identifier": "P04049",
                "url": "http://purl.uniprot.org/uniprot/P04049",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000061-e-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "score_rep1",
            "score_rep2",
            "score_rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000061-e-1",
        "variant_count": 298,
        "experiment": "urn:mavedb:00000061-e",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000019-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "11",
            "end": 5250078,
            "start": 5249805,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of HBG1 promoter in HEL 92.1.7 cells.",
        "title": "Saturation mutagenesis MPRA of HBG1 promoter",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HBG1 promoter",
            "reference_sequence": {
                "sequence": "GGCGTCTGGACTAGGAGCTTATTGATAACCTCAGACGTTCCAGAAGCGAGTGTGTGGAACTGCTGAAGGGTGCTTCCTTTTATTCTTCATCCCTAGCCAGCCGCCGGCCCCTGGCCTCACTGGATACTCTAAGACTATTGGTCAAGTTTGCCTTGTCAAGGCTATTGGTCAAGGCAAGGCTGGCCAACCCATGGGTGGAGTTTAGCCAGGGACCGTTTCAGACAGATATTTGCATTGAGATAGTGTGGGGAAGGGGCCCCCAAGAGGATACTGC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000019-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000019-a-1",
        "variant_count": 907,
        "experiment": "urn:mavedb:00000019-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-07-29",
        "modification_date": "2019-07-29",
        "urn": "urn:mavedb:00000038-b-2",
        "publish_date": "2019-07-29",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment used the same library of ubiquitin variants as previously described in Roscoe et al. 2013. The ubiquitin variants were displayed as C-terminal fusions with Aga2-HA and reacted with E1. Bound cells were isolated using FACS and deep sequencing was used to quantify enrichment or depletion of cells in the bound population.\r\n\r\nThis entry describes the second of two regions assayed with excess E1, covering ubiquitin positions 68-76.",
        "method_text": "Scores were calculated using the EMPIRIC log ratios approach, using the ratio of E1-reactive cells to HA-displaying cells. Scores were normalized such that the average wild type synonymous variant was 1 and the average nonsense mutation was 0. The raw $log_2$ ratio is included as an extra column. The \"Std\" column lists the standard deviation calculated from multiple measurements of synonymous codons. This value is `nan` when only one codon was measured.",
        "short_description": "Amino acid scores for deep mutational scan of ubiquitin in a yeast display binding assay to E1 with excess E1. Region 2 (positions 68-76).",
        "title": "Ubiquitin-E1 yeast display amino acid scores, excess E1, region 2",
        "keywords": [
            {
                "text": "ubiquitin"
            },
            {
                "text": "E1"
            },
            {
                "text": "yeast display"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "Binding"
            },
            {
                "text": "FACS"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "24862281",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/24862281",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23376099",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23376099",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Ubiquitin",
            "reference_sequence": {
                "sequence": "CACTTGGTCTTGAGATTGAGAGGTGGT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 67,
                "identifier": "P0CG63",
                "url": "http://purl.uniprot.org/uniprot/P0CG63",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000038-b-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "log2 (E1react/display)",
            "Std"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000038-b-2",
        "variant_count": 185,
        "experiment": "urn:mavedb:00000038-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000014-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "2",
            "end": 60495539,
            "start": 60494940,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://doi.org/10.1038/s41467-019-11526-w>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://doi.org/10.1038/s41467-019-11526-w>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of BCL11A enhancer in HEL 92.1.7 cells.",
        "title": "Saturation mutagenesis MPRA of BCL11A enhancer",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "BCL11A enhancer",
            "reference_sequence": {
                "sequence": "CCTAACACAGTAGCTGGTACCTGATAGGTGCCTATATGTGATGGATGGGTGGACAGCCCGACAGATGAAAAATGGACAATTATGAGGAGGGGAGAGTGCAGACAGGGGAAGCTTCACCTCCTTTACAATTTTGGGAGTCCACACGGCATGGCATACAAATTATTTCATTCCCATTGAGAAATAAAATCCAATTCTCCATCACCAAGAGAGCCTTCCGAAAGAGGCCCCCCTGGGCAAACGGCCACCGATGGAGAGGTCTGCCAGTCCTCTTCTACCCCACCCACGCCCCCACCCTAATCAGAGGCCAAACCCTTCCTGGAGCCTGTGATAAAAGCAACTGTTAGCTTGCACTAGACTAGCTTCAAAGTTGTATTGACCCTGGTGTGTTATGTCTAAGAGTAGATGCCATATCTCTTTTCTGGCCTATGTTATTACCTGTATGGACTTTGCACTGGAATCAGCTATCTGCTCTTACTTATGCACACCTGGGGCATAGAGCCAGCCCTGTATCGCTTTTCAGCCATCTCACTACAGATAACTCCCAAGTCCTGTCTAGCTGCCTTCCTTATCACAGGAATAGCACCCAAGGTCCATCAGTAC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000014-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000014-a-1",
        "variant_count": 2062,
        "experiment": "urn:mavedb:00000014-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2021-01-12",
        "modification_date": "2021-01-12",
        "urn": "urn:mavedb:00000055-b-1",
        "publish_date": "2021-01-12",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment identified candidate pharmacogenomic variants in NUDT15 that could contribute to thiopurine toxicity. Two functional assays were performed on the same variant library, one for NUDT15 activity and one for protein stability.",
        "method_text": "Barcode-variant counts were used as input for the ABSSeq RNA-sequencing analysis pipeline (Yang et al., 2016). Variant scores are based on the fold change in variant frequency between drug treated and untreated populations.\r\n\r\nReported are the non-normalized average score and standard deviation for each missense change, but the nature of the averaging (e.g. distinct codons with synonymous consequences or replicate assays) was not specified.",
        "short_description": "NUDT15 activity scores measured by a thiopurine-cytotoxicity-based screen.",
        "title": "NUDT15 activity scores",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "32094176",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32094176",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "27488180",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/27488180",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "NUDT15",
            "reference_sequence": {
                "sequence": "ATGACGGCCAGCGCACAGCCGCGCGGGCGGCGGCCAGGAGTCGGAGTCGGAGTCGTGGTGACCAGCTGCAAGCATCCGCGTTGCGTCCTCCTGGGGAAGAGGAAAGGCTCGGTTGGAGCTGGCAGTTTCCAACTCCCTGGAGGTCATCTGGAGTTCGGTGAAACCTGGGAAGAATGTGCTCAAAGGGAAACCTGGGAAGAAGCAGCTCTTCACCTGAAAAATGTTCACTTTGCCTCAGTTGTGAATTCTTTCATTGAGAAGGAGAATTACCATTATGTTACTATATTAATGAAAGGAGAAGTGGATGTGACTCATGATTCAGAACCAAAGAATGTAGAGCCTGAAAAAAATGAAAGTTGGGAGTGGGTTCCTTGGGAAGAACTACCTCCCCTGGACCAGCTTTTCTGGGGACTGCGTTGTTTAAAAGAACAAGGCTATGATCCATTTAAAGAAGATCTGAACCATCTGGTGGGATACAAAGGAAATCATCTCTAG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "Q9NV35",
                "url": "http://purl.uniprot.org/uniprot/Q9NV35",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000055-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000055-b-1",
        "variant_count": 3100,
        "experiment": "urn:mavedb:00000055-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-07-29",
        "modification_date": "2019-07-29",
        "urn": "urn:mavedb:00000038-b-1",
        "publish_date": "2019-07-29",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment used the same library of ubiquitin variants as previously described in Roscoe et al. 2013. The ubiquitin variants were displayed as C-terminal fusions with Aga2-HA and reacted with E1. Bound cells were isolated using FACS and deep sequencing was used to quantify enrichment or depletion of cells in the bound population.\r\n\r\nThis entry describes the first of two regions assayed with excess E1, covering ubiquitin positions 40-48.",
        "method_text": "Scores were calculated using the EMPIRIC log ratios approach, using the ratio of E1-reactive cells to HA-displaying cells. Scores were normalized such that the average wild type synonymous variant was 1 and the average nonsense mutation was 0. The raw $log_2$ ratio is included as an extra column. The \"Std\" column lists the standard deviation calculated from multiple measurements of synonymous codons. This value is `nan` when only one codon was measured.",
        "short_description": "Amino acid scores for deep mutational scan of ubiquitin in a yeast display binding assay to E1 with excess E1. Region 1 (positions 40-48).",
        "title": "Ubiquitin-E1 yeast display amino acid scores, excess E1, region 1",
        "keywords": [
            {
                "text": "ubiquitin"
            },
            {
                "text": "E1"
            },
            {
                "text": "yeast display"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "Binding"
            },
            {
                "text": "FACS"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "24862281",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/24862281",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23376099",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23376099",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Ubiquitin",
            "reference_sequence": {
                "sequence": "CAACAAAGATTGATCTTTGCTGGTAAG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 39,
                "identifier": "P0CG63",
                "url": "http://purl.uniprot.org/uniprot/P0CG63",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000038-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "log2 (E1react/display)",
            "Std"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000038-b-1",
        "variant_count": 188,
        "experiment": "urn:mavedb:00000038-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-07-29",
        "modification_date": "2019-07-29",
        "urn": "urn:mavedb:00000037-a-1",
        "publish_date": "2019-07-29",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study performed a bulk growth competition experiment measuring the effect of point mutations on ubiquitin structure and function. The results were consistent with previous studies of individual mutants as well as Alanine-scanning. Positions were clustered by their tolerance of mutations and highly sensitive regions were mapped.",
        "method_text": "Scores were calculated using EMPIRIC [Hietpas et al.]. Scores for each amino acid substitution are provided. The \"Std\" column lists the standard deviation calculated from multiple measurements of synonymous codons. This value is `nan` when only one codon was measured.\r\n\r\nVariants that were severe loss of function or low plasmid count (and therefore not scored) were excluded from this dataset.",
        "short_description": "EMPIRIC amino acid scores for deep mutational scan of ubiquitin and its effect on yeast growth.",
        "title": "Ubiquitin EMPIRIC amino acid scores",
        "keywords": [
            {
                "text": "ubiquitin"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "21464309",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/21464309",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23376099",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23376099",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Ubiquitin",
            "reference_sequence": {
                "sequence": "CAAATTTTCGTCAAGACTTTAACCGGTAAGACTATTACCCTGGAAGTTGAATCTTCTGACACTATTGACAATGTCAAGTCCAAGATCCAAGACAAGGAAGGTATTCCACCTGACCAACAAAGATTGATCTTTGCTGGTAAGCAATTGGAAGATGGTAGAACTTTGTCCGACTACAACATCCAAAAGGAATCTACTCTACACTTGGTCTTGAGATTGAGAGGTGGT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P0CG63",
                "url": "http://purl.uniprot.org/uniprot/P0CG63",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000037-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "Std"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000037-a-1",
        "variant_count": 1343,
        "experiment": "urn:mavedb:00000037-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-06-29",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000001-b-2",
        "publish_date": "2018-06-29",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Although we now routinely sequence human genomes, we can confidently identify only a fraction of the sequence variants that have a functional impact. Here, we developed a deep mutational scanning framework that produces exhaustive maps for human missense variants by combining random codon mutagenesis and multiplexed functional variation assays with computational imputation and refinement. We applied this framework to four proteins corresponding to six human genes: UBE2I (encoding SUMO E2 conjugase), SUMO1 (small ubiquitin-like modifier), TPK1 (thiamin pyrophosphokinase), and CALM1/2/3 (three genes encoding the protein calmodulin). The resulting maps recapitulate known protein features and confidently identify pathogenic variation. Assays potentially amenable to deep mutational scanning are already available for 57% of human disease genes, suggesting that DMS could ultimately map functional variation for all human disease genes. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957)",
        "method_text": "##Scoring procedure:\r\nDMS-TileSeq reads were processed using the [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) software. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). log ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score.\r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957) for more details.\r\n\r\n## Additional columns:\r\n* exp.score = experimental score from the joint DMS-BarSeq/DMS-TileSeq screens\r\n* exp.sd = standard deviation of the experimental score\r\n* df = degrees of freedom (number of replicates contributing to the experimental score)\r\n* pred.score = machine-learning predicted score",
        "short_description": "A Deep Mutational Scan of the human SUMO1 using functional complementation in yeast via DMS-TileSeq",
        "title": "SUMO1 DMS-TileSeq",
        "keywords": [
            {
                "text": "sumoylation"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29269382",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29269382",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "SUMO1",
            "reference_sequence": {
                "sequence": "ATGTCTGACCAGGAGGCAAAACCTTCAACTGAGGACTTGGGGGATAAGAAGGAAGGTGAATATATTAAACTCAAAGTCATTGGACAGGATAGCAGTGAGATTCACTTCAAAGTGAAAATGACAACACATCTCAAGAAACTCAAAGAATCATACTGTCAAAGACAGGGTGTTCCAATGAATTCACTCAGGTTTCTCTTTGAGGGTCAGAGAATTGCTGATAATCATACTCCAAAAGAACTGGGAATGGAGGAAGAAGATGTGATTGAAGTTTATCAGGAACAAACGGGGGGTCATTCAACAGTTTAG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P63165",
                "url": "http://purl.uniprot.org/uniprot/P63165",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000116030",
                "url": "http://www.ensembl.org/id/ENSG00000116030",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 149,
                "identifier": "NM_001005781.1",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_001005781.1",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000001-b-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000001-b-2",
        "variant_count": 4698,
        "experiment": "urn:mavedb:00000001-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-01-24",
        "modification_date": "2019-07-26",
        "urn": "urn:mavedb:00000008-a-2",
        "publish_date": "2019-01-24",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {
            "chr": "7",
            "end": 29161744,
            "start": 29161443,
            "reference": "mm9"
        },
        "abstract_text": "This study described the functional consequence of over 100,000 enhancer variants *in vivo* in mouse liver. Two human enhancers (ALDOB, ECR11) and one mouse enhancer (LTV1) were known to be active in mouse liver and therefore variants in these enhancers should show a measurable difference in transcription. The results were broadly consistent with evolutionary data and transcription factor binding sites, but were not always concordant demonstrating the importance of measuring the effect of enhancer variants directly.\r\n\r\nThis MaveDB entry describes the LTV1 enhancer data. Datasets for other enhancers described in the same publication are also available: [ALDOB](https://www.mavedb.org/experiment/urn:mavedb:00000006-a/) [ECR11](https://www.mavedb.org/experiment/urn:mavedb:00000007-a/)",
        "method_text": "Scores were calculated using a trivariate linear regression model. A separate model was built for each position in the enhancer, with a predictor for each possible variant nucleotide at that position.\r\n\r\nBecause most enhancer haplotypes in the LTV1 dataset had multiple tags, the data were normalized by dividing the total number of counts for a given haplotype by the number of tags for that haplotype.\r\n\r\nThe scores presented are therefore a combination of the effects of each individual variant on diverse enhancer haplotype backgrounds.\r\n\r\nSee metadata (available via download button) for wild type genomic coordinates in JSON format.",
        "short_description": "Trivariate regression scores for each nucleotide change as described in Patwardhan et al. 2012.",
        "title": "Trivariate regression scores for LTV1 replicate 2 of 2",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "doped oligo synthesis"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "liver"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "22371081",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/22371081",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "LTV1 enhancer",
            "reference_sequence": {
                "sequence": "CTTTGGGTGACCCCTGACCCTGGCCGCCTGGGCTCGCCTTCCCGCACATTCCGTCCTCGCCGCCCCGCCCCACCCCGCCCTCCTTCCTTGGCCCTGTGGGGACGGAAACATCCCGTTCCTGCCCAAGCTGGGTCAAGAGCCGGAGGGACAGGACCAGAGCACCCCTTACGCCAGAACTAGCTCTCCTTGTTCCTACTGGGTGACCTCATCTCGCCACGCCTCCTCAGGTGAACACCCGGGCTGGTAACGTCACTTCCTGCCAGGTAAGCGCCCCCAGGCAGCACTGCTCACGGAAAGGTCTG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "mm9",
                        "organism_name": "Mus musculus",
                        "assembly_identifier": {
                            "identifier": "GCF_000001635.18",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001635.18",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000008-a-2",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "pvalue"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000008-a-2",
        "variant_count": 906,
        "experiment": "urn:mavedb:00000008-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-04-14",
        "urn": "urn:mavedb:00000061-g-1",
        "publish_date": "2021-04-14",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors generated a library of RAF variant and use the PACS system to test KRAS4b/RAF protein-protein interaction (PPI). The experimental data revealed positions along the binding interface as well as which substitutions are tolerated at each position.",
        "method_text": "Samples are collected after 24h and sequenced by Illumina. The counts for each variant is first added by 1, then divided by total sequence counts at this time point to calculate variant frequency. The functional score equals the division of a variant frequency at this time point and its frequency in initial library. Further normalizing the functional scores by wild type scores will give the relative enrichment values. The score data includes scores from three replicates which are suffixed by: _rep1, _rep2 & _rep3. The final score is the median of them.",
        "short_description": "Measuring the interaction of mutated RAF to RAS by a new phage-assisted continuous selection (PACS) system.",
        "title": "RAF variant selected after 24h",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.1021/acschembio.9b00669",
                "url": "https://doi.org/10.1021/acschembio.9b00669",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31808666",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31808666",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "RAF",
            "reference_sequence": {
                "sequence": "TCTAAGACAAGCAACACTATCCGTGTTTTCTTGCCGAACAAGCAAAGAACAGTGGTCAATGTGCGAAATGGAATGAGCTTGCATGACTGCCTTATGAAAGCACTCAAGGTGAGGGGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 51,
                "identifier": "P04049",
                "url": "http://purl.uniprot.org/uniprot/P04049",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000061-g-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "score_rep1",
            "score_rep2",
            "score_rep3"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000061-g-1",
        "variant_count": 298,
        "experiment": "urn:mavedb:00000061-g",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-06-29",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000001-d-1",
        "publish_date": "2018-06-29",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Although we now routinely sequence human genomes, we can confidently identify only a fraction of the sequence variants that have a functional impact. Here, we developed a deep mutational scanning framework that produces exhaustive maps for human missense variants by combining random codon mutagenesis and multiplexed functional variation assays with computational imputation and refinement. We applied this framework to four proteins corresponding to six human genes: UBE2I (encoding SUMO E2 conjugase), SUMO1 (small ubiquitin-like modifier), TPK1 (thiamin pyrophosphokinase), and CALM1/2/3 (three genes encoding the protein calmodulin). The resulting maps recapitulate known protein features and confidently identify pathogenic variation. Assays potentially amenable to deep mutational scanning are already available for 57% of human disease genes, suggesting that DMS could ultimately map functional variation for all human disease genes. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957)",
        "method_text": "##Scoring procedure:\r\nDMS-TileSeq reads were processed using the [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) software. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). log ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957) for more details.",
        "short_description": "A Deep Mutational Scan of human TPK1 using functional complementation in yeast via DMS-TileSeq.",
        "title": "TPK1 DMS-TileSeq",
        "keywords": [
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29269382",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29269382",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TPK1",
            "reference_sequence": {
                "sequence": "ATGGAGCATGCCTTTACCCCGTTGGAGCCCCTGCTTTCCACTGGGAATTTGAAGTACTGCCTTGTAATTCTTAATCAGCCTTTGGACAACTATTTTCGTCATCTTTGGAACAAAGCTCTTTTAAGAGCCTGTGCCGATGGAGGTGCCAACCGCTTATATGATATCACCGAAGGAGAGAGAGAAAGCTTTTTGCCTGAATTCATCAATGGAGACTTTGATTCTATTAGGCCTGAAGTCAGAGAATACTATGCTACTAAGGGATGTGAGCTCATTTCAACTCCTGATCAAGACCACACTGACTTTACTAAGTGCCTTAAAATGCTCCAAAAGAAGATAGAAGAAAAAGACTTAAAGGGAAAGCACAGGTTGCATGTAGACACTGGAATGGAGGGTGATTGGTGTGGCCTTATTCCTGTTGGACAGCCTTGTATGCAGGTTACAACCACAGGCCTCAAGTGGAACCTCACAAATGATGTGCTTGCTTTTGGAACATTGGTCAGTACTTCCAATACCTACGACGGGTCTGGTGTTGTGACTGTGGAAACTGACCACCCACTCCTCTGGACCATGGCCATCAAAAGCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "Q9H3S4",
                "url": "http://purl.uniprot.org/uniprot/Q9H3S4",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000196511",
                "url": "http://www.ensembl.org/id/ENSG00000196511",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 104,
                "identifier": "NM_022445.3",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_022445.3",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000001-d-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000001-d-1",
        "variant_count": 9542,
        "experiment": "urn:mavedb:00000001-d",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-10-14",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000004-a-1",
        "publish_date": "2018-12-02",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study identified multiple gain-of-function mutations in the ubiquitination factor E4B U-box domain by measuring auto-ubiquitination in a phage display system. E4B is an E3 ligase, responsible for transferring a ubiquitin from an E2 ligase to the target (in this case the phage displaying E4B). Many of these mutations are not specific to one E2 enzyme and so may be generalizable for mutiple E2s and E3s.",
        "method_text": "Scores were calculated using the Enrich software package, corrected for nonspecific carryover as described in (1). Uncorrected log2 enrichment ratios are also provided. This dataset used only the variant counts in the input and round 3 selected time points as the basis for the log ratio scoring.",
        "short_description": "Scores as presented in Starita et al. 2013, analyzed using the Enrich software package.",
        "title": "Enrich scores for E4B",
        "keywords": [
            {
                "text": "U-box"
            },
            {
                "text": "E3"
            },
            {
                "text": "Phage display"
            },
            {
                "text": "log ratios"
            },
            {
                "text": "ubiquitin"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "22006916",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/22006916",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23509263",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23509263",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2870-5099"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "E4B",
            "reference_sequence": {
                "sequence": "ATAGAGAAGTTTAAACTTCTTGCAGAGAAAGTGGAGGAAATCGTGGCAAAGAATGCGCGGGCAGAAATAGACTACAGCGATGCCCCGGACGAGTTCAGAGACCCTCTGATGGACACCCTGATGACCGATCCCGTGAGACTGCCCTCTGGCACCGTCATGGACCGTTCTATCATCCTGCGGCATCTGCTCAACTCCCCCACCGACCCCTTCAACCGCCAGATGCTGACTGAGAGCATGCTGGAGCCAGTGCCAGAGCTAAAGGAGCAGATTCAGGCCTGGATGAGAGAGAAACAGAGCAGTGACCACTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1071,
                "identifier": "Q9ES00",
                "url": "http://purl.uniprot.org/uniprot/Q9ES00",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": {
                "offset": 3939,
                "identifier": "NM_022022.3",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_022022.3",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "mm10",
                        "organism_name": "Mus musculus",
                        "assembly_identifier": {
                            "identifier": "GCF_000001635.20",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001635.20",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000004-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "log2_ratio"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000004-a-1",
        "variant_count": 98298,
        "experiment": "urn:mavedb:00000004-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-01-17",
        "urn": "urn:mavedb:00000057-a-1",
        "publish_date": "2021-01-17",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors used saturation mutagenesis to study the variant effect of Ras, without the regulation of GTPase activating protein (GAP) or guanine nucleotide exchange factor (GEF). The variants were selected by bacterial two-hybrid strategy.",
        "method_text": "The frequency of each mutant was determined through sequencing. The natural logarithm of the frequency ratio for each mutant after and before selection was calculated. These values were then normalized by dividing the wild type values which were calculated the same way.",
        "short_description": "Selection result of Ras mutants expressed without the GAP or the GEF",
        "title": "Unregulated-Ras",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28686159",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28686159",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Ras",
            "reference_sequence": {
                "sequence": "ACGGAATATAAGCTGGTGGTGGTGGGCGCCGGCGGTGTGGGCAAGAGTGCGCTGACCATCCAGCTGATCCAGAACCATTTTGTGGACGAATACGACCCCACTATAGAGGATTCCTACCGGAAGCAGGTGGTCATTGATGGGGAGACGTGCCTGTTGGACATCCTGGATACCGCCGGCCAGGAGGAGTACAGCGCCATGCGGGACCAGTACATGCGCACCGGGGAGGGCTTCCTGTGTGTGTTTGCCATCAACAACACCAAGTCTTTTGAGGACATCCACCAGTACAGGGAGCAGATCAAACGGGTGAAGGACTCGGATGACGTGCCCATGGTGCTGGTGGGGAACAAGTGTGACCTGGCTGCACGCACTGTGGAATCTCGGCAGGCTCAGGACCTCGCCCGAAGCTACGGCATCCCCTACATCGAGACCTCGGCCAAGACCCGGCAGGGAGTGGAGGATGCCTTCTACACGTTGGTGCGTGAGATCCGGCAGCAC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P01112",
                "url": "http://purl.uniprot.org/uniprot/P01112",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000057-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000057-a-1",
        "variant_count": 3300,
        "experiment": "urn:mavedb:00000057-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2021-01-12",
        "modification_date": "2021-01-12",
        "urn": "urn:mavedb:00000055-a-1",
        "publish_date": "2021-01-12",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment identified candidate pharmacogenomic variants in NUDT15 that could contribute to thiopurine toxicity. Two functional assays were performed on the same variant library, one for NUDT15 activity and one for protein stability.",
        "method_text": "Barcode-variant counts were converted to variant counts and transformed to scores using a weighted average of counts in FACS bins as described by Matreyek et al. 2018.\r\n\r\nReported are the non-normalized average score and standard deviation for each missense change, but the nature of the averaging (e.g. distinct codons with synonymous consequences or replicate assays) was not specified.",
        "short_description": "NUDT15 protein stability scores measured by VAMP-seq.",
        "title": "NUDT15 protein stability scores",
        "keywords": [
            {
                "text": "VAMP-seq"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29785012",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29785012",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "32094176",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/32094176",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "NUDT15",
            "reference_sequence": {
                "sequence": "ATGACGGCCAGCGCACAGCCGCGCGGGCGGCGGCCAGGAGTCGGAGTCGGAGTCGTGGTGACCAGCTGCAAGCATCCGCGTTGCGTCCTCCTGGGGAAGAGGAAAGGCTCGGTTGGAGCTGGCAGTTTCCAACTCCCTGGAGGTCATCTGGAGTTCGGTGAAACCTGGGAAGAATGTGCTCAAAGGGAAACCTGGGAAGAAGCAGCTCTTCACCTGAAAAATGTTCACTTTGCCTCAGTTGTGAATTCTTTCATTGAGAAGGAGAATTACCATTATGTTACTATATTAATGAAAGGAGAAGTGGATGTGACTCATGATTCAGAACCAAAGAATGTAGAGCCTGAAAAAAATGAAAGTTGGGAGTGGGTTCCTTGGGAAGAACTACCTCCCCTGGACCAGCTTTTCTGGGGACTGCGTTGTTTAAAAGAACAAGGCTATGATCCATTTAAAGAAGATCTGAACCATCTGGTGGGATACAAAGGAAATCATCTCTAG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "Q9NV35",
                "url": "http://purl.uniprot.org/uniprot/Q9NV35",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000055-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000055-a-1",
        "variant_count": 3100,
        "experiment": "urn:mavedb:00000055-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-06-29",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000001-d-2",
        "publish_date": "2018-06-29",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Although we now routinely sequence human genomes, we can confidently identify only a fraction of the sequence variants that have a functional impact. Here, we developed a deep mutational scanning framework that produces exhaustive maps for human missense variants by combining random codon mutagenesis and multiplexed functional variation assays with computational imputation and refinement. We applied this framework to four proteins corresponding to six human genes: UBE2I (encoding SUMO E2 conjugase), SUMO1 (small ubiquitin-like modifier), TPK1 (thiamin pyrophosphokinase), and CALM1/2/3 (three genes encoding the protein calmodulin). The resulting maps recapitulate known protein features and confidently identify pathogenic variation. Assays potentially amenable to deep mutational scanning are already available for 57% of human disease genes, suggesting that DMS could ultimately map functional variation for all human disease genes. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957)",
        "method_text": "##Scoring procedure:\r\nDMS-TileSeq reads were processed using the [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) software. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). log ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. Random-Forest-based machine learning was used to impute missing values and refine low-confidence measurements, based on intrinsic, structural, and biochemical features.\r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957) for more details.\r\n\r\n## Additional columns:\r\n* exp.score = experimental score from the joint DMS-BarSeq/DMS-TileSeq screens\r\n* exp.sd = standard deviation of the experimental score\r\n* df = degrees of freedom (number of replicates contributing to the experimental score)\r\n* pred.score = machine-learning predicted score",
        "short_description": "A machine-learning imputed and refined Deep Mutational Scan of human TPK1 using functional complementation in yeast.",
        "title": "TPK1 imputed and refined",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29269382",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29269382",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TPK1",
            "reference_sequence": {
                "sequence": "ATGGAGCATGCCTTTACCCCGTTGGAGCCCCTGCTTTCCACTGGGAATTTGAAGTACTGCCTTGTAATTCTTAATCAGCCTTTGGACAACTATTTTCGTCATCTTTGGAACAAAGCTCTTTTAAGAGCCTGTGCCGATGGAGGTGCCAACCGCTTATATGATATCACCGAAGGAGAGAGAGAAAGCTTTTTGCCTGAATTCATCAATGGAGACTTTGATTCTATTAGGCCTGAAGTCAGAGAATACTATGCTACTAAGGGATGTGAGCTCATTTCAACTCCTGATCAAGACCACACTGACTTTACTAAGTGCCTTAAAATGCTCCAAAAGAAGATAGAAGAAAAAGACTTAAAGGGAAAGCACAGGTTGCATGTAGACACTGGAATGGAGGGTGATTGGTGTGGCCTTATTCCTGTTGGACAGCCTTGTATGCAGGTTACAACCACAGGCCTCAAGTGGAACCTCACAAATGATGTGCTTGCTTTTGGAACATTGGTCAGTACTTCCAATACCTACGACGGGTCTGGTGTTGTGACTGTGGAAACTGACCACCCACTCCTCTGGACCATGGCCATCAAAAGCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "Q9H3S4",
                "url": "http://purl.uniprot.org/uniprot/Q9H3S4",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000196511",
                "url": "http://www.ensembl.org/id/ENSG00000196511",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 104,
                "identifier": "NM_022445.3",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_022445.3",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000001-d-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se",
            "exp.score",
            "exp.sd",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000001-d-2",
        "variant_count": 4860,
        "experiment": "urn:mavedb:00000001-d",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-14",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000012-a-6",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment demonstrated the programmed allelic series (PALS) method for site-directed mutagenesis using microarrays. The impact of nearly all singleton missense mutation in the Gal4 yeast transcription factor was measured in multiple selections.",
        "method_text": "Variant counts were calculated by summing the read counts of barcodes associated with each variant. The enrichment score for each variant is the $\\log_2$ ratio of the mutant count over the wild type count for the selected time point minus the $\\log_2$ ratio of the mutant count over the wild type count for the input time point (ratio of ratios).",
        "short_description": "Deep mutational scan of Gal4 DNA-binding domain using a yeast growth assay. 64 hours of selection.",
        "title": "Deep mutational scan of Gal4 DNA-binding domain, SEL_C_64h",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "log ratios"
            },
            {
                "text": "DNA-binding"
            },
            {
                "text": "Yeast two-hybrid"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "25559584",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/25559584",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Gal4",
            "reference_sequence": {
                "sequence": "AAGCTACTGTCTTCTATCGAACAAGCATGCGATATTTGCCGACTTAAAAAGCTCAAGTGCTCCAAAGAAAAACCGAAGTGCGCCAAGTGTCTGAAGAACAACTGGGAGTGTCGCTACTCTCCCAAAACCAAAAGGTCTCCGCTGACTAGGGCACATCTGACAGAAGTGGAATCAAGGCTAGAAAGACTGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P04386",
                "url": "http://purl.uniprot.org/uniprot/P04386",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000012-a-6",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000012-a-6",
        "variant_count": 1319,
        "experiment": "urn:mavedb:00000012-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-14",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000012-a-4",
        "publish_date": "2019-02-18",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment demonstrated the programmed allelic series (PALS) method for site-directed mutagenesis using microarrays. The impact of nearly all singleton missense mutation in the Gal4 yeast transcription factor was measured in multiple selections.",
        "method_text": "Variant counts were calculated by summing the read counts of barcodes associated with each variant. The enrichment score for each variant is the $\\log_2$ ratio of the mutant count over the wild type count for the selected time point minus the $\\log_2$ ratio of the mutant count over the wild type count for the input time point (ratio of ratios).",
        "short_description": "Deep mutational scan of Gal4 DNA-binding domain using a yeast growth assay. 40 hours of selection.",
        "title": "Deep mutational scan of Gal4 DNA-binding domain, SEL_B_40h",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "log ratios"
            },
            {
                "text": "DNA-binding"
            },
            {
                "text": "Yeast two-hybrid"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "25559584",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/25559584",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Gal4",
            "reference_sequence": {
                "sequence": "AAGCTACTGTCTTCTATCGAACAAGCATGCGATATTTGCCGACTTAAAAAGCTCAAGTGCTCCAAAGAAAAACCGAAGTGCGCCAAGTGTCTGAAGAACAACTGGGAGTGTCGCTACTCTCCCAAAACCAAAAGGTCTCCGCTGACTAGGGCACATCTGACAGAAGTGGAATCAAGGCTAGAAAGACTGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P04386",
                "url": "http://purl.uniprot.org/uniprot/P04386",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000012-a-4",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000012-a-4",
        "variant_count": 1319,
        "experiment": "urn:mavedb:00000012-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-19",
        "modification_date": "2020-06-19",
        "urn": "urn:mavedb:00000044-b-2",
        "publish_date": "2020-06-19",
        "created_by": "0000-0001-6713-6904",
        "modified_by": "0000-0001-6713-6904",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "Full methods provided in the [preprint](https://www.biorxiv.org/content/10.1101/2020.06.17.157982v1) and full analysis provided in the [GitHub repo](https://github.com/jbloomlab/SARS-CoV-2-RBD_DMS).\r\n\r\n`score` column is the change in log<sub>10</sub>(_K_<sub>D,app</sub>) relative to the average wildtype binding, polarized such that a positive score indicates improved binding, and negative indicates reduced binding affinity. The `library` column indicates which of our two duplicate mutant libraries a measurement is from. The `average` column is the average mutation effect from the duplicate libraries. The values in the `average` column were used in the paper analysis.",
        "short_description": "RBD mutation and expression score for each amino acid mutation",
        "title": "per-single-mutant expression score",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0001-6713-6904",
            "0000-0001-9278-3644",
            "0000-0003-1267-3408"
        ],
        "licence": {
            "long_name": "Other - See Data Usage Guidelines",
            "short_name": "Other - See Data Usage Guidelines",
            "link": "",
            "version": "1.0"
        },
        "target": {
            "name": "SARS-CoV-2 receptor binding domain",
            "reference_sequence": {
                "sequence": "AATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "Other",
                        "organism_name": "Other - genome not listed",
                        "assembly_identifier": null
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000044-b-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "average",
            "library"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000044-b-2",
        "variant_count": 8040,
        "experiment": "urn:mavedb:00000044-b",
        "is_meta_analysis": false,
        "data_usage_policy": "See licence from GitHub: https://github.com/jbloomlab/SARS-CoV-2-RBD_DMS/blob/master/LICENSE.md\r\n\r\nBSD 3-Clause License\r\n\r\nCopyright (c) 2020, Tyler N. Starr, Allison J. Greaney, and Jesse D. Bloom\r\nAll rights reserved.\r\n\r\nRedistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n1. Redistributions of source code must retain the above copyright notice, this\r\n   list of conditions and the following disclaimer.\r\n\r\n2. Redistributions in binary form must reproduce the above copyright notice,\r\n   this list of conditions and the following disclaimer in the documentation\r\n   and/or other materials provided with the distribution.\r\n\r\n3. Neither the name of the copyright holder nor the names of its\r\n   contributors may be used to endorse or promote products derived from\r\n   this software without specific prior written permission."
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000017-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "22",
            "end": 19723650,
            "start": 19723266,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of GP1BB promoter in HEL 92.1.7 cells.",
        "title": "Saturation mutagenesis MPRA of GP1BB promoter",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "GP1BB promoter",
            "reference_sequence": {
                "sequence": "GTTGTGAATGCCGCGTCCTGTCCTGGTGACAGGAGAACAATGTTGGTGAACGTCGCAGCGGGTGTCCGAGTGCTCCGTGTGCCCCTGAGAGCGGGTGGGAGCGGAAGCCTGAGCGGCCTGCGGCCTCCGGCGATAGTGTGCTATCTGCCGCTGCAGCGCGCGTCCGCGCGGCCTCTGGGCTATTTCTGGCCAGGCCGCAGCACTGTGGTCGGTGCGGGCGTGGCAGGGGCGGGGCGGCCTTATCGCTCGGCTCTCCCGCCTACGCCTCCCGCTGCAGAGTAAGCCGGGCTGCCGTCTTCTCGCCATGGGCTCCGGTGAGTCTGGAGTCCGGTCGGGCCCCCGGCTGCTCCCTAGGCCGACCCGGGTTGAGAGGAGCTCTGGTCGT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000017-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000017-a-1",
        "variant_count": 1268,
        "experiment": "urn:mavedb:00000017-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-10-04",
        "modification_date": "2019-10-22",
        "urn": "urn:mavedb:00000005-a-3",
        "publish_date": "2019-10-22",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Success in precision medicine depends on our ability to determine which rare human genetic variants have functional effects. Classical homocystinuria - characterized by elevated homocyst(e)ine in plasma and urine - is caused by primarily-rare variants in the cystathionine beta-synthase (CBS) gene. About half of patients respond to vitamin B6 therapy. With early detection in newborns, existing therapies are highly effective. Functional CBS variants, especially those that respond to vitamin B6, can be detected based on their ability to restore growth in yeast cells lacking CYS4 (the yeast ortholog of CBS). This assay has previously been carried out only reactively after first observation of a variant in patients. Here we describe a proactive comprehensive missense variant effect map for human CBS. Together, saturation codon-replacement mutagenesis, en masse growth selection at different vitamin B6 levels, and sequencing yielded a look-up table for CBS missense variant function and vitamin B6-remediability in yeast. The CBS variant effect map identified disease variants and predicted both disease severity (r = 0.82) and human clinical response to vitamin B6 (r = 0.89). Thus, highly-multiplexed cell-based assays can yield proactive maps of variant function and patient response to therapy, even for rare variants not previously seen in the clinic.\r\n\r\nSee Sun et al 2018",
        "method_text": "Scoring procedure:\r\nDMS-TileSeq reads were processed using the tileseq_package and tilsesqMave softwares. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). Log-ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. Gradient boosted tree-based machine learning was used to impute missing values and refine low-confidence measurements, based on intrinsic, structural, and biochemical features.\r\n\r\nSee Sun et al 2018 for more details.",
        "short_description": "A Deep Mutational Scan of the human cystathionine-beta-synthase (CBS) using functional complementation in yeast via DMS-TileSeq at high levels of Vitamin B6.",
        "title": "CBS high-B6 imputed and refined",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "Vitamin B6"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CBS",
            "reference_sequence": {
                "sequence": "ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P35520",
                "url": "http://purl.uniprot.org/uniprot/P35520",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000160200",
                "url": "http://www.ensembl.org/id/ENSG00000160200",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000005-a-3",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": "urn:mavedb:00000005-a-2",
        "next_version": null,
        "current_version": "urn:mavedb:00000005-a-3",
        "variant_count": 11550,
        "experiment": "urn:mavedb:00000005-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-i-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast Treated with Rapamycin",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1101/2020.05.01.072884",
                "url": "https://doi.org/10.1101/2020.05.01.072884",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-i-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-i-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-i",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2020-12-10",
        "urn": "urn:mavedb:00000052-b-1",
        "publish_date": "2020-12-10",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study generated thousands of mutated sequence on Gcn4 activation domain which contains one or several amino acid substitutions. They used a high-throughput fluorescent reporter assay to identify the transcription ability of mutated Gcn4 sequences through cell sorting in nutrient stress condition.",
        "method_text": "The variant abundance in each bin is determined by high-throughput sequencing to the barcodes in an Illumina platform. The reads are first normalized by the total reads in each bin. A weighted mean value is calculated through the median GFP/mCherry ratio in each bin. This mean value is further divided by wildtype median value and logarithm (base 2) transformed. The induction ratio is the activity under starvation (Raw_Starvation) divided by the activity (Raw_Replicate_1) in complete media. Count data includes 8 bins for experiments in complete media, starvation media and sorted on mCherry only.\r\n\r\nBecause each barcode was scored individually and reported in the supplement, there are multiple scores for each variant. Each of these is the score from one barcode.",
        "short_description": "Mutagenesis study of Gcn4 activation domain in starvation media using fluorescent reporter assay.",
        "title": "Gcn4 activation domain induction ratio",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29525204",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29525204",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Gcn4",
            "reference_sequence": {
                "sequence": "AGCACTGATTCAACTCCAATGTTTGAGTATGAAAACCTAGAAGACAACTCTAAAGAATGGACATCCTTGTTTGACAATGACATTCCAGTTACCACTGACGATGTTTCATTGGCTGATAAGGCAATTGAATCC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 100,
                "identifier": "P03069",
                "url": "http://purl.uniprot.org/uniprot/P03069",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000052-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "Raw_Replicate_1",
            "Raw_Starvation"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "CompleteRep1_Counts_Bin1_ratio85574",
            "CompleteRep1_Counts_Bin2_ratio24083",
            "CompleteRep1_Counts_Bin3_ratio26171",
            "CompleteRep1_Counts_Bin4_ratio45347",
            "CompleteRep1_Counts_Bin5_ratio68588",
            "CompleteRep1_Counts_Bin6_ratio100275",
            "CompleteRep1_Counts_Bin7_ratio163927",
            "CompleteRep1_Counts_Bin8_ratio262143",
            "AAstarvation_Counts_Bin1_ratio104362",
            "AAstarvation_Counts_Bin2_ratio27008",
            "AAstarvation_Counts_Bin3_ratio31517",
            "AAstarvation_Counts_Bin4_ratio57827",
            "AAstarvation_Counts_Bin5_ratio83988",
            "AAstarvation_Counts_Bin6_ratio122734",
            "AAstarvation_Counts_Bin7_ratio186622",
            "AAstarvation_Counts_Bin8_ratio262143",
            "mCherry_Counts_Bin1_mCherryfluor80",
            "mCherry_Counts_Bin2_mCherryfluor203",
            "mCherry_Counts_Bin3_mCherryfluor310",
            "mCherry_Counts_Bin4_mCherryfluor408",
            "mCherry_Counts_Bin5_mCherryfluor515",
            "mCherry_Counts_Bin6_mCherryfluor651",
            "mCherry_Counts_Bin7_mCherryfluor840",
            "mCherry_Counts_Bin8_mCherryfluor1321"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000052-b-1",
        "variant_count": 6500,
        "experiment": "urn:mavedb:00000052-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-20",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000032-a-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "2",
            "end": 161238998,
            "start": 161238408,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of UC88 (ultraconserved element) enhancer in Neuro-2a cells.",
        "title": "Saturation mutagenesis MPRA of UC88 enhancer",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "UC88 enhancer",
            "reference_sequence": {
                "sequence": "ATACAGATAAATGCACACATGTATACGCGAAAGGTTAACTCGGCGGAGGACTCGCCCAAATAAGCACCGGGATTGCATTTAAAATAATAATAATAAATAAATAAATAAACTAGGAAGGAAAGCGGGGGGAGGGAAGCAGAAGTCGGGAAGAAAAGAGAAAAGCAGCAGGCTGATTACGAGGTGTCAAAACTGCCAGGAGCAAGAAGGTGATAGCAATCAGGGGTGAGAAGAGTGCGGCATTCGTGCGGGGCAACTAATTATCCGTCTCATTTGAGAAGAGCAGCATTTGAGGCAGCAGCGTTCGCCTGCTGAACGGTGACAGATTGGCGCGGAGGAGAGGGGAGGTGTTAAAACAATGGAGCCGGGCGCGCGAGCGCTGCTGCATGCTAATCAGCCCTCCCTCCGCCTGCCTGCCGCGCTCCCTCCTTCCTCCCGGCCTCCCTCCTCCGCGCTCCCTCCTCCCGCCTGCGGCGCTCCCTCCTTTCCAGCGGGCCCCGCGCCGCCGCCGCCACCCGCTTCCTGCTCCCTCGCTTTCCCGCGCGTCCTTCCCGCCGCTGGCGAGTGGAACCCAGCCACCGCCACCGAGTCCCA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000032-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000032-a-1",
        "variant_count": 1964,
        "experiment": "urn:mavedb:00000032-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-02-20",
        "modification_date": "2019-11-12",
        "urn": "urn:mavedb:00000031-d-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {
            "chr": "5",
            "end": 1295247,
            "start": 1294989,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of TERT promoter in glioblastoma SF7996 (GBM) cells, siRNA scrambled control.",
        "title": "Saturation mutagenesis MPRA of TERT promoter, GBM siRNA control",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TERT promoter",
            "reference_sequence": {
                "sequence": "TCGCGGGGGTGGCCGGGGCCAGGGCTTCCCACGTGCGCAGCAGGACGCAGCGCTGCCTGAAACTCGCGCCGCGAGGAGAGGGCGGGGCCGCGGAAAGGAAGGGGAGGGGCTGGGAGGGCCCGGAGGGGGCTGGGCCGGGGACCCGGGAGGGGTCGGGACGGGGCGGGGTCCGCGCGGAGGAGGCGGAGCTGGAAGGTGAAGGGGCAGGACGGGTGCCCGGGTCCCCAGTCCCTCCGCCACGTGGGAAGCGCGGTCCTGG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000031-d-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000031-d-1",
        "variant_count": 974,
        "experiment": "urn:mavedb:00000031-d",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000020-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "20",
            "end": 44355804,
            "start": 44355520,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of HNF4A promoter in HEK293T cells.",
        "title": "Saturation mutagenesis MPRA of HNF4A promoter",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HNF4A promoter",
            "reference_sequence": {
                "sequence": "CCCCAGAGTGCAGGACTAGGACCCGAGTGGACCTCAGGTCTGGCCAGGTCGCCATTGCCATGGAGACAGCAACAGTCCCCAGCCGCGGGTTCCCTAAGTGACTGGTTACTCTTTAACGTATCCACCCACCTTGGGTGATTAGAAGAATCAATAAGATAACCGGGCGGTGGCAGCTGGCCGCACTCACCGCCTTCCTGGTGGACGGGCTCCTGGTGGCTGTGCTGCTGCTGTGAGCGGGCCCCTGCTCCTCCATGCCCCCAGCTCTCCGGCTGGGTGGGCTTGGCC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000020-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000020-a-1",
        "variant_count": 977,
        "experiment": "urn:mavedb:00000020-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-01-24",
        "modification_date": "2019-07-26",
        "urn": "urn:mavedb:00000007-a-1",
        "publish_date": "2019-01-24",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {
            "chr": "2",
            "end": 169939701,
            "start": 169939082,
            "reference": "hg19"
        },
        "abstract_text": "This study described the functional consequence of over 100,000 enhancer variants *in vivo* in mouse liver. Two human enhancers (ALDOB, ECR11) and one mouse enhancer (LTV1) were known to be active in mouse liver and therefore variants in these enhancers should show a measurable difference in transcription. The results were broadly consistent with evolutionary data and transcription factor binding sites, but were not always concordant demonstrating the importance of measuring the effect of enhancer variants directly.\r\n\r\nThis MaveDB entry describes the ECR11 enhancer data. Datasets for other enhancers described in the same publication are also available: [ALDOB](https://www.mavedb.org/experiment/urn:mavedb:00000006-a/) [LTV1](https://www.mavedb.org/experiment/urn:mavedb:00000008-a/)",
        "method_text": "Scores were calculated using a trivariate linear regression model. A separate model was built for each position in the enhancer, with a predictor for each possible variant nucleotide at that position.\r\n\r\nThe scores presented are therefore a combination of the effects of each individual variant on diverse enhancer haplotype backgrounds.\r\n\r\nSee metadata (available via download button) for wild type genomic coordinates in JSON format.",
        "short_description": "Trivariate regression scores for each nucleotide change as described in Patwardhan et al. 2012.",
        "title": "Trivariate regression scores for ECR11",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "doped oligo synthesis"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "liver"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "22371081",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/22371081",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "ECR11 enhancer",
            "reference_sequence": {
                "sequence": "CTCTGAAGCTCAAAAGCAATGATTTGATAAGGCTTCGATTTTTAACACTTGAATTCCAACACCTTTAAAAATACTAAATGTTTCCCATTTTAAACAAGCCAAGTGAATGACTGAATTCTTAACCAAAAATAAATGTGAAGTAGATTGATATCACTCTTTGTCCATACAGAACATTATATAAATATTCTCTGGCCTTACTATCTAGCAAGGCAGGAAAAATAGATCAATTTGTTCTCACTCATAGGTGGGAATTGAACAATGAGAACACATGGACACAGGAAGGGGAACATCACACATCGGGGCCTGTTGTGGGGTGGGGGGAGGGGGGAGGGATAGCATTAGGAGATATATCTAACGTTAAATGACGTGTTAATGGGAGCAGCACACCAACATGGCACATGTATACATATGTAACAAACTGCATGTTGTGCACATGTACCCTAAAACTTAAAGTATAATAAGAAAAATAGATCAATTTACTCTACATCTGAGATTAAAAAGCAGAAAGACTCACTCACAGAGTTTCAGTATTTGACATTCAGAACCAGAAATAGAGTAACAGCGAGAACTTGAACTATTTCAGTTTAGCCTCCCACCCTCTCTGCTATCACTTCCCAAAA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg19",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.13",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.13",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000007-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "pvalue"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000007-a-1",
        "variant_count": 1860,
        "experiment": "urn:mavedb:00000007-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-07",
        "urn": "urn:mavedb:00000039-a-3",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effects of yeast HSP90 variants under the control of different promoters to explore the relationship between protein sequence and expression level. The results showed that reduced expression level (compared to wild-type expression) revealed new partial loss of function mutations.",
        "method_text": "Growth rates were calculated for each variant and converted into selection coefficients. The selection coefficient for each variant under control of this promoter/UTR combination is reported as the score. For variants with multiple synonymous codons, the reported coefficient is the average of all synonymous variant's selection coefficients.\r\n\r\nVariants annotated as \"null-like\" have a score of -1.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae under the control of the CYC promoter with CYC 3'UTR.",
        "title": "Deep mutational scan of HSP90, CYC construct",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "promoter"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "23825969",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23825969",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000039-a-3",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000039-a-3",
        "variant_count": 189,
        "experiment": "urn:mavedb:00000039-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-07-29",
        "modification_date": "2019-07-29",
        "urn": "urn:mavedb:00000038-a-1",
        "publish_date": "2019-07-29",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effect of single amino acid changes in ubiquitin on binding to E1 (Uba1). Combined with work from a previous study on the effect of ubiquitin mutations on yeast growth rate, this showed that ubiquitin-E1 binding was not linearly related to growth rate and that mutations in ubiquitin are likely to affect multiple function that modulate growth rate.\r\n\r\nThis experiment was performed in limiting E1 conditions (as opposed to excess E1).",
        "method_text": "Scores were calculated using the EMPIRIC log ratios approach, using the ratio of E1-reactive cells to HA-displaying cells. Scores were normalized such that the average wild type synonymous variant was 1 and the average nonsense mutation was 0. The raw $log_2$ ratio is included as an extra column. The \"Std\" column lists the standard deviation calculated from multiple measurements of synonymous codons. This value is `nan` when only one codon was measured.",
        "short_description": "Amino acid scores for deep mutational scan of ubiquitin in a yeast display binding assay to E1 with limiting E1.",
        "title": "Ubiquitin-E1 yeast display amino acid scores, limiting E1",
        "keywords": [
            {
                "text": "ubiquitin"
            },
            {
                "text": "E1"
            },
            {
                "text": "yeast display"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "Binding"
            },
            {
                "text": "FACS"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "24862281",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/24862281",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23376099",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23376099",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Ubiquitin",
            "reference_sequence": {
                "sequence": "CAAATTTTCGTCAAGACTTTAACCGGTAAGACTATTACCCTGGAAGTTGAATCTTCTGACACTATTGACAATGTCAAGTCCAAGATCCAAGACAAGGAAGGTATTCCACCTGACCAACAAAGATTGATCTTTGCTGGTAAGCAATTGGAAGATGGTAGAACTTTGTCCGACTACAACATCCAAAAGGAATCTACTCTACACTTGGTCTTGAGATTGAGAGGTGGT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P0CG63",
                "url": "http://purl.uniprot.org/uniprot/P0CG63",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000038-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "log2 (E1react/display)",
            "Std"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000038-a-1",
        "variant_count": 1528,
        "experiment": "urn:mavedb:00000038-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-14",
        "modification_date": "2020-12-20",
        "urn": "urn:mavedb:00000053-a-1",
        "publish_date": "2020-12-20",
        "created_by": "0000-0002-2866-3880",
        "modified_by": "0000-0002-2866-3880",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "This contains the double mutations found in the dataset",
        "title": "Pairwise mutations in PSD95 PDZ3",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2866-3880"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "PSD95 PDZ3",
            "reference_sequence": {
                "sequence": "CCACGCCGCATCGTCATCCACCGTGGGTCAACGGGGTTAGGCTTCAATATCGTCGGTGGAGAGGATGGTGAGGGAATCTTCATCTCATTCATTCTGGCGGGAGGACCGGCCGATTTAAGCGGAGAACTTCGCAAAGGTGACCAGATCCTTTCGGTGAATGGCGTAGATTTGCGCAACGCATCACACGAACAGGCGGCCATCGCATTAAAGAACGCCGGCCAGACCGTTACGATTATCGCGCAGTATAAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 309,
                "identifier": "P78352",
                "url": "http://purl.uniprot.org/uniprot/P78352",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000053-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sigma"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "base_1",
            "base_2",
            "base_3",
            "base_4",
            "base_5",
            "base_6",
            "chlor_1",
            "chlor_2",
            "chlor_3",
            "chlor_4",
            "chlor_5",
            "chlor_6"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000053-a-1",
        "variant_count": 648022,
        "experiment": "urn:mavedb:00000053-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-11-11",
        "modification_date": "2020-11-11",
        "urn": "urn:mavedb:00000048-c-1",
        "publish_date": "2020-11-11",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment utilised site-saturation mutagenesis (SSM) to measure the functional consequences of mutations in the human chemokine receptor, CXCR4 and to map ligand interaction sites. Cells were selected for binding to Ab 12G5.",
        "method_text": "Data obtained from selecting cells for Ab 12G5 binding was analysed using Enrich (version unspecified). Log~2~ enrichment ratios were calculated and normalised by subtracting the frequency of the WT sequence. Log~2~ enrichment ratios for two replicates were averaged to obtain variant scores. Note that the scores here were not reported in the manuscript tables, but were calculated from the replicate enrichment ratios that were reported.",
        "short_description": "Deep mutational scan selecting for CXCR4 binding to Ab 12G5 in Expi293F cells.",
        "title": "CXCR4 Ab 12G5 binding",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29678950",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29678950",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23827138",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23827138",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0001-6681-7994"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CXCR4",
            "reference_sequence": {
                "sequence": "GAGGGGATCAGTATATACACTTCAGATAACTACACCGAGGAAATGGGCTCAGGGGACTATGACTCCATGAAGGAACCCTGTTTCCGTGAAGAAAATGCTAATTTCAATAAAATCTTCCTGCCCACCATCTACTCCATCATCTTCTTAACTGGCATTGTGGGCAATGGATTGGTCATCCTGGTCATGGGTTACCAGAAGAAACTGAGAAGCATGACGGACAAGTACAGGCTGCACCTGTCAGTGGCCGACCTCCTCTTTGTCATCACGCTTCCCTTCTGGGCAGTTGATGCCGTGGCAAACTGGTACTTTGGGAACTTCCTATGCAAGGCAGTCCATGTCATCTACACAGTCAACCTCTACAGCAGTGTCCTCATCCTGGCCTTCATCAGTCTGGACCGCTACCTGGCCATCGTCCACGCCACCAACAGTCAGAGGCCAAGGAAGCTGTTGGCTGAAAAGGTGGTCTATGTTGGCGTCTGGATCCCTGCCCTCCTGCTGACTATTCCCGACTTCATCTTTGCCAACGTCAGTGAGGCAGATGACAGATATATCTGTGACCGCTTCTACCCCAATGACTTGTGGGTGGTTGTGTTCCAGTTTCAGCACATCATGGTTGGCCTTATCCTGCCTGGTATTGTCATCCTGTCCTGCTATTGCATTATCATCTCCAAGCTGTCACACTCCAAGGGCCACCAGAAGCGCAAGGCCCTCAAGACCACAGTCATCCTCATCCTGGCTTTCTTCGCCTGTTGGCTGCCTTACTACATTGGGATCAGCATCGACTCCTTCATCCTCCTGGAAATCATCAAGCAAGGGTGTGAGTTTGAGAACACTGTGCACAAGTGGATTTCCATCACCGAGGCCCTAGCTTTCTTCCACTGTTGTCTGAACCCCATCCTCTATGCTTTCCTTGGAGCCAAATTTAAAACCTCTGCCCAGCACGCACTCACCTCTGTGAGCAGAGGGTCCAGCCTCAAGATCCTCTCCAAAGGAAAGCGAGGTGGACATTCATCTGTTTCCACTGAGTCTGAGTCTTCAAGTTTTCACTCCAGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P61073",
                "url": "http://purl.uniprot.org/uniprot/P61073",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000048-c-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "rep1",
            "rep2"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000048-c-1",
        "variant_count": 7021,
        "experiment": "urn:mavedb:00000048-c",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-29",
        "modification_date": "2020-06-29",
        "urn": "urn:mavedb:00000045-b-1",
        "publish_date": "2020-06-29",
        "created_by": "0000-0002-2020-2641",
        "modified_by": "0000-0002-2020-2641",
        "extra_metadata": {},
        "abstract_text": "alpha-Synuclein is a conformationally dynamic protein linked to a variety of neurodegenerative diseases, including Parkinson’s. Conformational transitions of this protein are believed to contribute to disease etiology, but the conformations that drive pathology remain unclear. To address this question, we screened an exhaustive library of alpha-synuclein missense variants for their toxicity in yeast, a well-validated cellular model for alpha-synuclein pathobiology. By examining the pattern of mutations that disrupts cellular toxicity, we were able to build a model for the structure of the toxic species.",
        "method_text": "A double-stranded DNA library based on human alpha-synuclein cDNA was produced by commercial oligonucleotide synthesis and assembly. The designed library encodes all single missense variants of alpha-synuclein, each encoded by a single codon. This library was cloned in frame with a C-terminal GFP fusion, and 26bp random barcodes were appended 3’ to the stop codon to facilitate repeated selection. This construct was cloned under control of an inducible promoter and transformed into E. coli. Following restrictive transformation, the final library diversity was ~60,000 unique clones, corresponding to ~20 barcodes per missense variant. The barcoded coding region was amplified and analyzed by long-read MiSeq in order to associate barcodes with coding sequences. The resulting lookup table expedites subsequent quantification of variant frequencies.\r\n\r\nThis plasmid library was then transformed into yeast. Selection was performed by inducing expression and collecting aliquots over time. Additional experiments were performed in yeast treated with small molecules. Finally, the expression level of each variant was estimated by cell sorting yeast cells based on the fluorescence of the GFP fusion.",
        "short_description": "The toxicity of alpha-synuclein missense variants was determined by measuring their change in frequency during yeast outgrowth",
        "title": "Deep Mutational Scanning of alpha-Synuclein based on Toxicity in Yeast Treated with Geldanamycin",
        "keywords": [
            {
                "text": "alpha-synuclein"
            },
            {
                "text": "yeast"
            },
            {
                "text": "protein folding"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1101/2020.05.01.072884",
                "url": "https://doi.org/10.1101/2020.05.01.072884",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [],
        "contributors": [
            "0000-0002-2020-2641"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "alpha-synuclein",
            "reference_sequence": {
                "sequence": "ATGGATGTATTCATGAAAGGACTTTCAAAGGCCAAGGAGGGAGTTGTGGCTGCTGCTGAGAAAACCAAACAGGGTGTGGCAGAAGCAGCAGGAAAGACAAAAGAGGGTGTTCTCTATGTAGGCTCCAAAACCAAGGAGGGAGTGGTGCATGGTGTGGCAACAGTGGCTGAGAAGACCAAAGAGCAAGTGACAAATGTTGGAGGAGCAGTGGTGACGGGTGTGACAGCAGTAGCCCAGAAGACAGTGGAGGGAGCAGGGAGCATTGCAGCAGCCACTGGCTTTGTCAAAAAGGACCAGTTGGGCAAGAATGAAGAAGGAGCCCCACAGGAAGGAATTCTGGAAGATATGCCTGTGGATCCTGACAATGAGGCTTATGAAATGCCTTCTGAGGAAGGGTATCAAGACTACGAACCTGAAGCCTAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P37840",
                "url": "http://purl.uniprot.org/uniprot/P37840",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg16",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.10",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.10",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000045-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000045-b-1",
        "variant_count": 2800,
        "experiment": "urn:mavedb:00000045-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-11-11",
        "modification_date": "2020-11-11",
        "urn": "urn:mavedb:00000048-b-1",
        "publish_date": "2020-11-11",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This experiment utilised site-saturation mutagenesis (SSM) to measure the functional consequences of mutations in the human chemokine receptor, CXCR4 and to map ligand interaction sites. Cells were selected for binding to the CXCR4 physiological ligand CXCL12.",
        "method_text": "Data obtained from selecting cells for CXCL12 binding was analysed using Enrich (version unspecified). Log~2~ enrichment ratios were calculated and normalised by subtracting the frequency of the WT sequence. Log~2~ enrichment ratios for two replicates were averaged to obtain variant scores. Note that the scores here were not reported in the manuscript tables, but were calculated from the replicate enrichment ratios that were reported.",
        "short_description": "Deep mutational scan selecting for CXCR4 binding to CXCL12 in Expi293F cells.",
        "title": "CXCR4 CXCL12 binding",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29678950",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29678950",
                "dbversion": null,
                "dbname": "PubMed"
            },
            {
                "identifier": "23827138",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/23827138",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0001-6681-7994"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CXCR4",
            "reference_sequence": {
                "sequence": "GAGGGGATCAGTATATACACTTCAGATAACTACACCGAGGAAATGGGCTCAGGGGACTATGACTCCATGAAGGAACCCTGTTTCCGTGAAGAAAATGCTAATTTCAATAAAATCTTCCTGCCCACCATCTACTCCATCATCTTCTTAACTGGCATTGTGGGCAATGGATTGGTCATCCTGGTCATGGGTTACCAGAAGAAACTGAGAAGCATGACGGACAAGTACAGGCTGCACCTGTCAGTGGCCGACCTCCTCTTTGTCATCACGCTTCCCTTCTGGGCAGTTGATGCCGTGGCAAACTGGTACTTTGGGAACTTCCTATGCAAGGCAGTCCATGTCATCTACACAGTCAACCTCTACAGCAGTGTCCTCATCCTGGCCTTCATCAGTCTGGACCGCTACCTGGCCATCGTCCACGCCACCAACAGTCAGAGGCCAAGGAAGCTGTTGGCTGAAAAGGTGGTCTATGTTGGCGTCTGGATCCCTGCCCTCCTGCTGACTATTCCCGACTTCATCTTTGCCAACGTCAGTGAGGCAGATGACAGATATATCTGTGACCGCTTCTACCCCAATGACTTGTGGGTGGTTGTGTTCCAGTTTCAGCACATCATGGTTGGCCTTATCCTGCCTGGTATTGTCATCCTGTCCTGCTATTGCATTATCATCTCCAAGCTGTCACACTCCAAGGGCCACCAGAAGCGCAAGGCCCTCAAGACCACAGTCATCCTCATCCTGGCTTTCTTCGCCTGTTGGCTGCCTTACTACATTGGGATCAGCATCGACTCCTTCATCCTCCTGGAAATCATCAAGCAAGGGTGTGAGTTTGAGAACACTGTGCACAAGTGGATTTCCATCACCGAGGCCCTAGCTTTCTTCCACTGTTGTCTGAACCCCATCCTCTATGCTTTCCTTGGAGCCAAATTTAAAACCTCTGCCCAGCACGCACTCACCTCTGTGAGCAGAGGGTCCAGCCTCAAGATCCTCTCCAAAGGAAAGCGAGGTGGACATTCATCTGTTTCCACTGAGTCTGAGTCTTCAAGTTTTCACTCCAGC",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 1,
                "identifier": "P61073",
                "url": "http://purl.uniprot.org/uniprot/P61073",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000048-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "rep1",
            "rep2"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000048-b-1",
        "variant_count": 7021,
        "experiment": "urn:mavedb:00000048-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-20",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000031-c-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "5",
            "end": 1295247,
            "start": 1294989,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of TERT promoter in glioblastoma SF7996 (GBM) cells, siRNA knockdown of GABPA.",
        "title": "Saturation mutagenesis MPRA of TERT promoter, GBM siRNA knockdown",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TERT promoter",
            "reference_sequence": {
                "sequence": "TCGCGGGGGTGGCCGGGGCCAGGGCTTCCCACGTGCGCAGCAGGACGCAGCGCTGCCTGAAACTCGCGCCGCGAGGAGAGGGCGGGGCCGCGGAAAGGAAGGGGAGGGGCTGGGAGGGCCCGGAGGGGGCTGGGCCGGGGACCCGGGAGGGGTCGGGACGGGGCGGGGTCCGCGCGGAGGAGGCGGAGCTGGAAGGTGAAGGGGCAGGACGGGTGCCCGGGTCCCCAGTCCCTCCGCCACGTGGGAAGCGCGGTCCTGG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000031-c-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000031-c-1",
        "variant_count": 974,
        "experiment": "urn:mavedb:00000031-c",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000016-a-1",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "9",
            "end": 97853854,
            "start": 97853255,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://doi.org/10.1038/s41467-019-11526-w>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://doi.org/10.1038/s41467-019-11526-w>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of FOXE1 promoter in HeLa cells.",
        "title": "Saturation mutagenesis MPRA of FOXE1 promoter",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "FOXE1 promoter",
            "reference_sequence": {
                "sequence": "CTCGCCAGCGGTCCGCAGGGCTGGAGACCCACGCCGTGGAGAGGACCAGCCTCAGGTCGCCCCGCCTGGGCCCGCGCCCCGACCTCGCTGCCCCCGCCTCGCCTCTCTGCCCGTGGCGCTTACGGCCACCTTGGCCTCGGGGGCAGGGCATGGGCGGCCCCCGCCAGATCGCCCAGCGCCAGTACTAACTGCCCTCGCTCTGGCCTTCGAGCCCGAAGCCTCTTCTGCGCGCACAACCTAGGCAGTAATCCTAAACTAGCGGGCACCACAGACCAGCTGCAGCCACCCCAACCCAGGGATCACTTCCGGACCCCTCGACCGCCCGGCACCAGCGCGCAAGGGACCCTTCAGCCGGAGACCAGAGTCCAGTCCCGGTCACGAGGCCACCGCCGCTGCCCGCCTCGAGAAGCACCACGCGGGCTGAGCCGTCGGCTAGCGGGTCACTCCCGAGCCTCTGTCTGCACCGCGCCAGCCCCAGACCACGGACGCTGAGCCTCCAGCGCGTGCCAGCCTGGGCCGCTGGGCTCTCGGGGCCAGCCCGCGACGATCCCCTGAGCTCTCCGCAGAAGGGCCGAGCGTCCGTTCCGGGGACGCCAGGCC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000016-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000016-a-1",
        "variant_count": 2048,
        "experiment": "urn:mavedb:00000016-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2020-08-23",
        "modification_date": "2020-12-10",
        "urn": "urn:mavedb:00000051-b-1",
        "publish_date": "2020-12-10",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study mutated the residues on ErbB2 helix region of the bacterial inner membrane. The experiment links the insertion and self-association property of the tested region to the survive of bacteria and studied the membrane-protein energetics landscape of missense mutations of the target.",
        "method_text": "The helix region of ErbB2 is used as the membrane-spanning segment for dsT$\\beta$L in this experiment. The frequency of the count of each mutant relative to wild-type in the selected and reference pools was computed. Variants with <100 counts in the reference population were removed. The selection coefficients were calculated as the ratio of variant relative frequency in the selected and reference pool. The selection coeffcients were then transformed to apparent changes in free energy due to each single-point substitution through the Gibbs free-energy equation: \r\n\r\n$$ \\Delta\\Delta G^{app} = -RT\\ln(s) $$\r\n\r\nwhere R is the gas constant and T is the absolute temperature (310K). The count data table includes the count of each variant in the reference and selected pool.",
        "short_description": "A deep mutational scanning experiment targeting ErbB2 helix region on the bacterial inner membrane.",
        "title": "Helix region of ErbB2",
        "keywords": [],
        "doi_ids": [
            {
                "identifier": "10.7554/eLife.12125",
                "url": "https://doi.org/10.7554/eLife.12125",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "26824389",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/26824389",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "ErbB2",
            "reference_sequence": {
                "sequence": "CTGACGTCTATCATCTCTGCGGTGGTTGGCATTCTGCTGGTCGTGGTCTTGGGCGTGGTCTTTGGCATCCTG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 650,
                "identifier": "P04626",
                "url": "http://purl.uniprot.org/uniprot/P04626",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000051-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "frequency_ref",
            "frequency_sel",
            "ratio"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "count_reference",
            "count_selected"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000051-b-1",
        "variant_count": 480,
        "experiment": "urn:mavedb:00000051-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000029-b-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "1",
            "end": 109275251,
            "start": 109274651,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of SORT1 enhancer (reversed orientation) in HepG2 cells.",
        "title": "Saturation mutagenesis MPRA of SORT1 enhancer, flipped",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "SORT1 enhancer (flipped)",
            "reference_sequence": {
                "sequence": "TGAACTGGAAAAGCCCTGTCCGGTGAGGGGGCAGAAGGACTCAGCGCCCCTGGACCCCCAAATGCTGCATGAACACATTTTCAGGGGAGCCTGTGCCCCCAGGCGGGGGTCGGGCAGCCCCAGCCCCTCTCCTTTTCCTGGACTCTGGCCGTGCGCGGCAGCCCAGGTGTTTGCTCAGTTGCTGACCCAAAAGTGCTTCATTTTTCGTGCCCGCCCCGCGCCCCGGGCAGGCCAGTCATGTGTTAAGTTGCGCTTCTTTGCTGTGATGTGGGTGGGGGAGGAAGAGTAAACACAGTGCTGGCTCGGCTGCCCTGAGGGTGCTCAATCAAGCACAGGTTTCAAGTCTGGGTTCTGGTGTCCACTCACCCACCCCACCCCCCAAAATCAGACAAATGCTACTTTGTCTAACCTGCTGTGGCCTCTGAGACATGTTCTATTTTTAACCCCTTCTTGGAATTGGCTCTCTTCTTCAAAGGACCAGGTCCTGTTCCTCTTTCTCCCCGACTCCACCCCAGCTCCCTGTGAAGAGAGAGTTAATATATTTGTTTTATTTATTTGCTTTTTGTGTTGGGATGGGTTCGTGTCCAGTCCCGGGGGTCTG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000029-b-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000029-b-1",
        "variant_count": 1974,
        "experiment": "urn:mavedb:00000029-b",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000040-a-1",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effect of variants in yeast HSP90 under different combinations of temperature (30C or 36C) and presence/absence of salt (0.5 M NaCl). The results explore the adaptive potential of this essential gene.",
        "method_text": "Sequencing reads were filtered based on a minimum Phred quality score of 20 across all 36 bases. For each time point, the log2 ratio of each variant's count to the wild type count was calculated. The score of each variant was calculated as the slope of these log ratios to time in wild type generations. Scores of -0.5 are considered null-like.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae at 30C without addition of NaCl.",
        "title": "Deep mutational scan of HSP90, 30C no salt",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "24299404",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/24299404",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000040-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000040-a-1",
        "variant_count": 189,
        "experiment": "urn:mavedb:00000040-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-27",
        "modification_date": "2020-11-20",
        "urn": "urn:mavedb:00000049-a-5",
        "publish_date": "2020-11-20",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "",
        "short_description": "A deep mutational scan of human MTHFR via functional complementation in yeast at 12ug/ml folate in A222V background",
        "title": "MTHFR at 12ug/ml folate in A222V background",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390",
            "0000-0002-9219-4310",
            "0000-0002-2550-2141",
            "0000-0001-6465-5776"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "MTHFR",
            "reference_sequence": {
                "sequence": "ATGGTGAACGAAGCCAGAGGAAACAGCAGCCTCAACCCCTGCTTGGAGGGCAGTGCCAGCAGTGGCAGTGAGAGCTCCAAAGATAGTTCGAGATGTTCCACCCCGGGCCTGGACCCCGAGCGGCATGAGAGACTCCGGGAGAAGATGAGGCGGCGATTGGAATCTGGTGACAAGTGGTTCTCCCTGGAATTCTTCCCTCCTCGAACTGCTGAGGGAGCTGTCAATCTCATCTCAAGGTTTGACCGGATGGCAGCAGGTGGCCCCCTCTACATAGACGTGACCTGGCACCCAGCAGGTGACCCTGGCTCAGACAAGGAGACCTCCTCCATGATGATCGCCAGCACCGCCGTGAACTACTGTGGCCTGGAGACCATCCTGCACATGACCTGCTGCCGTCAGCGCCTGGAGGAGATCACGGGCCATCTGCACAAAGCTAAGCAGCTGGGCCTGAAGAACATCATGGCGCTGCGGGGAGACCCAATAGGTGACCAGTGGGAAGAGGAGGAGGGAGGCTTCAACTACGCAGTGGACCTGGTGAAGCACATCCGAAGTGAGTTTGGTGACTACTTTGACATCTGTGTGGCAGGTTACCCCAAAGGCCACCCCGAAGCAGGGAGCTTTGAGGCTGACCTGAAGCACTTGAAGGAGAAGGTGTCTGCGGGAGCCGATTTCATCATCACGCAGCTTTTCTTTGAGGCTGACACATTCTTCCGCTTTGTGAAGGCATGCACCGACATGGGCATCACTTGCCCCATCGTCCCCGGGATCTTTCCCATCCAGGGCTACCACTCCCTTCGGCAGCTTGTGAAGCTGTCCAAGCTGGAGGTGCCACAGGAGATCAAGGACGTGATTGAGCCAATCAAAGACAACGATGCTGCCATCCGCAACTATGGCATCGAGCTGGCCGTGAGCCTGTGCCAGGAGCTTCTGGCCAGTGGCTTGGTGCCAGGCCTCCACTTCTACACCCTCAACCGCGAGATGGCTACCACAGAGGTGCTGAAGCGCCTGGGGATGTGGACTGAGGACCCCAGGCGTCCCCTACCCTGGGCTCTCAGCGCCCACCCCAAGCGCCGAGAGGAAGATGTACGTCCCATCTTCTGGGCCTCCAGACCAAAGAGTTACATCTACCGTACCCAGGAGTGGGACGAGTTCCCTAACGGCCGCTGGGGCAATTCCTCTTCCCCTGCCTTTGGGGAGCTGAAGGACTACTACCTCTTCTACCTGAAGAGCAAGTCCCCCAAGGAGGAGCTGCTGAAGATGTGGGGGGAGGAGCTGACCAGTGAAGAAAGTGTCTTTGAAGTCTTCGTTCTTTACCTCTCGGGAGAACCAAACCGGAATGGTCACAAAGTGACTTGCCTGCCCTGGAACGATGAGCCCCTGGCGGCTGAGACCAGCCTGCTGAAGGAGGAGCTGCTGCGGGTGAACCGCCAGGGCATCCTCACCATCAACTCACAGCCCAACATCAACGGGAAGCCGTCCTCCGACCCCATCGTGGGCTGGGGCCCCAGCGGGGGCTATGTCTTCCAGAAGGCCTACTTAGAGTTTTTCACTTCCCGCGAGACAGCGGAAGCACTTCTGCAAGTGCTGAAGAAGTACGAGCTCCGGGTTAATTACCACCTTGTCAATGTGAAGGGTGAAAACATCACCAATGCCCCTGAACTGCAGCCGAATGCTGTCACTTGGGGCATCTTCCCTGGGCGAGAGATCATCCAGCCCACCGTAGTGGATCCCGTCAGCTTCATGTTCTGGAAGGACGAGGCCTTTGCCCTGTGGATTGAGCGGTGGGGAAAGCTGTATGAGGAGGAGTCCCCGTCCCGCACCATCATCCAGTACATCCACGACAACTACTTCCTGGTCAACCTGGTGGACAATGACTTCCCACTGGACAACTGCCTCTGGCAGGTGGTGGAAGACACATTGGAGCTTCTCAACAGGCCCACCCAGAATGCGAGAGAAACGGAGGCTCCATGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P42898",
                "url": "http://purl.uniprot.org/uniprot/P42898",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000177000",
                "url": "http://www.ensembl.org/id/ENSG00000177000",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 230,
                "identifier": "NM_005957",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_005957",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000049-a-5",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se",
            "exp.score",
            "exp.se",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000049-a-5",
        "variant_count": 13690,
        "experiment": "urn:mavedb:00000049-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-11-09",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000005-a-1",
        "publish_date": "2018-12-04",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Success in precision medicine depends on our ability to determine which rare human genetic variants have functional effects. Classical homocystinuria - characterized by elevated homocyst(e)ine in plasma and urine - is caused by primarily-rare variants in the cystathionine beta-synthase (CBS) gene. About half of patients respond to vitamin B6 therapy. With early detection in newborns, existing therapies are highly effective. Functional CBS variants, especially those that respond to vitamin B6, can be detected based on their ability to restore growth in yeast cells lacking CYS4 (the yeast ortholog of CBS). This assay has previously been carried out only reactively after first observation of a variant in patients. Here we describe a proactive comprehensive missense variant effect map for human CBS. Together, saturation codon-replacement mutagenesis, en masse growth selection at different vitamin B6 levels, and sequencing yielded a look-up table for CBS missense variant function and vitamin B6-remediability in yeast. The CBS variant effect map identified disease variants and predicted both disease severity (r = 0.82) and human clinical response to vitamin B6 (r = 0.89). Thus, highly-multiplexed cell-based assays can yield proactive maps of variant function and patient response to therapy, even for rare variants not previously seen in the clinic.\r\n\r\nSee [Sun et al 2018](https://www.biorxiv.org/content/early/2018/11/19/473983)",
        "method_text": "##Scoring procedure:\r\nDMS-TileSeq reads were processed using the [tileseq_package](https://bitbucket.org/rothlabto/tileseq_package) and [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) softwares. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). Log-ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. Random-Forest-based machine learning was used to impute missing values and refine low-confidence measurements, based on intrinsic, structural, and biochemical features.\r\n\r\nSee [Sun et al 2018](https://www.biorxiv.org/content/early/2018/11/19/473983) for more details.\r\n\r\n## Additional columns:\r\n* exp.score = experimental score from the joint DMS-BarSeq/DMS-TileSeq screens\r\n* exp.sd = standard deviation of the experimental score\r\n* df = degrees of freedom (number of replicates contributing to the experimental score)\r\n* pred.score = machine-learning predicted score",
        "short_description": "A Deep Mutational Scan of the human cystathionine-beta-synthase (CBS) using functional complementation in yeast via DMS-TileSeq at low levels of Vitamin B6.",
        "title": "CBS low-B6 imputed and refined",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "Vitamin B6"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CBS",
            "reference_sequence": {
                "sequence": "ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P35520",
                "url": "http://purl.uniprot.org/uniprot/P35520",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000160200",
                "url": "http://www.ensembl.org/id/ENSG00000160200",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000005-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": "urn:mavedb:00000005-a-4",
        "current_version": "urn:mavedb:00000005-a-4",
        "variant_count": 11263,
        "experiment": "urn:mavedb:00000005-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-08-07",
        "modification_date": "2019-08-09",
        "urn": "urn:mavedb:00000040-a-3",
        "publish_date": "2019-08-07",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This study measured the effect of variants in yeast HSP90 under different combinations of temperature (30C or 36C) and presence/absence of salt (0.5 M NaCl). The results explore the adaptive potential of this essential gene.",
        "method_text": "Sequencing reads were filtered based on a minimum Phred quality score of 20 across all 36 bases. For each time point, the log2 ratio of each variant's count to the wild type count was calculated. The score of each variant was calculated as the slope of these log ratios to time in wild type generations. Scores of -0.5 are considered null-like.",
        "short_description": "Deep mutational scan of all single mutants in a nine-amino acid region of Hsp90 (Hsp82) in Saccharomyces cerevisiae at 36C without addition of NaCl.",
        "title": "Deep mutational scan of HSP90, 36C no salt",
        "keywords": [
            {
                "text": "NNN mutagenesis"
            },
            {
                "text": "EMPIRIC"
            },
            {
                "text": "growth assay"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "24299404",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/24299404",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "HSP90",
            "reference_sequence": {
                "sequence": "CAATTTGGTTGGTCTGCTAATATGGAA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 581,
                "identifier": "P02829",
                "url": "http://purl.uniprot.org/uniprot/P02829",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "sacCer3/R64",
                        "organism_name": "Saccharomyces cerevisiae",
                        "assembly_identifier": {
                            "identifier": "GCF_000146045.2",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000146045.2",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000040-a-3",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000040-a-3",
        "variant_count": 189,
        "experiment": "urn:mavedb:00000040-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2018-06-29",
        "modification_date": "2019-08-08",
        "urn": "urn:mavedb:00000001-b-1",
        "publish_date": "2018-06-29",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Although we now routinely sequence human genomes, we can confidently identify only a fraction of the sequence variants that have a functional impact. Here, we developed a deep mutational scanning framework that produces exhaustive maps for human missense variants by combining random codon mutagenesis and multiplexed functional variation assays with computational imputation and refinement. We applied this framework to four proteins corresponding to six human genes: UBE2I (encoding SUMO E2 conjugase), SUMO1 (small ubiquitin-like modifier), TPK1 (thiamin pyrophosphokinase), and CALM1/2/3 (three genes encoding the protein calmodulin). The resulting maps recapitulate known protein features and confidently identify pathogenic variation. Assays potentially amenable to deep mutational scanning are already available for 57% of human disease genes, suggesting that DMS could ultimately map functional variation for all human disease genes. \r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957)",
        "method_text": "##Scoring procedure:\r\nDMS-TileSeq reads were processed using the [dmsPipeline](https://bitbucket.org/rothlabto/dmspipeline) software. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). log ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. Random-Forest-based machine learning was used to impute missing values and refine low-confidence measurements, based on intrinsic, structural, and biochemical features.\r\n\r\nSee [**Weile *et al.* 2017**](http://msb.embopress.org/content/13/12/957) for more details.\r\n\r\n## Additional columns:\r\n* exp.score = experimental score from the joint DMS-BarSeq/DMS-TileSeq screens\r\n* exp.sd = standard deviation of the experimental score\r\n* df = degrees of freedom (number of replicates contributing to the experimental score)\r\n* pred.score = machine-learning predicted score",
        "short_description": "A machine-learning imputed and refined Deep Mutational Scan of the human SUMO1 using functional complementation in yeast.",
        "title": "SUMO1 imputed and refined",
        "keywords": [
            {
                "text": "sumoylation"
            },
            {
                "text": "imputation"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "29269382",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/29269382",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "SUMO1",
            "reference_sequence": {
                "sequence": "ATGTCTGACCAGGAGGCAAAACCTTCAACTGAGGACTTGGGGGATAAGAAGGAAGGTGAATATATTAAACTCAAAGTCATTGGACAGGATAGCAGTGAGATTCACTTCAAAGTGAAAATGACAACACATCTCAAGAAACTCAAAGAATCATACTGTCAAAGACAGGGTGTTCCAATGAATTCACTCAGGTTTCTCTTTGAGGGTCAGAGAATTGCTGATAATCATACTCCAAAAGAACTGGGAATGGAGGAAGAAGATGTGATTGAAGTTTATCAGGAACAAACGGGGGGTCATTCAACAGTTTAG",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P63165",
                "url": "http://purl.uniprot.org/uniprot/P63165",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000116030",
                "url": "http://www.ensembl.org/id/ENSG00000116030",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": {
                "offset": 149,
                "identifier": "NM_001005781.1",
                "url": "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?val=NM_001005781.1",
                "dbversion": null,
                "dbname": "RefSeq"
            },
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000001-b-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se",
            "exp.score",
            "exp.sd",
            "df",
            "pred.score"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000001-b-1",
        "variant_count": 2020,
        "experiment": "urn:mavedb:00000001-b",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-19",
        "modification_date": "2020-06-19",
        "urn": "urn:mavedb:00000044-a-2",
        "publish_date": "2020-06-19",
        "created_by": "0000-0001-6713-6904",
        "modified_by": "0000-0001-6713-6904",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "Full methods provided in the [preprint](https://www.biorxiv.org/content/10.1101/2020.06.17.157982v1) and full analysis provided in the [GitHub repo](https://github.com/jbloomlab/SARS-CoV-2-RBD_DMS).\r\n\r\n`score` column is the change in log(MFI) relative to the average wildtype expression, polarized such that a positive score indicates improved expression and negative indicates reduced expression. The `library` column indicates which of our two duplicate mutant libraries a measurement is from.",
        "short_description": "RBD genotype and binding score for each unique barcoded variant in the mutant library",
        "title": "per-barcode binding score",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0001-6713-6904",
            "0000-0001-9278-3644",
            "0000-0003-1267-3408"
        ],
        "licence": {
            "long_name": "Other - See Data Usage Guidelines",
            "short_name": "Other - See Data Usage Guidelines",
            "link": "",
            "version": "1.0"
        },
        "target": {
            "name": "SARS-CoV-2 receptor binding domain",
            "reference_sequence": {
                "sequence": "AATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "Other",
                        "organism_name": "Other - genome not listed",
                        "assembly_identifier": null
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000044-a-2",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "avg_count",
            "library"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000044-a-2",
        "variant_count": 195081,
        "experiment": "urn:mavedb:00000044-a",
        "is_meta_analysis": false,
        "data_usage_policy": "See licence from GitHub: https://github.com/jbloomlab/SARS-CoV-2-RBD_DMS/blob/master/LICENSE.md\r\n\r\nBSD 3-Clause License\r\n\r\nCopyright (c) 2020, Tyler N. Starr, Allison J. Greaney, and Jesse D. Bloom\r\nAll rights reserved.\r\n\r\nRedistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n1. Redistributions of source code must retain the above copyright notice, this\r\n   list of conditions and the following disclaimer.\r\n\r\n2. Redistributions in binary form must reproduce the above copyright notice,\r\n   this list of conditions and the following disclaimer in the documentation\r\n   and/or other materials provided with the distribution.\r\n\r\n3. Neither the name of the copyright holder nor the names of its\r\n   contributors may be used to endorse or promote products derived from\r\n   this software without specific prior written permission."
    },
    {
        "creation_date": "2018-07-10",
        "modification_date": "2019-07-28",
        "urn": "urn:mavedb:00000003-a-1",
        "publish_date": "2018-07-10",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "These experiments measured the functional consequences of mutations in the BRCA1 RING domain, where most clinically-relevant single nucleotide variants reside. One goal of the study was to create a \"look-up table\" of single nucleotide variants for clinical use, by prospectively measuring the impact of all possible variants that are likely to appear in patients. The study combines two different assays - one based on E3 ubiquitin ligase activity and one based on BRCA1-BARD1 heterodimer formation - and makes one of the first attempts to combine data from different MAVEs on the same target.\r\n\r\nThis entry contains scores from the phage autoubiquitination assay, which tested the E3 ubiquitin ligase activity of BRCA1 variants.\r\n\r\nNote that this score set does not describe the scores presented in the original publication. It is a reanalysis of the raw data that was produced as part of testing and development for Enrich2.",
        "method_text": "Scores were calculated using the Enrich2 weighted least squares regression scoring model. Replicate scores were combined using the Enrich2 random-effects model. Counts for each variant were calculated as the sum of counts for all barcodes associated with that variant.\r\n\r\nThe scores and standard errors calculated for each of replicate appear as additional columns.\r\n\r\nCount columns are named using the format `<replicate>_c_<timepoint>`. The 0 time point is the input (unselected). Time points are given in rounds.",
        "short_description": "Nucleotide variant scores for deep mutational scan of the BRCA1 RING domain using autoubiquitination calculated by Enrich2.",
        "title": "Enrich2 nucleotide variant scores for BRCA1 E3",
        "keywords": [
            {
                "text": "Phage display"
            },
            {
                "text": "ubiquitin"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28784151",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28784151",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "BRCA1 RING domain",
            "reference_sequence": {
                "sequence": "GATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAAAATCTTAGAGTGTCCCATCTGCCTGGAGTTGATCAAGGAACCTGTCTCCACAAAGTGTGACCACATATTTTGCAAATTTTGCATGCTGAAACTTCTCAACCAGAAGAAAGGGCCTTCACAGTGTCCTTTATGTAAGAATGATATAACCAAAAGGAGCCTACAAGAAAGTACGAGATTTAGTCAACTTGTTGAAGAGCTATTGAAAATCATTTGTGCTTTTCAGCTTGACACAGGTTTGGAGTATGCAAACAGCTATAATTTTGCAAAAAAGGAAAATAACTCTCCTGAACATCTAAAAGATGAAGTTTCTATCATCCAAAGTATGGGCTACAGAAACCGTGCCAAAAGACTTCTACAGAGTGAACCCGAAAATCCTTCCTTGCAGGAAACCAGTCTCAGTGTCCAACTCTCTAACCTTGGAACTGTGAGAACTCTGAGGACAAAGCAGCGGATACAACCTCAAAGGACGTCTGTCTACATTGAATTGGGATCTGATTCTTCTGAAGATACCGTTAATAAGGCAACTTATTGCAGTGTGGGAGATCAAGAATTGTTACAAATCACCCCTCAAGGAACCAGGGATGAAATCAGTTTGGATTCTGCAAAAAAGGCTGCTTGTGAATTTTCTGAGACGGATGTAACAAATACTGAACATCATCAACCCAGTAATAATGATTTGAACACCACTGAGAAGCGTGCAGCTGAGAGGCATCCAGAAAAGTATCAGGGTAGTTCTGTTTCAAACTTGCATGTGGAGCCATGTGGCACAAATACTCATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAGTTC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000003-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_PlusE2NewRep3",
            "score_PlusE2NewRep3",
            "SE_PlusE2NewRep4",
            "score_PlusE2NewRep4",
            "SE_PlusE2NewRep5",
            "score_PlusE2NewRep5",
            "SE_PlusE2Rep3",
            "score_PlusE2Rep3",
            "SE_PlusE2Rep4",
            "score_PlusE2Rep4",
            "SE_PlusE2Rep5",
            "score_PlusE2Rep5"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "PlusE2NewRep3_c_0",
            "PlusE2NewRep3_c_1",
            "PlusE2NewRep3_c_2",
            "PlusE2NewRep3_c_3",
            "PlusE2NewRep3_c_4",
            "PlusE2NewRep3_c_5",
            "PlusE2NewRep4_c_0",
            "PlusE2NewRep4_c_1",
            "PlusE2NewRep4_c_2",
            "PlusE2NewRep4_c_3",
            "PlusE2NewRep4_c_4",
            "PlusE2NewRep4_c_5",
            "PlusE2NewRep5_c_0",
            "PlusE2NewRep5_c_1",
            "PlusE2NewRep5_c_2",
            "PlusE2NewRep5_c_3",
            "PlusE2NewRep5_c_4",
            "PlusE2NewRep5_c_5",
            "PlusE2Rep3_c_0",
            "PlusE2Rep3_c_1",
            "PlusE2Rep3_c_2",
            "PlusE2Rep3_c_3",
            "PlusE2Rep3_c_4",
            "PlusE2Rep3_c_5",
            "PlusE2Rep4_c_0",
            "PlusE2Rep4_c_1",
            "PlusE2Rep4_c_2",
            "PlusE2Rep4_c_3",
            "PlusE2Rep4_c_4",
            "PlusE2Rep4_c_5",
            "PlusE2Rep5_c_0",
            "PlusE2Rep5_c_1",
            "PlusE2Rep5_c_2",
            "PlusE2Rep5_c_3",
            "PlusE2Rep5_c_4",
            "PlusE2Rep5_c_5",
            "Y2H_1_Rep1_c_0",
            "Y2H_1_Rep1_c_18",
            "Y2H_1_Rep1_c_37",
            "Y2H_1_Rep1_c_45",
            "Y2H_1_Rep2_c_0",
            "Y2H_1_Rep2_c_18",
            "Y2H_1_Rep2_c_37",
            "Y2H_1_Rep2_c_45",
            "Y2H_1_Rep3_c_0",
            "Y2H_1_Rep3_c_18",
            "Y2H_1_Rep3_c_37",
            "Y2H_1_Rep3_c_45",
            "Y2H_2_Rep1_c_0",
            "Y2H_2_Rep1_c_16",
            "Y2H_2_Rep1_c_41",
            "Y2H_2_Rep1_c_64",
            "Y2H_2_Rep2_c_0",
            "Y2H_2_Rep2_c_16",
            "Y2H_2_Rep2_c_41",
            "Y2H_2_Rep2_c_64",
            "Y2H_2_Rep3_c_0",
            "Y2H_2_Rep3_c_16",
            "Y2H_2_Rep3_c_41",
            "Y2H_2_Rep3_c_64"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000003-a-1",
        "variant_count": 20724,
        "experiment": "urn:mavedb:00000003-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-06-19",
        "modification_date": "2020-06-19",
        "urn": "urn:mavedb:00000044-a-1",
        "publish_date": "2020-06-19",
        "created_by": "0000-0001-6713-6904",
        "modified_by": "0000-0001-6713-6904",
        "extra_metadata": {},
        "abstract_text": "",
        "method_text": "Full methods provided in the [preprint](https://www.biorxiv.org/content/10.1101/2020.06.17.157982v1) and full analysis provided in the [GitHub repo](https://github.com/jbloomlab/SARS-CoV-2-RBD_DMS).\r\n\r\n`score` column is the change in log<sub>10</sub>(_K_<sub>D,app</sub>) relative to the average wildtype binding, polarized such that a positive score indicates improved binding, and negative indicates reduced binding affinity. The `library` column indicates which of our two duplicate mutant libraries a measurement is from. The `average` column is the average mutation effect from the duplicate libraries. The values in the `average` column were used in the paper analysis.",
        "short_description": "RBD mutation and binding score for each amino acid mutation",
        "title": "per-single-mutant binding score",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0001-6713-6904",
            "0000-0001-9278-3644",
            "0000-0003-1267-3408"
        ],
        "licence": {
            "long_name": "Other - See Data Usage Guidelines",
            "short_name": "Other - See Data Usage Guidelines",
            "link": "",
            "version": "1.0"
        },
        "target": {
            "name": "SARS-CoV-2 receptor binding domain",
            "reference_sequence": {
                "sequence": "AATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "Other",
                        "organism_name": "Other - genome not listed",
                        "assembly_identifier": null
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000044-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "average",
            "library"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000044-a-1",
        "variant_count": 8040,
        "experiment": "urn:mavedb:00000044-a",
        "is_meta_analysis": false,
        "data_usage_policy": "See licence from GitHub: https://github.com/jbloomlab/SARS-CoV-2-RBD_DMS/blob/master/LICENSE.md\r\n\r\nBSD 3-Clause License\r\n\r\nCopyright (c) 2020, Tyler N. Starr, Allison J. Greaney, and Jesse D. Bloom\r\nAll rights reserved.\r\n\r\nRedistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n1. Redistributions of source code must retain the above copyright notice, this\r\n   list of conditions and the following disclaimer.\r\n\r\n2. Redistributions in binary form must reproduce the above copyright notice,\r\n   this list of conditions and the following disclaimer in the documentation\r\n   and/or other materials provided with the distribution.\r\n\r\n3. Neither the name of the copyright holder nor the names of its\r\n   contributors may be used to endorse or promote products derived from\r\n   this software without specific prior written permission."
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000030-a-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "10",
            "end": 112998839,
            "start": 112998240,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of TCF7L2 enhancer in MIN6 cells.",
        "title": "Saturation mutagenesis MPRA of TCF7L2 enhancer",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "TCF7L2 enhancer",
            "reference_sequence": {
                "sequence": "AGGTTCTGTTTCTTGCTTAGTCACTTTCTGTTTGAACAAAATTGGAATTTCCTTTTTGGATCTGTTTCTTTAATTGTAAATTGAATCGGACTAAAACCTTTCCAATTTTTTCACATGTGAAGACATACACAAAAGTTTTATTGGAGGGTTGCACATGTGAAAGAAAAAGGGAGAAAGCAGGATTGAGCAGGGGGAGCCGTCAGATGGTAATGCAGATGTGATGAGATCTCTGCCGGACCAAAGAGAAGATTCCTTTTTAAATGGTGACAAATTCATGGGCTTTCTCTGCCTCAAAACCTAGCACAGCTGTTATTTACTGAACAATTAGAGAGCTAAGCACTTTTTAGATACTATATAATTTAATTGCCGTATGAGGCACCCTTAGTTTTCAGACGAGAAACCACAGTTACAGGGAAGGCAAGTAACTTAGTCAATGTCAGATAACTAGGAAAAGGTTAGAGGGGCCCTGGACACAGGCCTGTGTGACTGAGAAGCTTGGGCACTTCACTGCTACATTTCATCTCTTCGCTATAAACATTTTAGCTTTTTGTGTTTGCTGACTGGCAACAATACATAGTGAAAGTTCTAATAATTTGTAAT",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000030-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000030-a-1",
        "variant_count": 1910,
        "experiment": "urn:mavedb:00000030-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-10-04",
        "modification_date": "2019-10-22",
        "urn": "urn:mavedb:00000005-a-4",
        "publish_date": "2019-10-22",
        "created_by": "0000-0003-1628-9390",
        "modified_by": "0000-0003-1628-9390",
        "extra_metadata": {},
        "abstract_text": "Success in precision medicine depends on our ability to determine which rare human genetic variants have functional effects. Classical homocystinuria - characterized by elevated homocyst(e)ine in plasma and urine - is caused by primarily-rare variants in the cystathionine beta-synthase (CBS) gene. About half of patients respond to vitamin B6 therapy. With early detection in newborns, existing therapies are highly effective. Functional CBS variants, especially those that respond to vitamin B6, can be detected based on their ability to restore growth in yeast cells lacking CYS4 (the yeast ortholog of CBS). This assay has previously been carried out only reactively after first observation of a variant in patients. Here we describe a proactive comprehensive missense variant effect map for human CBS. Together, saturation codon-replacement mutagenesis, en masse growth selection at different vitamin B6 levels, and sequencing yielded a look-up table for CBS missense variant function and vitamin B6-remediability in yeast. The CBS variant effect map identified disease variants and predicted both disease severity (r = 0.82) and human clinical response to vitamin B6 (r = 0.89). Thus, highly-multiplexed cell-based assays can yield proactive maps of variant function and patient response to therapy, even for rare variants not previously seen in the clinic.\r\n\r\nSee Sun et al 2018",
        "method_text": "Scoring procedure:\r\n\r\nDMS-TileSeq reads were processed using the tileseq_package and tilsesqMave softwares. Briefly, TileSeq read counts were used to establish relative allele frequencies in each condition. Non-mutagenized control counts were subtracted from counts (as estimates of sequencing error). Log-ratios of selection over non-selection counts were calculated. The resulting TileSeq fitness values were then normalized to 0-1 scale where 0 corresponds to the median nonsense score and 1 corresponds to the median synonymous score. Gradient boosted tree-based machine learning was used to impute missing values and refine low-confidence measurements, based on intrinsic, structural, and biochemical features.\r\n\r\nSee Sun et al 2018 for more details.",
        "short_description": "A Deep Mutational Scan of the human cystathionine-beta-synthase (CBS) using functional complementation in yeast via DMS-TileSeq at low levels of Vitamin B6.",
        "title": "CBS low-B6 imputed and refined",
        "keywords": [
            {
                "text": "imputation"
            },
            {
                "text": "Vitamin B6"
            },
            {
                "text": "homocystinuria"
            },
            {
                "text": "DMS-TileSeq"
            },
            {
                "text": "complementation"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [],
        "contributors": [
            "0000-0003-1628-9390"
        ],
        "licence": {
            "long_name": "CC BY 4.0 (Attribution)",
            "short_name": "CC BY 4.0",
            "link": "https://creativecommons.org/licenses/by/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "CBS",
            "reference_sequence": {
                "sequence": "ATGCCTTCTGAGACCCCCCAGGCAGAAGTGGGGCCCACAGGCTGCCCCCACCGCTCAGGGCCACACTCGGCGAAGGGGAGCCTGGAGAAGGGGTCCCCAGAGGATAAGGAAGCCAAGGAGCCCCTGTGGATCCGGCCCGATGCTCCGAGCAGGTGCACCTGGCAGCTGGGCCGGCCTGCCTCCGAGTCCCCACATCACCACACTGCCCCGGCAAAATCTCCAAAAATCTTGCCAGATATTCTGAAGAAAATCGGGGACACCCCTATGGTCAGAATCAACAAGATTGGGAAGAAGTTCGGCCTGAAGTGTGAGCTCTTGGCCAAGTGTGAGTTCTTCAACGCGGGCGGGAGCGTGAAGGACCGCATCAGCCTGCGGATGATTGAGGATGCTGAGCGCGACGGGACGCTGAAGCCCGGGGACACGATTATCGAGCCGACATCCGGGAACACCGGGATCGGGCTGGCCCTGGCTGCGGCAGTGAGGGGCTATCGCTGCATCATCGTGATGCCAGAGAAGATGAGCTCCGAGAAGGTGGACGTGCTGCGGGCACTGGGGGCTGAGATTGTGAGGACGCCCACCAATGCCAGGTTCGACTCCCCGGAGTCACACGTGGGGGTGGCCTGGCGGCTGAAGAACGAAATCCCCAATTCTCACATCCTAGACCAGTACCGCAACGCCAGCAACCCCCTGGCTCACTACGACACCACCGCTGATGAGATCCTGCAGCAGTGTGATGGGAAGCTGGACATGCTGGTGGCTTCAGTGGGCACGGGCGGCACCATCACGGGCATTGCCAGGAAGCTGAAGGAGAAGTGTCCTGGATGCAGGATCATTGGGGTGGATCCCGAAGGGTCCATCCTCGCAGAGCCGGAGGAGCTGAACCAGACGGAGCAGACAACCTACGAGGTGGAAGGGATCGGCTACGACTTCATCCCCACGGTGCTGGACAGGACGGTGGTGGACAAGTGGTTCAAGAGCAACGATGAGGAGGCGTTCACCTTTGCCCGCATGCTGATCGCGCAAGAGGGGCTGCTGTGCGGTGGCAGTGCTGGCAGCACGGTGGCGGTGGCCGTGAAGGCCGCGCAGGAGCTGCAGGAGGGCCAGCGCTGCGTGGTCATTCTGCCCGACTCAGTGCGGAACTACATGACCAAGTTCCTGAGCGACAGGTGGATGCTGCAGAAGGGCTTTCTGAAGGAGGAGGACCTCACGGAGAAGAAGCCCTGGTGGTGGCACCTCCGTGTTCAGGAGCTGGGCCTGTCAGCCCCGCTGACCGTGCTCCCGACCATCACCTGTGGGCACACCATCGAGATCCTCCGGGAGAAGGGCTTCGACCAGGCGCCCGTGGTGGATGAGGCGGGGGTAATCCTGGGAATGGTGACGCTTGGGAACATGCTCTCGTCCCTGCTTGCCGGGAAGGTGCAGCCGTCAGACCAAGTTGGCAAAGTCATCTACAAGCAGTTCAAACAGATCCGCCTCACGGACACGCTGGGCAGGCTCTCGCACATCCTGGAGATGGACCACTTCGCCCTGGTGGTGCACGAGCAGATCCAGTACCACAGCACCGGGAAGTCCAGTCAGCGGCAGATGGTGTTCGGGGTGGTCACCGCCATTGACTTGCTGAACTTCGTGGCCGCCCAGGAGCGGGACCAGAAGTGA",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 0,
                "identifier": "P35520",
                "url": "http://purl.uniprot.org/uniprot/P35520",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": {
                "offset": 0,
                "identifier": "ENSG00000160200",
                "url": "http://www.ensembl.org/id/ENSG00000160200",
                "dbversion": null,
                "dbname": "Ensembl"
            },
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000005-a-4",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "sd",
            "se"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": "urn:mavedb:00000005-a-1",
        "next_version": null,
        "current_version": "urn:mavedb:00000005-a-4",
        "variant_count": 11550,
        "experiment": "urn:mavedb:00000005-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-01-24",
        "modification_date": "2019-07-26",
        "urn": "urn:mavedb:00000008-a-1",
        "publish_date": "2019-01-24",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {
            "chr": "7",
            "end": 29161744,
            "start": 29161443,
            "reference": "mm9"
        },
        "abstract_text": "This study described the functional consequence of over 100,000 enhancer variants *in vivo* in mouse liver. Two human enhancers (ALDOB, ECR11) and one mouse enhancer (LTV1) were known to be active in mouse liver and therefore variants in these enhancers should show a measurable difference in transcription. The results were broadly consistent with evolutionary data and transcription factor binding sites, but were not always concordant demonstrating the importance of measuring the effect of enhancer variants directly.\r\n\r\nThis MaveDB entry describes the LTV1 enhancer data. Datasets for other enhancers described in the same publication are also available: [ALDOB](https://www.mavedb.org/experiment/urn:mavedb:00000006-a/) [ECR11](https://www.mavedb.org/experiment/urn:mavedb:00000007-a/)",
        "method_text": "Scores were calculated using a trivariate linear regression model. A separate model was built for each position in the enhancer, with a predictor for each possible variant nucleotide at that position.\r\n\r\nBecause most enhancer haplotypes in the LTV1 dataset had multiple tags, the data were normalized by dividing the total number of counts for a given haplotype by the number of tags for that haplotype.\r\n\r\nThe scores presented are therefore a combination of the effects of each individual variant on diverse enhancer haplotype backgrounds.\r\n\r\nSee metadata (available via download button) for wild type genomic coordinates in JSON format.",
        "short_description": "Trivariate regression scores for each nucleotide change as described in Patwardhan et al. 2012.",
        "title": "Trivariate regression scores for LTV1 replicate 1 of 2",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "doped oligo synthesis"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "liver"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "22371081",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/22371081",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "LTV1 enhancer",
            "reference_sequence": {
                "sequence": "CTTTGGGTGACCCCTGACCCTGGCCGCCTGGGCTCGCCTTCCCGCACATTCCGTCCTCGCCGCCCCGCCCCACCCCGCCCTCCTTCCTTGGCCCTGTGGGGACGGAAACATCCCGTTCCTGCCCAAGCTGGGTCAAGAGCCGGAGGGACAGGACCAGAGCACCCCTTACGCCAGAACTAGCTCTCCTTGTTCCTACTGGGTGACCTCATCTCGCCACGCCTCCTCAGGTGAACACCCGGGCTGGTAACGTCACTTCCTGCCAGGTAAGCGCCCCCAGGCAGCACTGCTCACGGAAAGGTCTG",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "mm9",
                        "organism_name": "Mus musculus",
                        "assembly_identifier": {
                            "identifier": "GCF_000001635.18",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001635.18",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000008-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "pvalue"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000008-a-1",
        "variant_count": 906,
        "experiment": "urn:mavedb:00000008-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2019-02-20",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000034-a-1",
        "publish_date": "2019-02-20",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "7",
            "end": 156791604,
            "start": 156791119,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of ZRS enhancer in NIH3T3 cells. Co-transfected with Hoxd13.",
        "title": "Saturation mutagenesis MPRA of ZRS enhancer, Hoxd13",
        "keywords": [
            {
                "text": "enhancer"
            },
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "ZRS enhancer",
            "reference_sequence": {
                "sequence": "TGAGATATGGCTTCATTTTCTGTAATAAACACTAAGATCAAAACATGACCCAAGTTAAATTTCCTTGCAGGGTTCCCAGCAGGGGCTTCCCTTTTGTCTGTGATTTCCTCTCACCCACCAGAACCAGGCCAAATATGCGCATGTGCCACTAACACTAAGCAGCACTTCCTTAATCACTCATTTCCAACAATTTATGGATCATCAGTGGCAAAAAACGAGCAAAAATAATGAAAGAATGCAATGAAAGCTCGTGGAGACAGAGGCTGGACTTCCTACTCACTCTGTGTCTCTTTAAGATGGAGGCCTGATACAAATTAGCCACTGGGGGGAAAAAGTCATCTGGTCATAAAATACAGTACAAGGTCACTTTTATGTAAGTTTGCCAAAAGGGACATAAACCAGGACAATTTCAAACTGTGACACAGGATAGAAACATATTAAAAAAATCTTTGTTCCTCCTCTATTGTGCTGTCATGTTGCTCAGCA",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000034-a-1",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000034-a-1",
        "variant_count": 1661,
        "experiment": "urn:mavedb:00000034-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2019-02-19",
        "modification_date": "2019-11-21",
        "urn": "urn:mavedb:00000023-a-2",
        "publish_date": "2019-02-19",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0002-2032-6679",
        "extra_metadata": {
            "chr": "19",
            "end": 11089548,
            "start": 11089231,
            "reference": "hg38"
        },
        "abstract_text": "This study performed saturation mutagenesis on disease-associated enhancer and promoter regions and measured the effect of each mutation using massively parallel reporter assays (MPRA). The data describe potentially pathogenic mutations as well as the density of putative functional bases in each of the regulatory elements.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>",
        "method_text": "The effect of each variant was calculated using a multiple linear regression model based on the DNA counts and RNA counts. Each variant needed to have 10 unique barcode tags to be included in the downstream analyses.\r\n\r\nAs described by <https://www.biorxiv.org/content/10.1101/505362v1>\r\n\r\nMetadata contains the genomic coordinates for the target sequence.\r\n\r\nColumn descriptions:\r\n\r\n- 'score' is the Log2 variant expression effect derived from the fit of the linear model (coefficient).\r\n- 'p-value' is the P-value of the coefficient.\r\n- 'unique_tags' is the number of unique tags associated with the variant.\r\n- 'dna_sequences' is the count of DNA sequences that contain the variant (used for fitting the linear model).\r\n- 'rna_sequences' is the count of RNA sequences that contain the variant (used for fitting the linear model).",
        "short_description": "Saturation mutagenesis MPRA of LDLR promoter in HepG2 cells. Biological replicate 2 of 2.",
        "title": "Saturation mutagenesis MPRA of LDLR promoter, replicate 2",
        "keywords": [
            {
                "text": "barcode sequencing"
            },
            {
                "text": "MPRA"
            },
            {
                "text": "promoter"
            },
            {
                "text": "regression"
            }
        ],
        "doi_ids": [
            {
                "identifier": "10.1038/s41467-019-11526-w",
                "url": "https://doi.org/10.1038/s41467-019-11526-w",
                "dbversion": null,
                "dbname": "DOI"
            },
            {
                "identifier": "10.17605/OSF.IO/75B2M",
                "url": "https://doi.org/10.17605/OSF.IO/75B2M",
                "dbversion": null,
                "dbname": "DOI"
            }
        ],
        "pubmed_ids": [
            {
                "identifier": "31395865",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/31395865",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0002-2032-6679"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "LDLR promoter",
            "reference_sequence": {
                "sequence": "AGCTCTTCACCGGAGACCCAAATACAACAAATCAAGTCGCCTGCCCTGGCGACACTTTCGAAGGACTGGAGTGGGAATCAGAGCTTCACGGGTTAAAAAGCCGATGTCACATCGGCCGTTCGAAACTCCTCCTCTTGCAGTGAGGTGAAGACATTTGAAAATCACCCCACTGCAAACTCCTCCCCCTGCTAGAAACCTCACATTGAAATGCTGTAAATGACGTGGGCCCCGAGTGCAATCGCGGGAAGCCAGGGTTTCCAGCTAGGACACAGCAGGTCGTGATCCGGGTCGGGACACTGCCTGGCAGAGGCTGCGAGC",
                "sequence_type": "dna"
            },
            "uniprot": null,
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000023-a-2",
            "type": "Regulatory"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "p-value",
            "unique_tags",
            "dna_sequences",
            "rna_sequences"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000023-a-2",
        "variant_count": 1093,
        "experiment": "urn:mavedb:00000023-a",
        "is_meta_analysis": false,
        "data_usage_policy": "We are making our data available prior to publication in line with Fort Lauderdale principle, allowing others to use the data but allowing the data producers to make the first presentations and to publish the first paper with global analyses of the data. In addition, we also reserve the right to publish the first analysis of the differences seen in the TERT knock-down experiments and alternative cell-type experiments. Studies that do not overlap with these intentions may be submitted for publication at any time, but must appropriately cite the data source. After publication of the data, the first publication of the data producers should be cited for any use of these data."
    },
    {
        "creation_date": "2018-06-25",
        "modification_date": "2019-07-26",
        "urn": "urn:mavedb:00000002-a-1",
        "publish_date": "2018-07-10",
        "created_by": "0000-0003-1474-605X",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "This was the first published deep mutational scan. The experiment quantified binding affinity between the human YAP65 (YAP1) WW domain and a peptide binding partner using phage display. The phage display selection was tuned such that the enrichment in each round was moderate, allowing inefficient binders to be maintained in the population and subsequently quantified.\r\n\r\nNote that this score set does not describe the scores presented in the original publication. It is a reanalysis of the raw data that was produced as part of testing and development for Enrich2.",
        "method_text": "Scores were calculated using the Enrich2 weighted least squares regression scoring model. Scores for the two technical replicates were combined using the Enrich2 random-effects model.\r\n\r\nThe scores and standard errors calculated for each of the two technical replicates (101208 and 110307) appear as additional columns.\r\n\r\nCount columns are named using the format `<replicate>_c_<timepoint>`. The 0 time point is the input (unselected) and each replicate underwent three rounds of selection, numbered 1..3.",
        "short_description": "Nucleotide variant scores for deep mutational scan of the hYAP65 WW domain using phage display calculated by Enrich2.",
        "title": "Enrich2 nucleotide variant scores for YAP65 WW domain",
        "keywords": [
            {
                "text": "WW domain"
            },
            {
                "text": "Phage display"
            },
            {
                "text": "Binding"
            }
        ],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28784151",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28784151",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0001-7614-1713"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "hYAP65 WW domain",
            "reference_sequence": {
                "sequence": "GACGTTCCACTGCCGGCTGGTTGGGAAATGGCTAAAACTAGTTCTGGTCAGCGTTACTTCCTGAACCACATCGACCAGACCACCACGTGGCAGGACCCGCGT",
                "sequence_type": "dna"
            },
            "uniprot": {
                "offset": 169,
                "identifier": "P46937",
                "url": "http://purl.uniprot.org/uniprot/P46937",
                "dbversion": null,
                "dbname": "UniProt"
            },
            "ensembl": null,
            "refseq": null,
            "reference_maps": [
                {
                    "genome": {
                        "short_name": "hg38",
                        "organism_name": "Homo sapiens",
                        "assembly_identifier": {
                            "identifier": "GCF_000001405.26",
                            "url": "http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.26",
                            "dbversion": null,
                            "dbname": "GenomeAssembly"
                        }
                    }
                }
            ],
            "scoreset": "urn:mavedb:00000002-a-1",
            "type": "Protein coding"
        },
        "score_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "score",
            "SE",
            "epsilon",
            "SE_101208",
            "score_101208",
            "SE_110307",
            "score_110307"
        ],
        "count_columns": [
            "hgvs_nt",
            "hgvs_splice",
            "hgvs_pro",
            "101208_c_0",
            "101208_c_1",
            "101208_c_2",
            "101208_c_3",
            "110307_c_0",
            "110307_c_1",
            "110307_c_2",
            "110307_c_3"
        ],
        "previous_version": null,
        "next_version": null,
        "current_version": "urn:mavedb:00000002-a-1",
        "variant_count": 48183,
        "experiment": "urn:mavedb:00000002-a",
        "is_meta_analysis": false,
        "data_usage_policy": ""
    },
    {
        "creation_date": "2020-12-03",
        "modification_date": "2021-01-17",
        "urn": "urn:mavedb:00000057-b-1",
        "publish_date": "2021-01-17",
        "created_by": "0000-0003-2449-7034",
        "modified_by": "0000-0003-1474-605X",
        "extra_metadata": {},
        "abstract_text": "The authors used saturation mutagenesis to study the variant effect of Ras, with the regulation of GTPase activating protein (GAP) but in the absence of guanine nucleotide exchange factor (GEF). The variants were selected by bacterial two-hybrid strategy.",
        "method_text": "The frequency of each mutant was determined through sequencing. The natural logarithm of the frequency ratio for each mutant after and before selection was calculated. These values were then normalized by dividing the wild type values which were calculated the same way.",
        "short_description": "Selection result of Ras mutants expressed in the presence of the GAP, but without the GEF",
        "title": "Attenuated-Ras",
        "keywords": [],
        "doi_ids": [],
        "pubmed_ids": [
            {
                "identifier": "28686159",
                "url": "http://www.ncbi.nlm.nih.gov/pubmed/28686159",
                "dbversion": null,
                "dbname": "PubMed"
            }
        ],
        "contributors": [
            "0000-0003-1474-605X",
            "0000-0003-2449-7034"
        ],
        "licence": {
            "long_name": "CC BY-NC-SA 4.0 (Attribution-NonCommercial-ShareAlike)",
            "short_name": "CC BY-NC-SA 4.0",
            "link": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
            "version": "4.0"
        },
        "target": {
            "name": "Ras",
            "reference_sequence": {