12:["$","$L14",null,{"formats":"$undefined","locale":"en-US","messages":{"common":{"nav":{"datasets":"Datasets","api":"API","about":"About","upload":"Upload"},"auth":{"signIn":"Sign in","signUp":"Sign up","signOut":"Sign out","emailNotVerified":"Please verify your email address","showPassword":"Show password","hidePassword":"Hide password"},"user":{"profile":"Profile","uploads":"Uploads","downloads":"Downloads","organization":"Organization","settings":"Settings","myDatasets":"My Datasets","credentials":"Credentials"},"actions":{"getStarted":"Get Started","learnMore":"Learn More","cancel":"Cancel","save":"Save","delete":"Delete","edit":"Edit","preview":"Preview","create":"Create","creating":"Creating...","search":"Search","clearSearch":"Clear search","share":"Share","loadMore":"Next","readMore":"Read More","verifying":"Verifying...","sending":"Sending...","copy":"Copy","copied":"Copied!","saving":"Saving...","goBack":"Go Back","update":"Update","updating":"Updating...","deleting":"Deleting..."},"inlineActions":{"seeMoreDetails":"See more details here."},"language":{"label":"Language","select":"Select language"}},"metadata":{"home":{"title":"Mozilla Data Collective","description":"Rebuilding the AI data ecosystem with communities at the center"},"datasets":{"title":"Datasets - Mozilla Data Collective","description":"Discover and contribute to open datasets for better AI"},"about":{"title":"About - Mozilla Data Collective","description":"Learn about our mission to democratize AI data"}},"pages":{"home":{"hero":{"title":"Create. Curate. Control","subtitle":"Rebuilding the AI data ecosystem with communities at the center.","description":"Mozilla Data Collective is rebuilding the AI data ecosystem with communities at the centre. 
Access over 470+ high-quality global datasets, built by and for the community in a transparent and ethical way."},"features":{"title":"Why Mozilla Data Collective?","community":{"title":"Community-Driven","description":"Built by and for communities who understand their data best"},"open":{"title":"Open & Transparent","description":"All datasets are openly licensed and transparently sourced"},"ethical":{"title":"Ethical by Design","description":"Prioritizing privacy, consent, and fair representation"}},"cta":{"title":"Ready to contribute?","description":"Join our community of data contributors and help shape the future of AI"},"faqs":{"title":"FAQs","subtitle":"Find answers quickly","readMore":"Read more FAQs","items":{"whatIs":{"question":"What is Mozilla Data Collective?","answer":"Mozilla Data Collective is a platform in the truest sense. It’s yours to stand on, and make of it what you will. We have dual roots in two Mozilla projects - Common Voice, a CC0 public dataset to help tech speak your language - and the Data Futures Lab - an experimental space for instigating new approaches to data stewardship challenges. Mozilla Data Collective works by allowing you to share your data, retain ownership of it, and control who uses it."},"howDoesItWork":{"question":"How does it work?","answer":"We partner with organizations and individuals to make their data available through Mozilla Data Collective. You can share openly, using existing licenses like Creative Commons, or you can build your own. You can open up your data for everyone, or just for some types of downloaders, you can set custom constraints, ask for exchange, compensation or recognition. You can govern it as an individual, a co-operative, a trust or something else. After all, it’s your data. The people who access your datasets are authenticated, and held in legally binding contracts, and we have a number of dataset protection features. 
If you are interested in hosting data on Mozilla Data Collective, please reach out to us at mozilladatacollective@mozillafoundation.org."},"whoIs":{"question":"Who is behind Mozilla Data Collective?","answer":"We are backed and stewarded by Mozilla Foundation - the non-profit, movement-building, and philanthropy arm of Mozilla."}}},"join":{"eyebrow":"JOIN THE MOVEMENT","heading":{"highlight":"Join","rest":"Mozilla Data Collective"},"body":"Mozilla Data Collective wants to radically reimagine our data as power. We are anti-extractivism, anti-monopoly and deeply, profoundly pro-people. We are a collective of linguists, technologists, activists, researchers and creatives who want AI to be all it promises to be - not all it threatens to be. Here, you can share your datasets on your own terms.","buttonLabel":"Join Mozilla Data Collective","ariaLabel":"Join Mozilla Data Collective","imageAlt":"Community members showing peace signs and smiling"},"uploads":{"eyebrow":"IT'S EASY TO UPLOAD & CONTROL YOUR DATA","heading":"Upload your dataset","body":"Mozilla Data Collective works by allowing you to share your data, retain ownership of it, and control who uses it. You can share openly, using existing licenses, or you can build your own.","buttonLabel":"Get started","ariaLabel":"Get started","imageAlt":"An illustration of floppy disks"}},"profile":{"api":{"title":"API","version":"Version: {version}","credentials":{"title":"Credentials","createKey":"Create API Key","empty":"No API credentials yet. 
Generate your first credential to get started."},"table":{"name":"Name","clientId":"Client ID","apiKey":"API Key","created":"Created","actions":"Actions"},"deleteCredential":"Delete credential","copyClientId":"Copy client ID","copyApiKey":"Copy API key","copied":"Copied!","dialog":{"createTitle":"Create New API Credential","createDescription":"Give your API credential a name to help you identify it later.","nameLabel":"Credential Name","namePlaceholder":"e.g., Production API Key","creating":"Creating...","createButton":"Create Credential","successTitle":"API Credential Created","apiKeyWarning":"Treat this API key like a password. It grants access to our API. Do not share it publicly.","clientId":"Client ID","apiKey":"API Key","important":"Important:","apiKeyNote":"This is the only time you'll see this API key. Store it securely in your application's environment variables or API key manager.","done":"Done","deleteTitle":"Delete API Credential","deleteDescription":"Are you sure you want to delete this API credential? This action cannot be undone.","deleting":"Deleting..."},"documentation":{"title":"Links and Documentation","reference":"API Reference Documentation","browseApiDocs":"Browse API Docs","pythonLibrary":"Python Library","pythonLibrarySource":"Python Library Source"}},"downloads":{"title":"Downloads","description":"Your recent dataset downloads.","empty":{"message":"You haven't downloaded any datasets yet.","browseLink":"Browse available datasets"},"table":{"dataset":"Dataset","date":"Date","status":"Status"}},"uploads":{"publicPrivate":{"makePublic":"Make Dataset Public","makePrivate":"Make Dataset Private","makePublicContinue":"This will publish your dataset, making it publicly visible. Confirm to continue.","makePrivateContinue":"This will unpublish your dataset, making it no longer visible to the public. 
Confirm to continue.","publish":"Publish","unpublish":"Unpublish","processing":"Processing..."},"title":"Uploads","description":"View and manage your organization's dataset submissions and published datasets","newSubmission":"New Submission","itemCount":"{count, plural, =1 {# item} other {# items}}","deleteDraft":{"tooltip":"Delete draft","dialog":{"title":"Delete draft upload","description":"This is a destructive action. Deleting this draft will remove it from Mozilla Data Collective and delete the uploaded dataset file. If you continue, you’ll need to start again from the beginning (including uploading the dataset)."},"errors":{"notFound":"Draft not found. It may have already been deleted.","notAllowed":"You can only delete your own draft uploads."},"toast":{"successTitle":"Draft deleted","successDescription":"Your draft upload and its dataset file were deleted.","errorTitle":"Could not delete draft"}},"empty":{"message":"No uploads yet.","description":"Create your first data listing to get started."},"notUploader":{"message":"We review all member requests to upload datasets to Mozilla Data Collective. 
This helps ensure that all datasets are contributed in line with our community standards.","requestUploadButton":"Request to upload"},"requestForm":{"sections":{"aboutDataset":"About your dataset","sharingGoals":"Sharing goals"},"fields":{"aboutDataset":{"label":"Tell us about your dataset(s)","helpText":"Provide a short summary of your dataset, how the data was collected, and any additional links you'd like to share with the MDC team","placeholder":"Short summary about your dataset"},"sharingGoals":{"label":"What are your goals for sharing data on Mozilla Data Collective?","helpText":"Provide a few goals for why you want to share data on Mozilla Data Collective","placeholder":"Provide a few goals for why you want to share data on Mozilla Data Collective"}},"actions":{"submit":"Submit request","submitting":"Submitting..."}},"request_status":{"pending":"Request Pending","approved":"Request Approved","denied":"Request Denied","request_again":"Request Again"},"labels":{"status":"Status:","requested":"Requested:"},"canUpload":"You can upload!","table":{"dataset":"Dataset","date":"Date","status":"Status","name":"Name","size":"Size","downloads":"Downloads","updated":"Last Updated","actions":"Actions","viewPublic":"View Listing","edit":"Edit","loadMore":"Next Page","loading":"Loading..."},"stats":{"total":{"title":"Total Submissions","description":"All dataset submissions"},"approved":{"title":"Published","description":"Live on platform"},"downloads":{"title":"Total downloads","description":"Across all datasets"},"pending":{"title":"In Review","description":"Pending approval"}},"list":{"approvedBadge":"Your account can publish datasets on Mozilla Data Collective","type":"Type","size":"Size","created":"Created","updated":"Updated","published":"Published","downloads":"Downloads","viewPublic":"View Public Page","fileAttached":"File"},"status":{"draft":"draft","submitted":"submitted","in_review":"in review","edits_requested":"edits 
requested","approved":"approved","rejected":"rejected","retracted":"retracted"}},"submissions":{"title":"Dataset Submissions","create":{"title":"Datasheet","description":"This two-step form is designed to help you complete a detailed datasheet for your dataset, ensuring it meets the standards for quality and usability on the Mozilla Data Collective platform.","permissionRequired":"Permission Required","permissionDenied":"Your organization does not have permission to submit datasets. Please contact support to request upload access.","newForm":{"stepTitle":"Step 1 of 2 - Name your dataset","heading":"Dataset name","description":{"primary":"Use a descriptive name that is not too long or repetitive. This will show up in the data card below the organisation name.","note":"NOTE: Try to give your dataset a unique name without making it too long (e.g. if it is a collection of texts from a publisher, you could mention the publisher name in the title)"},"fieldLabel":"Dataset name*","placeholder":"Add a dataset name","createButton":"Create","validation":{"required":"Dataset name is required","maxLength":"Dataset name must be 100 characters or less"}},"nameLabel":"Dataset Name","namePlaceholder":"e.g., Common Voice Finnish v1.0","nameHelp":"3-100 characters","descriptionLabel":"Short Description","descriptionPlaceholder":"Brief overview of your dataset...","descriptionHelp":"Optional, max 500 characters","cancelButton":"Cancel","submitButton":"Create Submission","submitting":"Creating...","error":"Failed to create submission"},"edit":{"title":"Edit Submission","statusWarning":"This submission is {status} and cannot be edited.","sections":{"fileUpload":"Upload Dataset File","basicInfo":"Basic Information","license":"License","additionalInfo":"Additional Information","contacts":"Contacts","usageEthics":"Usage & Ethics","agreement":"Agreement","submitForReview":"Submit for Review"},"fileUpload":{"description":"Upload your dataset file (.tar.gz format, up to 80GB). 
This is required before submitting for review.","currentFile":"File uploaded successfully","uploadPrompt":"Upload a file","dragDrop":"or drag and drop","fileTypes":"tar.gz files up to 80GB","preparing":"Preparing upload...","uploading":"Uploading...","processing":"Processing...","complete":"Upload complete!","startUpload":"Start Upload","uploadError":"Upload Error","approvedFileNotice":"File: {filename} (file cannot be changed for approved datasets)","uploadNewVersion":"Upload new version","replaceDescription":"New versions will be published immediately once uploaded.","errors":{"invalidExtension":"File must be in .tar.gz or .tgz format","tooLarge":"File size must be less than {maxGB}GB","emptyFile":"File is empty","uploadFailed":"Upload failed","chunkFailed":"Failed to upload chunk {chunkNumber}","noEtag":"No ETag returned for chunk {chunkNumber}","completeFailed":"Failed to complete upload","initiateFailed":"Failed to initiate upload"}},"fileHistory":{"title":"File history","empty":"No previous versions yet."},"fields":{"name":"Dataset Name","nameRequired":"Required","description":"Description","descriptionOptional":"Optional","locale":"Locale","localeOptional":"Optional","localePlaceholder":"en-US","localeHelp":"Language/locale code (e.g., en-US, es-ES, fr-FR)","format":"File Format","formatOptional":"Optional","formatPlaceholder":"MP3, wav, etc.","formatHelp":"File format of the uploaded dataset","task":"Task/Classification","taskSelectPlaceholder":"Select a task type...","taskPlaceholder":"e.g., Speech recognition, Image classification, etc.","taskRequired":"Required - describe the primary task or classification","license":"License","licensePlaceholder":"e.g., CC-BY-4.0, MIT, Apache 2.0","licenseRequired":"Required","licenseUrl":"License URL","licenseUrlPlaceholder":"https://...","licenseUrlOptional":"Optional","licenseSearchPlaceholder":"Search licenses...","licenseNoResults":"No results found.","customLicensePrompt":"Can't find what you're looking 
for?","customLicenseAction":"Click here for a custom license.","customLicense":"Other license","customLicenseNamePlaceholder":"List your custom license","customLicenseAbbreviationPlaceholder":"Abbreviated license name","other":"Other Information","otherPlaceholder":"Any other relevant information...","restrictions":"Restrictions/Notes to Coordinators","restrictionsPlaceholder":"Special instructions or restrictions...","forbiddenUsage":"Forbidden Usage","forbiddenUsagePlaceholder":"Specify any prohibited uses of this dataset...","additionalConditions":"Additional Conditions","additionalConditionsPlaceholder":"Any additional terms or conditions users must accept...","additionalConditionsHelp":"Optional additional conditions that users must agree to before downloading","intendedUsage":"Intended Usage","intendedUsagePlaceholder":"Describe intended use cases for this dataset (e.g. evaluation of model performance, training, fine-tuning)","ethicalReview":"Ethical Review Process","ethicalReviewPlaceholder":"Describe any ethical review or approval process that this dataset went under"},"contacts":{"pointOfContact":"Point of Contact","pointOfContactRequired":"Required - primary contact for this dataset","fundedBy":"Funded By","legalContact":"Legal Contact","createdBy":"Created By","fullName":"Full Name","fullNamePlaceholder":"The individual or organization who created this dataset","email":"Email","emailPlaceholder":"email@example.com"},"agreement":{"label":"I agree to submit this dataset for review","description":"By checking this box, you confirm that you have the right to submit this dataset and that all information provided in the datasheet is accurate."},"exclusivity":{"label":"This dataset is non-exclusive to Mozilla Data Collective","description":"Mozilla Data Collective provides protections, management controls and visibility for Datasets hosted on the Platform. These safeguards and insights apply in full when your Dataset is hosted exclusively on the Platform. 
If your Dataset will also be hosted or made accessible in other places, certain of these protections and visibility features may not apply. Check this box if Mozilla Data Collective will not be the exclusive hosting and point for your Dataset."},"actions":{"saveDraft":"Save Draft","saveChanges":"Save Changes","saving":"Saving...","submitReview":"Submit for Review","submitting":"Submitting...","cannotSubmit":"Complete all required fields to submit"},"status":{"approvedPublished":"✓ This dataset has been approved and published. You can edit the dataset metadata, and changes will be applied immediately without requiring approval."},"messages":{"draftSaved":"Draft saved successfully!","uploadSuccess":"File uploaded successfully!","error":"An error occurred"},"validation":{"fileRequired":"File upload required","nameRequired":"Dataset name required","taskRequired":"Task/classification required","licenseRequired":"License required","licenseAbbreviationRequired":"License abbreviation required","localeRequired":"Locale required","contactRequired":"Point of contact required","agreementRequired":"Agreement required","missingFieldsTitle":"Missing Fields","missingFieldsDescription":"Please complete: {fields}"},"datasheet":{"intro":{"description":"This form is designed to help you complete a detailed datasheet for your dataset, ensuring it meets the standards for quality and usability on the Mozilla Data Collective platform.","stepIndicator":"Step 2 of 2 - Complete datasheet"},"sections":{"uploadingData":{"title":"Upload data","description":{"primary":"Upload your dataset as a single .tar.gz file. This archive should contain all relevant files and any additional metadata files that you would like to include as part of the download for your dataset."}},"datasetName":{"title":"Dataset name","description":{"primary":"Use a descriptive name that is not too long or repetitive. 
This will show up in the data card below the organisation name.","note":"NOTE: Try to give your dataset a unique name without making it too long (e.g. if it is a collection of texts from a publisher, you could mention the publisher name in the title)"},"fields":{"label":"Dataset name","placeholder":"Add the dataset name"}},"descriptions":{"title":"Description","description":{"short":"Add a short description of a single sentence. This will show up in the data card below the title as shown in the figure below.","long":"Add a long description of several sentences describing the dataset. This will show on the datasheet page."},"fields":{"description":{"label":"Description","placeholder":"Add a brief overview of your dataset"},"shortDescription":{"label":"Short Description","placeholder":"Add a short description"},"longDescription":{"label":"Long Description","placeholder":"Add a long description"}}},"task":{"title":"Task","description":"Task: If this dataset was curated for a certain downstream application / task, select it from the dropdown. Otherwise, select N/A.","fields":{"label":"Task","placeholder":"Select a task"}},"locale":{"title":"Locale","description":"Use ISO-639-1 (two letter) or ISO-639-3 (three letter) language codes, if there is an ISO-639-1 code prefer that code; if there are two codes one in English and one from the native language, choose the native language one","fields":{"label":"Locale","placeholder":"Add a locale"}},"fileFormat":{"title":"File format","description":{"uppercase":"Use uppercase formats without initial full stop, e.g.","uppercaseExample":"WAV not .wav","separator":"Use a comma and a space to separate formats, e.g.","separatorExample":"WAV, TSV not WAV; TSV"},"fields":{"label":"File Format","placeholder":"Add a file format"}},"license":{"description":{"primary":"Choose a license from the dropdown list.","secondary":"If you need to use another licence, choose \"custom licence\" and fill out:","longForm":"Long form: e.g. 
Creative Commons Attribution International 4.0","shortForm":"Short form: This should be an abbreviation, e.g. CC-BY-4.0"},"infoNote":"The license will be included in the user agreement prior to download."},"restrictions":{"title":"Restrictions","description":"In this section, you have the opportunity to specify any restrictions that apply to the use of this dataset. For example, you might indicate that it is intended solely for research and scientific purposes.","infoNote":"This section will be included in the user agreement prior to download.","fields":{"label":"Restrictions or special constraints","placeholder":"Describe any restrictions"}},"forbiddenUsage":{"title":"Forbidden usages","description":{"intro":"Here you can add any forbidden usages, for example:","items":{"first":"You agree not to attempt to determine the identity of speakers in this dataset","second":"Any attempt to clone the voice or train models that imitate the speakers in this dataset is forbidden","third":"It is forbidden to use this dataset to train chatbots or large language models"}},"infoNote":"This section will be included in the user agreement prior to download.","fields":{"label":"Forbidden usage","placeholder":"Describe forbidden usages"}},"additionalInfo":{"title":"Additional information","description":"Further details might encompass various aspects that haven't been covered here. For instance, it's important to clarify that users or downloaders of this dataset must refrain from trying to identify any speakers. 
Additionally, you could mention that the dataset should not be used to infer personal information about individuals, nor should it be employed in any way that could compromise privacy or confidentiality.","infoNote":"If completed, this section will be included in the user agreement prior to download.","fields":{"label":"Additional information","placeholder":"Describe anything else the reviewers or downloaders should know about your dataset"}},"otherInfo":{"title":"Other information","description":"This section is for any other relevant information about the dataset that hasn't been covered in the previous sections. This could include details about the data collection process, preprocessing steps, or any unique characteristics of the dataset that users should be aware of.","fields":{"label":"Other information","placeholder":"Any other relevant information about the dataset"}},"intendedUsage":{"title":"Intended usage","description":{"intro":"What is the dataset intended to be used for? This is a freeform field, and should consist of a sentence or paragraph describing the intended use, you could write something like:","example":"This dataset is intended for use in creating automatic speech recognition systems."},"fields":{"label":"Intended usage / example applications","placeholder":"Describe intended usage"}},"ethicalReview":{"title":"Ethical review","description":"For instance, you might describe the procedures followed to ensure that participants were fully aware of the study's purpose, how their data would be used, and any potential risks involved. This transparency is crucial for maintaining ethical standards in research.","fields":{"label":"Ethical review process","placeholder":"Describe the ethical review process"}},"contacts":{"title":"Contact information","description":{"primary":"The point of contact should be the dataset owner/uploader, but it could also be a technical consultant. 
If the point of contact is not the owner, then there should also be a created by and a legal contact.","secondary":"If the dataset was funded by a person or organisation who is not the owner/uploader, then that can be listed here. The contact can be an email address or it can be a link.","tertiary":"If there is a specific legal contact for the dataset, for example for takedown requests, then that can be listed here.","quaternary":"If the point of contact did not create the dataset, put the dataset creator here"},"fields":{"pointOfContactFullName":{"label":"Point of contact - full name","placeholder":"Point of contact - full name"},"pointOfContactEmail":{"label":"Point of contact - email","placeholder":"Point of contact - email"},"createdByFullName":{"label":"Created by - full name","placeholder":"Created by - full name"},"createdByEmail":{"label":"Created by - email","placeholder":"Created by - email"},"legalContactFullName":{"label":"Legal contact - full name","placeholder":"Legal contact - full name"},"legalContactEmail":{"label":"Legal contact - email","placeholder":"Legal contact - email"},"fundedByFullName":{"label":"Funded by - full name","placeholder":"Funded by - full name"},"fundedByEmail":{"label":"Funded by - email","placeholder":"Funded by - email"}}}},"agreementCard":{"title":"Agree to this before submitting for review"},"buttons":{"saveForLater":"Save for later","saveIconAlt":"Save icon"},"licenseCard":{"selected":"{abbreviation} selected","customSelected":"Custom license selected","identifier":"Identifier"},"taskOptions":{"na":"N/A","nlp":"Natural Language Processing","asr":"Automatic Speech Recognition","lid":"Language Identification","tts":"Text to Speech","mt":"Machine Translation","lm":"Language Modelling","llm":"Large Language Modelling","nlu":"Natural Language Understanding","nlg":"Natural Language Generation","call":"Computer-Aided Language Learning","rag":"Retrieval-Augmented Generation","cv":"Computer Vision","ml":"Machine 
Learning","other":"Other"}}}},"profile":{"title":"Profile","general":"General","fullName":{"label":"Full Name","sr":"Enter your full name"},"organization":{"label":"Organization","sr":"Enter your organization name"},"email":{"label":"Email","sr":"Enter your email address","emailChangeInstruction":"To change or edit your email, send us an email to mozilladatacollective@mozillafoundation.org"}},"settings":{"title":"Settings","mailingList":{"title":"Join the Mozilla Data Collective Mailing List","description":"Receive emails such as new dataset releases, platform features, and newsletters about Mozilla Data Collective.","checkboxLabel":"I would like to join the Mozilla Data Collective mailing list","checkboxAriaLabel":"Subscribe to mailing list"},"account":{"title":"Account","createdOn":"Your account was created on {date}","deleteAccount":"Delete my account","deleteAccountDescription":"Your account will be permanently deleted and you will lose access.","deleteAccountFailure":"There was an issue deleting your account. Please try again later."}},"organization":{"title":"Organization","general":"General","fullName":{"label":"Full Name","sr":"Enter your full name"},"email":{"label":"Email","sr":"Enter your email address","emailChangeInstruction":"To change or edit your email, send us an email to mozilladatacollective@mozillafoundation.org"},"intro":"Organizations are communities, businesses, groups, collectives, etc. 
Once you add your organizational details below and upload data to Mozilla Data Collective, you will receive a custom URL with all of your datasets in one place.","messages":{"success":"Organization details saved","validationError":"Please review the highlighted fields.","error":"Unable to save organization details right now."},"fields":{"name":{"label":"Organization name*","placeholder":"Mozilla Data Collective","helper":"Organization name will appear on your dataset card.","usePersonalName":"Use my name ({name})"},"region":{"label":"Location*","placeholder":"Location","helper":"Pick the primary location where your organization operates."},"website":{"label":"Website or Social Media URL*","placeholder":"https://www.myOrganization.com","helper":"This URL will be present on your organization page."},"description":{"label":"Open-ended description about your organization [markdown supported]","helper":"Provide a few goals for why you want to share data on Mozilla Data Collective.","placeholder":"Share a few goals for why you want to share data on Mozilla Data Collective"},"entityType":{"label":"Entity type","placeholder":"Select entity type","helper":"Optional. 
Helps us understand how to reference your datasets.","options":{"communityGroup":"Community group","academic":"Academic / Research","cooperative":"Cooperative","forProfit":"For-profit company","freeZoneEstablishment":"Free zone establishment","freeZoneLlc":"Free zone LLC","governmentInstrumentality":"Government instrumentality","governmentalUnit":"Governmental unit","incorporatedAssociation":"Incorporated association","incorporatedNonProfit":"Incorporated non-profit","incorporatedPartnership":"Incorporated partnership","individual":"Individual","limitedLiabilityPartnership":"Limited liability partnership","llc":"LLC","multiMemberLlc":"Multi-member LLC","ngo":"NGO / Non-profit","other":"Other","privateCompany":"Private company","privateCorporation":"Private corporation","privatePartnership":"Private partnership","publicCompany":"Public company","publicCorporation":"Public corporation","publicListedCorporation":"Public listed corporation","publicPartnership":"Public partnership","publicSectorGovernment":"Public sector / Government","registeredCharity":"Registered charity","singleMemberLlc":"Single-member LLC","soleEstablishment":"Sole establishment","soleProprietorship":"Sole proprietorship","taxExemptGovernmentInstrumentality":"Tax-exempt government instrumentality","trust":"Trust","unincorporatedAssociation":"Unincorporated association","unincorporatedNonProfit":"Unincorporated non-profit","unincorporatedPartnership":"Unincorporated partnership"}}}}},"datasets":{"title":"Explore Datasets","filters":{"all":"All Datasets","byType":"By Type","byLicense":"By License","byLanguage":"By Language"},"searchBar":{"placeholder":"Search datasets...","tooLong":"Search queries must be {max} characters or fewer.","clearSearch":"Clear search","closeSearch":"Close search"},"filtersBar":{"filtersLabel":"Filters:","clearAll":"Clear all","noResults":"No results","searchPlaceholder":"Search 
{label}...","labels":{"task":"Task","locale":"Language","license":"License","format":"Format"},"menuLabels":{"task":"Filter by task","locale":"Filter by language","license":"Filter by license","format":"Filter by format"}},"searchResults":"Search results for “{query}”","pagination":{"previous":"Previous Page","next":"Next Page"},"empty":{"title":"No datasets found","description":"Try adjusting your filters or search terms"},"featured":{"commonVoiceKinyarwanda":{"title":"Common Voice Kinyarwanda","description":"High-quality speech data for machine learning applications"},"commonVoiceChinese":{"title":"Common Voice Chinese","description":"High-quality speech data for machine learning applications"},"commonVoiceSpanish":{"title":"Common Voice Spanish","description":"High-quality speech data for machine learning applications"},"commonVoiceCatalan":{"title":"Common Voice Catalan","description":"High-quality speech data for machine learning applications"}}},"datasheet":{"page":{"notFound":"Dataset not found"},"badges":{"license":{"label":"License:","alt":"License icon"},"steward":{"label":"Steward:","alt":"Shield icon"}},"metadataPills":{"task":"Task","releaseDate":"Release Date","format":"Format","size":"Size"},"sections":{"description":{"title":"Description"},"specifics":{"title":"Specifics","licensing":{"title":"Licensing","viewLicense":"View License"}},"considerations":{"title":"Considerations","restrictionsTitle":"Restrictions/Special Constraints","forbiddenUsageTitle":"Forbidden Usage"},"processes":{"title":"Processes","ethicalReviewTitle":"Ethical Review","intendedUseTitle":"Intended Use"},"metadata":{"title":"Metadata"}},"fallbacks":{"license":"No License Provided","additionalConditions":"No Additional Conditions Provided","restrictions":"No Restrictions Provided","forbiddenUsage":"No Forbidden Usage Provided"},"contact":{"pointOfContact":"Point of Contact","createdBy":"Created by","legallyOwnedBy":"Legally owned by","fundedBy":"Funded 
by"},"sidebar":{"files":"Files","history":"History","historyEmpty":"No previous versions","currentFile":"Latest Version","historicFiles":"Historic Versions"},"actions":{"download":"Download","connectApi":"Connect API"},"downloadToast":{"processing":{"title":"Processing...","description":"Your download is processing"},"downloading":{"title":"Initiated","description":"Your download has started"},"error":{"title":"Error","description":"There was an error processing your download."}},"agreementDialog":{"header":{"title":"AGREEMENT"},"sections":{"license":{"title":"LICENSE","agreeLabel":"I agree to the license specified for this dataset"},"additionalConditions":{"title":"ADDITIONAL CONDITIONS","agreeLabel":"I agree to the additional conditions as outlined above"},"restrictions":{"title":"RESTRICTIONS / SPECIAL CONSTRAINTS","agreeLabel":"I agree to the restrictions and special constraints as outlined above"},"forbiddenUsage":{"title":"FORBIDDEN USAGE","agreeLabel":"I agree to the forbidden usage terms as outlined above"}},"buttons":{"download":"Download Dataset","connectApi":"Connect to API"},"icons":{"downloadAlt":"Download","apiAlt":"API Connect"}},"shareDialog":{"title":"Share","description":"Click on an icon to share this page.","cta":"Access {dataset} on Mozilla Data Collective","socialMedia":{"email":"Email","bluesky":"Bluesky","linkedin":"LinkedIn","mastodon":"Mastodon","telegram":"Telegram"}},"apiConnect":{"title":"Connect to API","description":"Use these curl commands to access the dataset via our REST API. 
You'll need API credentials to authenticate.","needCredentials":{"title":"Need API Credentials?","description":"Create your API credentials to authenticate your requests.","createLink":"Create Credentials"},"documentation":{"link":"View Full API Documentation"}},"codeSnippets":{"title":"Download Dataset File","steps":{"createSession":"Create Download Session","downloadFile":"Download Dataset File"},"copyTooltips":{"createSession":"Copy create session command","downloadFile":"Copy download command","copied":"Copied!"},"instructions":{"title":"Usage Instructions:","step1":"Replace YOUR_API_KEY with your actual API key in both commands","step2":"Run the create download session command to get a download token","step3":"Replace DOWNLOAD_TOKEN with the token from step 1","step4":"Run the download command to get the dataset file"}},"terms":{"download_size_acknowledgment":"You are prepared to initiate a download of {size}","no_redistribution_agreement":"You agree that you will not re-host or re-share this dataset","speaker_privacy_agreement":"You agree not to attempt to determine the identity of speakers in the Common Voice dataset","action":{"downloadButton":"Download Dataset","connectApiButton":"Connect API"},"message":{"downloadStarted":"Download started.","downloadError":"Something went wrong.","processingDownload":"Processing download."}}},"auth":{"signIn":{"title":"Sign in","description":"Sign in to your account to continue","forgotPassword":"Forgot password?","noAccount":"Don't have an account?","signingIn":"Signing in...","errors":{"invalidCredentials":"Invalid email or password.","generic":"Something went wrong. Please try again."}},"forgotPassword":{"title":"Forgot password","noAccount":"Don't have an account?","send":"Send","sendingEmail":"Sending email...","success":"If an account associated with this email address exists, a password reset email has been sent. 
Check your inbox and follow the instructions to reset your password.","form":{"emailPlaceholder":"Enter your email"}},"resetPassword":{"title":"Reset password","submit":"Reset password","submitting":"Updating password...","success":{"title":"Password reset successful","description":"Your password has been reset. You can now sign in with your new password."},"rememberedPassword":"Remembered your password?","errors":{"invalidToken":"This reset link is invalid or has expired. Please request a new one.","passwordMismatch":"Passwords must match.","invalidForm":"Please fix the errors below and try again.","updateFailed":"We couldn't reset your password. Please try again."},"form":{"passwordLabel":"New password","passwordPlaceholder":"Enter your new password","confirmPasswordLabel":"Confirm new password","confirmPasswordPlaceholder":"Re-enter your new password"}},"signUp":{"title":"Create a new account","description":"Join the Mozilla Data Collective community","hasAccount":"Already have an account?","terms":"By creating an account on Mozilla Data Collective, I agree to the Terms of Service, Privacy Policy, and Cookies Policy.","creatingAccount":"Creating account...","organizationEmailReminder":"If you will be using Mozilla Data Collective to access or upload on behalf of an organization, please use an email address that matches your organization.","form":{"emailPlaceholder":"Email","emailHelper":"Use your organization email if you have one.","passwordPlaceholder":"Password","preferredNamePlaceholder":"Preferred name","optionalOrganizationNamePlaceholder":"Organization name (Optional)","reasonForJoiningLabel":"Reason for joining (Optional)","reasonForJoiningPlaceholder":"Reason for joining (Optional)","reasonForJoiningOptions":{"shareDatasets":"Share datasets","findAndUseDatasets":"Find and use datasets","both":"Both"}},"errors":{"registrationFailed":"Registration failed","somethingWentWrong":"Something went wrong","mustAgreeToTerms":"You must agree to the 
terms","accountAlreadyExists":"An account with this email already exists"}},"verifyEmail":{"title":"Verify your email","verifying":"Verifying your email...","description":"Check your email for a verification code.","pleaseWait":"Please wait...","form":{"codeLabel":"Verification Code","codePlaceholder":"Enter code here","codeHint":"Enter the 6-character code we sent to your email","submitButton":"Verify Email","verifyingButton":"Verifying..."},"resend":{"description":"Didn't receive the email? Check your spam folder or try resending the verification email.","button":"Resend verification email","sending":"Sending...","cooldown":"Resend email ({seconds}s)"},"wrongEmail":"Wrong email?","signUpAgain":"Sign up again","errors":{"verificationFailed":"Verification failed","resendFailed":"Failed to resend verification email","generic":"Something went wrong"},"success":{"verified":"Email verified successfully! Redirecting...","emailSent":"Verification email sent! Please check your inbox.","redirecting":"Email verified successfully! 
Redirecting to sign in..."}}},"apiReference":{"metadata":{"title":"API Reference - Mozilla Data Collective","description":"Harness community-driven datasets with our API"},"hero":{"title":"Harness community-driven datasets with our API","version":"Version: Beta","description":"The Mozilla Data Collective API gives developers access to community-created datasets while empowering contributors to maintain control over their data.","getApiAccess":"Get API Access","browseApiDocs":"Browse API Docs"},"overview":{"title":"Mozilla Data Collective API at a glance","features":{"createCredentials":{"title":"Create access credentials","description":"Manage your API credentials by going to Profile > API"},"secureKey":{"title":"Secure your key","description":"Store your access credentials in a secret key"},"authentication":{"title":"Authentication","description":"Provide your API key in your request header to authenticate"},"selectDataset":{"title":"Select your dataset","description":"Choose from over 300 global datasets to use"},"agreeTerms":{"title":"Agree to dataset terms","description":"You will only be able to download datasets after accepting terms"},"download":{"title":"Download","description":"Use our REST endpoint or the MDC python library to get started"}},"createCredentialsLink":"Create API credentials"},"apiOverview":{"title":"API Overview","description":"Power your projects with diverse, ethically-created datasets that are just one REST call away.","getApiAccess":"Get API Access","browseApiDocs":"Browse API Docs"},"tryItOut":{"title":"Give it a try","description":"Get up and running with datacollective-python, a Python library for authenticating and interacting with the MDC API.","copy":"copy","copied":"copied!","pythonLibrary":"Python Library","browseDocs":"Browse Docs"},"linksAndDocs":{"title":"Links & Docs","getApiAccess":"Get API Access","browseApiDocs":"Browse API Docs","pythonLibrary":"Python Library","pythonLibrarySource":"Python Library 
Source"}},"organizationPublic":{"hero":{"nameFallback":"Untitled organization","regionFallback":"Geographical region not provided","websiteFallback":"Website or social handle not provided","entityTypeFallback":"Entity type not provided"},"about":{"title":"About us","noDescription":"This organization has not added details yet."},"datasets":{"title":"Datasets","count":"{count, plural, one {# Dataset} other {# Datasets}}","table":{"name":"Dataset name","license":"Dataset license","locale":"Dataset locale","task":"Dataset task","format":"Dataset format","size":"Dataset size","sortBy":"Sort by {label}{direction, select, ascending { (ascending)} descending { (descending)} other {}}"},"empty":"No datasets published yet."}},"admin":{"uploadVerificationRequests":{"title":"Upload Verification Requests","description":"Review and manage upload verification requests from organizations","empty":{"message":"No upload verification requests found"},"status":{"pending":"Pending","approved":"Approved","denied":"Denied"},"actions":{"approve":"Approve","deny":"Deny","revoke":"Revoke (Deny)","notAvailable":"N/A"},"table":{"organization":"Organization","organizationId":"Organization ID","submittedBy":"Submitted By","requestDate":"Request Date","status":"Status","canPublish":"Can Publish","actions":"Actions","unknownOrganization":"Community","yes":"Yes","no":"No","noActionsAvailable":"No actions available"}}},"notFound":{"title":"404","subtitle":"Page Not Found","description":"Sorry, we couldn’t find the page you’re looking for.","goHome":"Go back home"},"terms":{"heading":"Mozilla Data Collective Terms of Service","title":"Terms","subTerms":{"providers":"Data Providers","consumers":"Data Consumers"}},"privacy":{"heading":"Mozilla Data Collective Privacy Notice","title":"Privacy"}},"components":{"header":{"menuLabel":"Open menu","userMenu":"User menu"},"footer":{"attribution":"Brought to you by Mozilla Foundation","copyright":"© 2025 Mozilla Data Collective. 
All rights reserved.","links":{"privacy":"Privacy Policy","terms":"Terms","contact":"Contact Us","cookies":"Cookies","faqs":"FAQs","guidelines":"Participation Guidelines"}},"datasetCard":{"attributes":{"task":"Task","format":"Format","license":"License","size":"Size","created":"Created","locale":"Locale","fallback":"Not specified"}},"markdownField":{"defaultPlaceholder":"*No content to preview*"},"datasetLoadError":{"title":"Error","message":"Failed to load datasets","goHome":"Go Home"}},"errors":{"general":"Something went wrong","notFound":"Page not found","unauthorized":"You don't have permission to access this resource","network":"Network error. Please check your connection and try again","invalidSubmissionId":"Invalid submission ID","invalidInput":"Invalid input provided"},"emails":{"verification":{"subject":"Verify your email for Mozilla Data Collective","title":"Email Verification","instructions":"Please verify your email address by entering the verification code below on the Mozilla Data Collective website:","codeLabel":"Verification Code:","disclaimer":"If you didn't create an account, you can safely ignore this email.","textInstructions":"Please verify your email address by entering this verification code on the Mozilla Data Collective website:","textCodeLabel":"Verification Code: {code}","textDisclaimer":"If you didn't attempt to create an account on Mozilla Data Collective, you can safely ignore this email."},"resetPassword":{"subject":"Reset your password for Mozilla Data Collective","title":"Password Reset Request","instructions":"We received a request to reset your password. Click the button below to create a new password:","buttonText":"Reset Password","linkLabel":"Or copy and paste this link into your browser:","expirationNotice":"This link will expire in 15 minutes for security reasons.","disclaimer":"If you didn't request a password reset, you can safely ignore this email. 
Your password won't be changed.","textInstructions":"We received a request to reset your password for Mozilla Data Collective. Visit the following link to create a new password:","textLink":"{resetLink}","textExpirationNotice":"This link will expire in 15 minutes for security reasons.","textDisclaimer":"If you didn't request a password reset, you can safely ignore this email. Your password won't be changed unless you click the link above and create a new one."}}},"now":"$undefined","timeZone":"UTC","children":"$L15"}]


Dolgan Folklore Text Corpus	CC0-1.0	dlg	NLP	TXT	57.15 KB
Kyrgyz Folklore Text Corpus	CC0-1.0	ky	NLP	TXT	1.28 MB
Polish Public Domain 20th Century Literature Text Corpus	CC0-1.0	pl	NLP	TXT	10.86 MB
Tatar Folklore Text Corpus	CC0-1.0	tt	NLP	TXT	1.40 MB
World Factbook (JSON)	CC0-1.0	en	NLP	JSON	7.10 MB

Taruen

About us

Datasets