{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":727895495,"defaultBranch":"main","name":"calm","ownerLogin":"zeux","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-12-05T19:52:54.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/1106629?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1713981919.0","currentOid":""},"activityList":{"items":[{"before":"f6520831070fd2c616dd16ed8aecb5454459eab5","after":"56c92c70ef5dc40345d46bacbdb4b179099132d2","ref":"refs/heads/main","pushedAt":"2024-05-29T18:16:06.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"tools: Ignore consolidate.safetensors file during download by default\n\nMistral is now releasing checkpoints that combine\nconsolidated.safetensors which is used by their inference code and HF\nstyle model-*.safetensors in one repository. To avoid 2x downloads for\nmodels like Mistral-7B-Instruct-v0.3 we now ignore consolidated file\nby default.","shortMessageHtmlLink":"tools: Ignore consolidate.safetensors file during download by default"}},{"before":"2ce19a36f3768d20c36896a573aac27d87e6128a","after":"f6520831070fd2c616dd16ed8aecb5454459eab5","ref":"refs/heads/main","pushedAt":"2024-05-23T19:11:32.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd Cohere Aya models; these work with no changes as they share the architecture with Command-R","shortMessageHtmlLink":"Update README.md"}},{"before":"998bfbc9bcb85762219bfa30dd833abf4b59d0f0","after":"2ce19a36f3768d20c36896a573aac27d87e6128a","ref":"refs/heads/main","pushedAt":"2024-05-21T21:49:24.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nOops, forgot a comma.","shortMessageHtmlLink":"Update README.md"}},{"before":"b086fc1a9dc2acf123f9737a62591cfc40c789d8","after":"998bfbc9bcb85762219bfa30dd833abf4b59d0f0","ref":"refs/heads/main","pushedAt":"2024-05-21T21:43:54.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd Phi3 Medium model; note that we currently only support 4K context versions as 128K variants need LongRope support.","shortMessageHtmlLink":"Update README.md"}},{"before":"60486347f72ebf990c7410e66b24f2e180d212d8","after":"b086fc1a9dc2acf123f9737a62591cfc40c789d8","ref":"refs/heads/main","pushedAt":"2024-05-17T23:41:39.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd other Yi 1.5 sizes and remove Yi 1.0 because it doesn't seem to be\nvery relevant with Yi 1.5 release.","shortMessageHtmlLink":"Update README.md"}},{"before":"d72e3942a2ddfe562eb5ebea70103060b5dafaf8","after":"60486347f72ebf990c7410e66b24f2e180d212d8","ref":"refs/heads/main","pushedAt":"2024-04-27T19:33:18.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Improve tokenizer validation to make it memory safe\n\nWe now do careful out of bounds checking and validate the token packing\nto avoid out of bounds accesses for malformed files (assuming assertions\nare not compiled out of course).","shortMessageHtmlLink":"Improve tokenizer validation to make it memory safe"}},{"before":"714de76e22dad75c401ac15e2bb5b8a815fd1fd5","after":null,"ref":"refs/heads/msmem","pushedAt":"2024-04-24T18:05:19.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"}},{"before":"96a2f951bd6ca4175c407cec33bd13b4f0ee38f1","after":"d72e3942a2ddfe562eb5ebea70103060b5dafaf8","ref":"refs/heads/main","pushedAt":"2024-04-24T17:57:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd M1 Max numbers","shortMessageHtmlLink":"Update README.md"}},{"before":null,"after":"714de76e22dad75c401ac15e2bb5b8a815fd1fd5","ref":"refs/heads/msmem","pushedAt":"2024-04-24T17:52:04.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Metal: shared memory experiment again","shortMessageHtmlLink":"Metal: shared memory experiment again"}},{"before":"08fc17538ced5166b60ea1438dcf10d93c53756f","after":"96a2f951bd6ca4175c407cec33bd13b4f0ee38f1","ref":"refs/heads/main","pushedAt":"2024-04-24T03:32:59.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nUpdate M2 Pro numbers after tweaks","shortMessageHtmlLink":"Update README.md"}},{"before":"e096c0be4349028cdf03db67952130aee8e78ebc","after":"08fc17538ced5166b60ea1438dcf10d93c53756f","ref":"refs/heads/main","pushedAt":"2024-04-24T03:27:19.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Metal: Attempt to make matmul fp8 faster by unrolling the loop 2x\n\nThis is necessary to reduce the number of buffer loads; we also carefully\nadjust the math to reduce ALU pressure so that we extract 2 elements from\neach 32-bit weight pack in parallel.\n\nThis unfortunately regresses performance a little bit on M2 base but hopefully\nwill improve performance on M2 Pro...","shortMessageHtmlLink":"Metal: Attempt to make matmul fp8 faster by unrolling the loop 2x"}},{"before":"e90082e8583cc287129fdfa1d64adbb48051d268","after":"e096c0be4349028cdf03db67952130aee8e78ebc","ref":"refs/heads/main","pushedAt":"2024-04-24T02:52:45.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd initial M2 Pro benchmarks; we'll need some more optimizations...","shortMessageHtmlLink":"Update README.md"}},{"before":"d70d03010f3830272fbf2527ccc88892663a425b","after":"e90082e8583cc287129fdfa1d64adbb48051d268","ref":"refs/heads/main","pushedAt":"2024-04-24T01:16:12.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Slightly simplify json_dtype\n\nReduce redundant code by extracting dtypes into a table.","shortMessageHtmlLink":"Slightly simplify json_dtype"}},{"before":"5ce46c4b0122df51ab2f93f6dda740ff8f122f2b","after":"d70d03010f3830272fbf2527ccc88892663a425b","ref":"refs/heads/main","pushedAt":"2024-04-23T20:05:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"tools: Clamp phi-3 to 2k context\n\nDue to SWA in phi3-mini the model is unusable on >2048 seq lengths; kv\nsinks work well so this is approximately equal to SWA in practice.","shortMessageHtmlLink":"tools: Clamp phi-3 to 2k context"}},{"before":"38bc1e977839b20b4c0dd1ff43857e181292d5f7","after":"5ce46c4b0122df51ab2f93f6dda740ff8f122f2b","ref":"refs/heads/main","pushedAt":"2024-04-23T19:35:53.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd Phi3 to model list.","shortMessageHtmlLink":"Update README.md"}},{"before":"8504ebef6a0d0f27a6afc703a4f25bba0a63c362","after":"38bc1e977839b20b4c0dd1ff43857e181292d5f7","ref":"refs/heads/main","pushedAt":"2024-04-23T19:24:21.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Remove unused code","shortMessageHtmlLink":"Remove unused code"}},{"before":"f1c9ad67767605d263db09f80bae8a9814c77adf","after":"8504ebef6a0d0f27a6afc703a4f25bba0a63c362","ref":"refs/heads/main","pushedAt":"2024-04-22T01:28:21.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Metal: Set threadGroupSizeIsMultipleOfThreadExecutionWidth to YES\n\nThis does not improve performance on M2 base but we always have a guarantee\nthat thread group size is divisible by 32, so we can keep this as YES for now\nuntil profiling/optimization for other models is done.","shortMessageHtmlLink":"Metal: Set threadGroupSizeIsMultipleOfThreadExecutionWidth to YES"}},{"before":"d6f542495d97b05450e59256b9c100926b0b86b0","after":"f1c9ad67767605d263db09f80bae8a9814c77adf","ref":"refs/heads/main","pushedAt":"2024-04-20T15:54:23.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nReduce table headers to improve layout","shortMessageHtmlLink":"Update README.md"}},{"before":"2210c7a2e5a3a0af453b552321e12c0ac34036dc","after":"d6f542495d97b05450e59256b9c100926b0b86b0","ref":"refs/heads/main","pushedAt":"2024-04-20T01:20:11.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd preliminary performance information for M2 base model.","shortMessageHtmlLink":"Update README.md"}},{"before":"4434ee734d6244fe05c3aa205aadb40c6d429ffc","after":"2210c7a2e5a3a0af453b552321e12c0ac34036dc","ref":"refs/heads/main","pushedAt":"2024-04-20T00:15:08.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nReorganize sections a little and move NV perf results into a separate\nsection to give space for a future Apple section.","shortMessageHtmlLink":"Update README.md"}},{"before":"1eb0d8ee18389b0edb36c38844ba76630bbe381a","after":"4434ee734d6244fe05c3aa205aadb40c6d429ffc","ref":"refs/heads/main","pushedAt":"2024-04-19T16:44:56.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd performance information for Llama3 8B","shortMessageHtmlLink":"Update README.md"}},{"before":"0c96899700d7eb7a407d48ae71bcfd68d7fa4030","after":"1eb0d8ee18389b0edb36c38844ba76630bbe381a","ref":"refs/heads/main","pushedAt":"2024-04-19T06:58:27.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Rework chat frame detection to use tokenizer exclusively\n\nAlmost all chat formats, with the exception of baseline llama, rely on\nthe presence of special tokens; as such, it would make sense to use\ntokenizer instead of the model architecture to determine the chat style.\n\nThis is a little less ad-hoc and less prone to misbehavior on fine\ntunes.","shortMessageHtmlLink":"Rework chat frame detection to use tokenizer exclusively"}},{"before":"202608309648fcf9a465dc72d621f315ecc3ae6d","after":"0c96899700d7eb7a407d48ae71bcfd68d7fa4030","ref":"refs/heads/main","pushedAt":"2024-04-19T06:36:52.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Add a missing \\n to chat frame for Gemma\n\nThe end of turn token should end with \\n for better chat flow, similar\nto ChatML/Qwen.","shortMessageHtmlLink":"Add a missing \\n to chat frame for Gemma"}},{"before":"312d0de6d9f317c61e117cb038c0427d3db54687","after":"202608309648fcf9a465dc72d621f315ecc3ae6d","ref":"refs/heads/main","pushedAt":"2024-04-19T06:00:46.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Switch to snprintf in chat() for out of bounds safety\n\nAlso end prompt that's passed via command line with \\n, as this is\nnecessary for some frames and we get \\n from fgets() normally.","shortMessageHtmlLink":"Switch to snprintf in chat() for out of bounds safety"}},{"before":"3a12ad9a104aa739fe9dff5c6e11e6a3b4fe5f2f","after":"312d0de6d9f317c61e117cb038c0427d3db54687","ref":"refs/heads/main","pushedAt":"2024-04-19T05:23:28.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Add chat frame for Cohere's CommandR","shortMessageHtmlLink":"Add chat frame for Cohere's CommandR"}},{"before":"3c3cda375b808f8733f32236e9c5884b0bd6ba5e","after":"3a12ad9a104aa739fe9dff5c6e11e6a3b4fe5f2f","ref":"refs/heads/main","pushedAt":"2024-04-19T05:03:14.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Add Gemma chat framing\n\nGemma doesn't have specific support for system prompts but adding it to\nthe first user prompt seems to work.","shortMessageHtmlLink":"Add Gemma chat framing"}},{"before":"09ce329b843632f760bf4b58929ea758aca0ce95","after":"3c3cda375b808f8733f32236e9c5884b0bd6ba5e","ref":"refs/heads/main","pushedAt":"2024-04-19T04:25:29.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nRemove references to other projects as they are not super relevant anymore.","shortMessageHtmlLink":"Update README.md"}},{"before":"ad2c0ab2d18284e2b35224af07ab9a65bce12355","after":"09ce329b843632f760bf4b58929ea758aca0ce95","ref":"refs/heads/main","pushedAt":"2024-04-18T23:42:08.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Add support for Llama 3 chat prompt style\n\nThis is hacky and ideally needs to be saved in the model file somehow,\nbut full support for prompt templates is too complicated so for now\njust hack this in. This is sufficient for a good Llama3 chat experience.","shortMessageHtmlLink":"Add support for Llama 3 chat prompt style"}},{"before":"357581f379df94e602ebd16114458cf9563a09a8","after":"ad2c0ab2d18284e2b35224af07ab9a65bce12355","ref":"refs/heads/main","pushedAt":"2024-04-18T16:46:20.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd Llama3 8B to the list of tested models","shortMessageHtmlLink":"Update README.md"}},{"before":"7a1513f145e7b251fd6050f3921031f8b6b3b487","after":"357581f379df94e602ebd16114458cf9563a09a8","ref":"refs/heads/main","pushedAt":"2024-04-17T23:56:20.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"zeux","name":"Arseny Kapoulkine","path":"/zeux","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1106629?s=80&v=4"},"commit":{"message":"Update README.md\n\nAdd new OLMo variant to tested models.","shortMessageHtmlLink":"Update README.md"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEV0I6iQA","startCursor":null,"endCursor":null}},"title":"Activity · zeux/calm"}