UP | HOME |

Evite los verbos comodines con Emacs

Evite los verbos comodines con Emacs

En la interlocución diaria predominan los verbos ser, estar, hacer, tener, poder, ir o decir, que empobrecen el lenguaje español. Basta con evitar su utilización para enriquecer la comunicación. Las expresiones precisas facilitan la transmisión del conocimiento. Por ejemplo, en vez de describir que «~hice~ un artículo» resulta más apropiado indicar que «escribí un artículo».

decir.png

Aquí reseño unas palabras que intento evitar cuando redacto en español:

“es” “son” “sean” “sea” “sido” “siendo” “somos” “sea” “ser” “eran” “era” “hay” “haya” “he” “está” “están” “esté” “estamos” “estemos” “estar” “estará” “estarán” “tiene” “tienen” “tenemos” “tengo” “tener” “hacemos” “hace” “hemos” “va” “se” “nos” “mi” “nuestro” “nuestra” “me” “podemos” “puede” “pueden” “pueda” “puedan” “poder” “posible” “más”

El siguiente código, implementado en Emacs Lisp, resalta los verbos comodines de un texto mediante M-x etm-word-catchall-comodin-mode. Además, identifica las palabras repetidas a lo largo del documento.

  ;; ============================================================
(defvar etm-word-catchall-spanish-words
  '(
    ;; Forms of "ser".
    "es" "son" "sean" "sea" "sido" "siendo" "somos" "ser" "eran" "era"
    ;; Forms of "haber".
    "hay" "haya" "he"
    ;; Forms of "estar".
    "está" "están" "esté" "estamos" "estemos" "estar" "estará" "estarán"
    ;; Forms of "tener".
    "tiene" "tienen" "tenemos" "tengo" "tener"
    ;; Forms of "hacer".
    "hacemos" "hace"
    "hemos"
    "va"
    "se"
    ;; First-person pronouns and possessives.
    "nos" "mi" "nuestro" "nuestra" "me"
    ;; Forms of "poder".
    "podemos" "puede" "pueden" "pueda" "puedan" "poder"
    "posible"
    "más"
    )
  "List of catchall Spanish words, all lowercase, without duplicates.")

;; ============================================================

(defvar etm-word-stop-words-es
  (list "de" "la" "el" "en" "los" "y" "que" "o" "a" "por"
        "una" "un" "las" "como" "con" "no" "si" "del" "entre"
        "al" "para" "este" "esta")
  "Spanish stop words, given in lowercase.")

;; ============================================================

(defvar etm-word-words-commands-latex
  (list "usepackage" "marginfigure" "label"
        "section" "cdot" "newcommand")
  "LaTeX command names, written without the leading backslash.")

;; ============================================================

(defvar etm-word-words-commands-rnw
  (list "chunk" "fig" "false" "tidy" "x" "true" "pt"
        ;; Single-letter tokens.
        "c" "r" "n" "i" "j" "m" "s"
        "length" "sepal" "table")
  "Special words of Rnw documents, given in lowercase.")


;; ============================================================
;; Frequency of words
;; ============================================================

(defvar etm-word-number-of-frequent-words-of-current-buffer
  13
  "Default number of most frequent words to show or operate on.")


;; https://emacs.stackexchange.com/questions/13514/how-to-obtain-the-statistic-of-the-the-frequency-of-words-in-a-buffer
(defun etm-word-count-raw-word-list (raw-word-list)
  "Return a table of frequencies for RAW-WORD-LIST, sorted by frequency.

RAW-WORD-LIST is a list of strings.  The result is a list of
\(WORD . COUNT) pairs ordered by decreasing COUNT.  Words with the
same count appear in reverse order of their first occurrence in
RAW-WORD-LIST.  An empty RAW-WORD-LIST yields nil."
  (let ((cells (make-hash-table :test #'equal))
        (result nil))
    (dolist (word raw-word-list)
      (let ((cell (gethash word cells)))
        ;; First occurrence: create the pair and register it both in the
        ;; hash table (constant-time lookup) and in the result list.
        (unless cell
          (setq cell (cons word 0))
          (puthash word cell cells)
          (push cell result))
        (setcdr cell (1+ (cdr cell)))))
    ;; `sort' is stable, so ties keep the order built above.
    (sort result (lambda (a b) (> (cdr a) (cdr b))))))

(defun etm-word-get-words-counts-no-stops (&optional firstncases)
  "Return the most frequent words of the current buffer with their counts.

The buffer text is downcased and split on every non-alphabetic
character.  Words listed in `etm-word-stop-words-es',
`etm-word-words-commands-rnw' or `etm-word-words-commands-latex'
are discarded before counting.

FIRSTNCASES limits the number of entries returned.  It may be an
integer or a raw prefix argument; when nil, the value of
`etm-word-number-of-frequent-words-of-current-buffer' is used.
Negative values are treated as zero.

The result is the list returned by `etm-word-count-raw-word-list',
truncated to at most FIRSTNCASES entries."
  (let* ((limit (max 0 (if firstncases
                           ;; Accept raw prefix arguments such as (4).
                           (prefix-numeric-value firstncases)
                         etm-word-number-of-frequent-words-of-current-buffer)))
         ;; Build the exclusion list once instead of once per word.
         (excluded (append etm-word-stop-words-es
                           etm-word-words-commands-rnw
                           etm-word-words-commands-latex))
         (words (split-string (downcase (buffer-string)) "[^[:alpha:]]" t))
         (kept (cl-remove-if (lambda (word) (member word excluded)) words))
         (table (etm-word-count-raw-word-list kept)))
    (cl-subseq table 0 (min limit (length table)))))


;; ============================================================

(defun etm-word-stats (&optional firstncases)
  "Show a table with the most frequent words of the current buffer.

The words and their counts come from
`etm-word-get-words-counts-no-stops'.  They are written as an Org
table into the buffer \"*word-statistics*\", which is erased
first and displayed afterwards.

FIRSTNCASES is the number of words to list.  Interactively it is
the prefix argument: use C-u 40 M-x etm-word-stats if you want 40
words.  When it is nil, the default of
`etm-word-get-words-counts-no-stops' applies."
  (interactive "P")
  (let ((word-list (etm-word-get-words-counts-no-stops
                    ;; \"P\" delivers a raw prefix argument, which is a
                    ;; list such as (4) for a bare C-u; reduce it to a
                    ;; number before it reaches any arithmetic.
                    (and firstncases (prefix-numeric-value firstncases))))
        (stats-buffer (get-buffer-create "*word-statistics*")))
    (with-current-buffer stats-buffer
      (erase-buffer)
      (insert "| word | frequency |\n|------+-----------|\n")
      (dolist (entry word-list)
        (insert (format "| %s | %d |\n" (car entry) (cdr entry))))
      (org-mode)
      (goto-char (point-min))
      (org-table-align)
      ;; Leave point on the first data row, below the header and rule.
      (forward-line 2))
    (pop-to-buffer stats-buffer)))


;;(etm-word-stats)

;; ============================================================
;; Catchall words - Palabras comodín
;; ============================================================

;; Highlight my catchall words and my reiterative words

;; Alternative programs: https://github.com/bnbeckwith/writegood-mode


(defvar-local etm-word-catchall-comodin-font-lock-words nil
  "Font-lock keywords installed by the comodin highlighter.

The keywords are added to the current buffer only, so this record
is buffer-local as well: each buffer remembers the keywords it
has to remove later.")


;; http://ergoemacs.org/emacs/elisp_syntax_coloring.html
(defun etm-word-catchall-comodin-create-font-lock-words (&optional numofwords)
  "Build the font-lock keyword list for catchall and frequent words.

The result has two entries of the form (REGEXP . FACE).  The first
matches the words of `etm-word-catchall-spanish-words' and uses
`font-lock-warning-face'.  The second matches the words returned by
`etm-word-get-words-counts-no-stops', called with NUMOFWORDS, and
uses `font-lock-keyword-face'.  Every word is matched both as
given and capitalized."
  (let* ((frequent (mapcar #'car
                           (etm-word-get-words-counts-no-stops numofwords)))
         ;; Cover both spellings: word and Word.
         (catchall-variants
          (append (mapcar #'capitalize etm-word-catchall-spanish-words)
                  etm-word-catchall-spanish-words))
         (frequent-variants
          (append (mapcar #'capitalize frequent) frequent))
         (catchall-regexp (regexp-opt catchall-variants 'words))
         (frequent-regexp (regexp-opt frequent-variants 'words)))
    ;; Order matters: once a stretch of text is colored it is not
    ;; recolored, so the catchall entry comes first.
    (list (cons catchall-regexp 'font-lock-warning-face)
          (cons frequent-regexp 'font-lock-keyword-face))))


(defun etm-word-catchall-comodin-highlight (&optional numofwords)
  "Highlight catchall words and the most frequent NUMOFWORDS words.

Highlights previously installed by this command are removed
first.  The new keywords are built by
`etm-word-catchall-comodin-create-font-lock-words', stored in
`etm-word-catchall-comodin-font-lock-words' and appended to the
font-lock keywords of the current buffer.

NUMOFWORDS may be an integer or, interactively, the prefix
argument.  Negative values are treated as zero.  When it is nil
the keyword builder decides how many frequent words to use."
  (interactive "P")
  ;; Remove previous highlights.
  (etm-word-catchall-comodin-highlight-remove-all)
  ;; Define the new highlights.  \"P\" yields a raw prefix argument,
  ;; e.g. (4) for a bare C-u, so convert it to a number first.
  (setq etm-word-catchall-comodin-font-lock-words
        (etm-word-catchall-comodin-create-font-lock-words
         (and numofwords (max 0 (prefix-numeric-value numofwords)))))
  ;; Add them after the keywords already active in this buffer.
  (font-lock-add-keywords nil etm-word-catchall-comodin-font-lock-words 'append)
  ;; Refontify the buffer.
  (font-lock-flush))


(defun etm-word-catchall-comodin-highlight-remove-all ()
  "Remove the catchall highlighting from the current buffer.

Each entry recorded in `etm-word-catchall-comodin-font-lock-words'
is removed through
`etm-word-catchall-comodin-highlight-symbol-remove-symbol'."
  (interactive)
  ;; Take the matchers up front: the helper shrinks the recorded list
  ;; while we iterate.
  (let ((matchers (mapcar #'car etm-word-catchall-comodin-font-lock-words)))
    (dolist (matcher matchers)
      (etm-word-catchall-comodin-highlight-symbol-remove-symbol matcher))
    matchers))

(defun etm-word-catchall-comodin-highlight-symbol-remove-symbol (symbol)
  "Remove the highlight whose matcher is SYMBOL.

SYMBOL is looked up as a key in
`etm-word-catchall-comodin-font-lock-words'.  The entry found is
dropped from that list and from the font-lock keywords of the
current buffer, and the buffer is then refontified."
  (let* ((installed etm-word-catchall-comodin-font-lock-words)
         (entry (assoc symbol installed)))
    (setq etm-word-catchall-comodin-font-lock-words (delq entry installed))
    (font-lock-remove-keywords nil (list entry))
    (font-lock-flush)))

(define-minor-mode etm-word-catchall-comodin-mode
  "Toggle Comodin mode.

When the mode is enabled, highlight the catchall words and the
most frequent words of the current buffer.  When it is disabled,
remove that highlighting."
  ;; Keyword arguments must come before the body; a keyword placed
  ;; after the body is evaluated as an ordinary form and has no effect.
  :init-value nil
  ;; The indicator for the mode line.
  :lighter " Cmdin"
  :group 'word-catchall-comodin
  (if etm-word-catchall-comodin-mode
      ;; The highlight command clears earlier highlights itself.
      (etm-word-catchall-comodin-highlight)
    (etm-word-catchall-comodin-highlight-remove-all)))