UP | HOME |

Listado de caracteres UTF-8 en R

2025-11-03

Listado de caracteres UTF-8 en R

En R, los caracteres UTF-8 correspondientes a los puntos de código 0 a 255 se pueden obtener de la siguiente forma:

# Convert each integer code point 0..255 to a UTF-8 string, one per element.
# character(1) is the vapply() template: every call must return one string.
vapply(0:255, intToUtf8, character(1))
  [1] ""       "\001"   "\002"   "\003"   "\004"   "\005"   "\006"   "\a"    
  [9] "\b"     "\t"     "\n"     "\v"     "\f"     "\r"     "\016"   "\017"  
 [17] "\020"   "\021"   "\022"   "\023"   "\024"   "\025"   "\026"   "\027"  
 [25] "\030"   "\031"   "\032"   "\033"   "\034"   "\035"   "\036"   "\037"  
 [33] " "      "!"      "\""     "#"      "$"      "%"      "&"      "'"     
 [41] "("      ")"      "*"      "+"      ","      "-"      "."      "/"     
 [49] "0"      "1"      "2"      "3"      "4"      "5"      "6"      "7"     
 [57] "8"      "9"      ":"      ";"      "<"      "="      ">"      "?"     
 [65] "@"      "A"      "B"      "C"      "D"      "E"      "F"      "G"     
 [73] "H"      "I"      "J"      "K"      "L"      "M"      "N"      "O"     
 [81] "P"      "Q"      "R"      "S"      "T"      "U"      "V"      "W"     
 [89] "X"      "Y"      "Z"      "["      "\\"     "]"      "^"      "_"     
 [97] "`"      "a"      "b"      "c"      "d"      "e"      "f"      "g"     
[105] "h"      "i"      "j"      "k"      "l"      "m"      "n"      "o"     
[113] "p"      "q"      "r"      "s"      "t"      "u"      "v"      "w"     
[121] "x"      "y"      "z"      "{"      "|"      "}"      "~"      "\177"  
[129] "\u0080" "\u0081" "\u0082" "\u0083" "\u0084" "\u0085" "\u0086" "\u0087"
[137] "\u0088" "\u0089" "\u008a" "\u008b" "\u008c" "\u008d" "\u008e" "\u008f"
[145] "\u0090" "\u0091" "\u0092" "\u0093" "\u0094" "\u0095" "\u0096" "\u0097"
[153] "\u0098" "\u0099" "\u009a" "\u009b" "\u009c" "\u009d" "\u009e" "\u009f"
[161] " "      "¡"      "¢"      "£"      "¤"      "¥"      "¦"      "§"     
[169] "¨"      "©"      "ª"      "«"      "¬"      "­"      "®"      "¯"     
[177] "°"      "±"      "²"      "³"      "´"      "µ"      "¶"      "·"     
[185] "¸"      "¹"      "º"      "»"      "¼"      "½"      "¾"      "¿"     
[193] "À"      "Á"      "Â"      "Ã"      "Ä"      "Å"      "Æ"      "Ç"     
[201] "È"      "É"      "Ê"      "Ë"      "Ì"      "Í"      "Î"      "Ï"     
[209] "Ð"      "Ñ"      "Ò"      "Ó"      "Ô"      "Õ"      "Ö"      "×"     
[217] "Ø"      "Ù"      "Ú"      "Û"      "Ü"      "Ý"      "Þ"      "ß"     
[225] "à"      "á"      "â"      "ã"      "ä"      "å"      "æ"      "ç"     
[233] "è"      "é"      "ê"      "ë"      "ì"      "í"      "î"      "ï"     
[241] "ð"      "ñ"      "ò"      "ó"      "ô"      "õ"      "ö"      "÷"     
[249] "ø"      "ù"      "ú"      "û"      "ü"      "ý"      "þ"      "ÿ"

Si deseamos una descripción de cada símbolo, podemos usar el siguiente procedimiento, que descarga el nombre de cada carácter desde una tabla publicada en línea:

suppressMessages(library(tidyverse))
suppressMessages(library(rvest))

# Page that publishes the character table scraped below.
url <- "https://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=bin"

# Download and parse the page, then select the code table element(s).
# html_elements() is used instead of html_nodes(), which is superseded
# since rvest 1.0.0.
code_tables <- url %>%
  read_html() %>%
  html_elements(css = "body > table.codetable")

# Fail with a clear message instead of an opaque "subscript out of bounds"
# if the page layout changed and the selector no longer matches anything.
if (length(code_tables) == 0L) {
  stop("No 'table.codetable' element found at ", url, call. = FALSE)
}

# Only the first matching table is converted to a data frame.
utf8_enc <- html_table(code_tables[[1]])

# Show each character next to its name.
utf8_enc[, c("character", "name")]

Error in library(tidyverse) : there is no package called ‘tidyverse’
# A tibble: 256 × 2
   character name     
   <chr>     <chr>    
 1 ""        <control>
 2 ""        <control>
 3 ""        <control>
 4 ""        <control>
 5 ""        <control>
 6 ""        <control>
 7 ""        <control>
 8 ""        <control>
 9 ""        <control>
10 ""        <control>
# ℹ 246 more rows
# ℹ Use `print(n = ...)` to see more rows